Created
March 4, 2025 09:11
-
-
Save Kobzol/217769d9cdf03074fe035047c113994f to your computer and use it in GitHub Desktop.
Backfill script to fill in PyLadies alumni from git history
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import dataclasses | |
| import io | |
| import json | |
| import os | |
| from collections import defaultdict | |
| from typing import Any, Dict, List, Optional | |
| import git | |
| import tqdm | |
| import yaml | |
| from importlib_metadata.compat.py39 import normalized_name | |
@dataclasses.dataclass
class Person:
    """One team member as read from an entry of a team YAML file.

    Instances are mutable on purpose: ``load_people`` updates ``img`` and
    ``role`` in place when the same name shows up more than once.
    """

    # Value of the entry's "name" key; also used as the lookup key by callers.
    name: str
    # Team identifier supplied by the caller (the script passes the YAML file name).
    city: str
    # Value of the entry's "img" key, or None when the entry has no image.
    img: Optional[str]
    # Value of the entry's "role" key, or None when the entry has no role.
    role: Optional[str]
    # Whatever keys remain in the entry once name/img/role are taken out.
    metadata: Any
def load_people(city: str, data: Any) -> Dict[str, Person]:
    """Collect the people listed in a parsed team YAML document.

    Args:
        city: Team identifier stored on every created ``Person``.
        data: The parsed YAML document; must be a list of mappings. A mapping
            with an ``"_inactive"`` key is treated as a nested list of people
            and loaded recursively. Mappings without a ``"name"`` key are
            skipped.

    Returns:
        A dict mapping each person's name to its ``Person``. When a name is
        seen again at the same nesting level, only its non-missing ``img`` /
        ``role`` overwrite the earlier values. People coming from an
        ``"_inactive"`` list replace any same-named entry collected so far.

    Raises:
        AssertionError: if ``data`` (or a nested ``"_inactive"`` value) is not
            a list.
    """
    assert isinstance(data, list)
    result: Dict[str, Person] = {}
    for raw in data:
        if "_inactive" in raw:
            result.update(load_people(city, raw["_inactive"]))
            continue
        if "name" not in raw:
            continue

        # Pop from a private copy: the caller's parsed YAML must stay intact
        # (the original popped keys straight out of the input mappings).
        fields = dict(raw)
        name = fields.pop("name")
        img = fields.pop("img", None)
        role = fields.pop("role", None)

        known = result.get(name)
        if known is None:
            result[name] = Person(
                name=name,
                city=city,
                img=img,
                role=role,
                metadata=fields,
            )
            continue

        # Duplicate name: keep the first entry, refresh only what is provided.
        if img is not None:
            known.img = img
        if role is not None:
            known.role = role
    return result
def load_team(city: str, stream) -> Dict[str, Person]:
    """Parse one team YAML document and return its people keyed by name.

    Args:
        city: Team identifier forwarded to ``load_people``.
        stream: Anything ``yaml.safe_load`` accepts (the script passes open
            text files and ``io.BytesIO`` objects).

    Returns:
        The result of ``load_people`` for the parsed document, or an empty
        dict when the document cannot be parsed or is empty.

    Raises:
        AssertionError: if the document parses to something other than a list.
    """
    try:
        data = yaml.safe_load(stream)
    except Exception:
        # Best effort: an unparsable revision of a team file is skipped.
        # Unlike a bare ``except:``, this still lets KeyboardInterrupt and
        # SystemExit through so a long run can be aborted.
        return {}
    if data is None:
        # An empty YAML document parses to None; treat it as "no people".
        return {}
    assert isinstance(data, list)
    return load_people(city, data)
def load_teams() -> Dict[str, Dict[str, Person]]:
    """Load every file in the ``teams`` directory of the working directory.

    Returns:
        A dict mapping each file name (as returned by ``os.listdir``) to the
        people loaded from that file by ``load_team``.
    """
    teams: Dict[str, Dict[str, Person]] = {}
    for filename in os.listdir("teams"):
        # Decode explicitly as UTF-8 instead of relying on the platform
        # locale, which mis-decodes non-ASCII names on some systems.
        with open(os.path.join("teams", filename), encoding="utf-8") as f:
            teams[filename] = load_team(filename, f)
    return teams
def load_file_at_commit(commit, file: str) -> Optional[bytes]:
    """Read the contents of ``file`` as stored in the tree of ``commit``.

    Args:
        commit: Object exposing ``tree``, where ``tree / path`` yields an
            object with a readable ``data_stream`` (a GitPython commit in
            this script).
        file: Path inside the repository. Paths starting with ``"img"`` are
            looked up under ``static/``.

    Returns:
        The file's bytes, or ``None`` when the tree lookup raises
        ``KeyError`` (the path does not exist in that commit).
    """
    if file.startswith("img"):
        file = f"static/{file}"
    try:
        blob = commit.tree / file
    except KeyError:
        # Honour the declared Optional return instead of crashing the run.
        return None
    # read() already returns bytes; no intermediate BytesIO copy is needed.
    return blob.data_stream.read()
def renormalize_yaml(path: str):
    """Rewrite the YAML file at ``path`` with a blank line between entries.

    An empty line is inserted before every line that starts with
    ``"- name:"``, except when it is the very first line of the file. The
    result is written to ``<path>.bak`` and then moved over ``path``.

    Args:
        path: Path of the YAML file to rewrite in place.
    """
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()

    normalized_path = f"{path}.bak"
    with open(normalized_path, "w", encoding="utf-8") as normalized:
        for index, line in enumerate(lines):
            if index > 0 and line.startswith("- name:"):
                normalized.write("\n")
            normalized.write(line)

    # os.replace overwrites an existing target on every platform, whereas
    # os.rename raises on Windows when ``path`` already exists.
    os.replace(normalized_path, path)
# Backfill: walk the git history of the team files, find people who were once
# listed but are absent from the current files, and append them to the current
# files flagged as alumni (restoring their image when it is no longer on disk).

# People currently listed, keyed by team file name and then by person name.
ref_teams = load_teams()
# team file name -> person name -> (Person, image bytes or None)
missing_people = defaultdict(dict)
repo = git.Repo(".")

# Number of commits at the start of history that are not scanned.
# NOTE(review): the reason for this cut-off is not visible here - confirm.
SKIPPED_OLDEST_COMMITS = 1000
# Oldest first, so a later revision of a person overwrites an earlier one.
commits = list(repo.iter_commits())[::-1][SKIPPED_OLDEST_COMMITS:]

for commit in tqdm.tqdm(commits):
    for file in commit.stats.files:
        if not (file.startswith("teams") and file.endswith(".yml")):
            continue
        city = os.path.basename(file)
        current_team = ref_teams.get(city)
        if current_team is None:
            # No such team file today, so there is nothing to append to.
            continue
        try:
            file_data = load_file_at_commit(commit, file)
        except KeyError:
            # The path is not in this commit's tree (presumably the commit
            # deleted or renamed it).
            file_data = None
        if file_data is None:
            continue
        team = load_team(city, io.BytesIO(file_data))
        for (name, person) in team.items():
            if name in current_team:
                continue
            img_data = None
            if person.img is not None:
                try:
                    img_data = load_file_at_commit(commit, person.img)
                except KeyError:
                    # Image referenced by the entry is absent from this commit.
                    img_data = None
            missing_people[city][name] = (person, img_data)

for (city, people) in missing_people.items():
    yaml_path = f"teams/{city}"
    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    for (name, (person, img_data)) in people.items():
        entry = {
            "name": name,
            "alumni": True,
        }
        if person.img is not None:
            entry["img"] = person.img
            img_path = f"static/{person.img}"
            # Restore the image only when it is gone from disk and its bytes
            # were actually recovered from history.
            if img_data is not None and not os.path.isfile(img_path):
                with open(img_path, "wb") as f:
                    f.write(img_data)
        if person.role is not None:
            entry["role"] = person.role
        entry.update(person.metadata)
        data.append(entry)
    with open(yaml_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, sort_keys=False, allow_unicode=True)
    renormalize_yaml(yaml_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment