Skip to content

Instantly share code, notes, and snippets.

@Kobzol
Created March 4, 2025 09:11
Show Gist options
  • Select an option

  • Save Kobzol/217769d9cdf03074fe035047c113994f to your computer and use it in GitHub Desktop.

Select an option

Save Kobzol/217769d9cdf03074fe035047c113994f to your computer and use it in GitHub Desktop.
Backfill script to fill in PyLadies alumni from git history
import dataclasses
import io
import json
import os
from collections import defaultdict
from typing import Any, Dict, List, Optional
import git
import tqdm
import yaml
from importlib_metadata.compat.py39 import normalized_name
@dataclasses.dataclass
class Person:
    """One team member as read from a team YAML file.

    ``img``, ``role`` and ``metadata`` are optional and default to
    "absent", so a person can be built from just a name and a city.
    """

    # Value of the entry's "name" key.
    name: str
    # Team identifier supplied by the caller (the scripts in this file pass
    # the team file's name, e.g. the basename of "teams/<city>.yml").
    city: str
    # Value of the entry's "img" key, or None when the entry has none.
    img: Optional[str] = None
    # Value of the entry's "role" key, or None when the entry has none.
    role: Optional[str] = None
    # Every remaining key/value of the entry besides name/img/role.
    metadata: Dict[str, Any] = dataclasses.field(default_factory=dict)
def load_people(city: str, data: Any) -> Dict[str, Person]:
    """Build a ``name -> Person`` mapping from a parsed team list.

    Args:
        city: Team identifier stored on every created ``Person``.
        data: A list of mapping entries. An entry containing the key
            ``"_inactive"`` holds a nested list of entries that is loaded
            recursively; any other entry without a ``"name"`` key is skipped.

    Returns:
        A dict keyed by person name. When a name appears more than once in
        the same list, the first entry is kept and only its ``img``/``role``
        are overwritten by later non-``None`` values.

    Raises:
        AssertionError: If ``data`` is not a list.

    The input entries are not modified.
    """
    result: Dict[str, Person] = {}
    assert isinstance(data, list), f"expected a list of people, got {type(data).__name__}"

    # Keys that map to dedicated Person fields; everything else is metadata.
    reserved_keys = ("name", "img", "role")

    for p in data:
        if "_inactive" in p:
            # An empty "_inactive:" key parses as None; treat it as no people.
            inactive = p["_inactive"] or []
            # Nested entries replace any same-named entry collected so far.
            result.update(load_people(city, inactive))
            continue
        if "name" not in p:
            continue

        name = p["name"]
        img = p.get("img")
        role = p.get("role")

        existing = result.get(name)
        if existing is not None:
            # Duplicate name: only refresh the fields the later entry provides.
            if img is not None:
                existing.img = img
            if role is not None:
                existing.role = role
            continue

        result[name] = Person(
            name=name,
            city=city,
            img=img,
            role=role,
            # Copy instead of pop()-ing so the caller's data stays intact.
            metadata={k: v for k, v in p.items() if k not in reserved_keys},
        )
    return result
def load_team(city: str, stream) -> Dict[str, Person]:
    """Parse one team YAML document and return its people by name.

    Args:
        city: Team identifier forwarded to ``load_people``.
        stream: Anything ``yaml.safe_load`` accepts (file object, bytes, str).

    Returns:
        The mapping produced by ``load_people``; an empty dict when the
        document cannot be parsed or is empty.

    Raises:
        AssertionError: If the document parses to something other than a list.
    """
    try:
        data = yaml.safe_load(stream)
    except Exception:
        # Deliberate best-effort: an unparseable revision counts as "no people".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate so a long run can still be aborted.
        return {}
    if data is None:
        # An empty document parses to None; there is nobody to load.
        return {}
    assert isinstance(data, list)
    return load_people(city, data)
def load_teams() -> Dict[str, Dict[str, Person]]:
    """Load every team file found in the ``teams`` directory.

    Returns:
        A dict mapping each file name in ``teams`` to the ``name -> Person``
        mapping returned by ``load_team`` for that file.
    """
    teams = {}
    for filename in os.listdir("teams"):
        team_path = os.path.join("teams", filename)
        if not os.path.isfile(team_path):
            # open() would fail on a sub-directory; only regular files are teams.
            continue
        # Read as UTF-8 explicitly so non-ASCII names do not depend on the locale.
        with open(team_path, encoding="utf-8") as f:
            teams[filename] = load_team(filename, f)
    return teams
def load_file_at_commit(commit, file: str) -> Optional[bytes]:
    """Return the raw contents of ``file`` as stored in ``commit``.

    Args:
        commit: Object exposing ``tree``, where ``tree / path`` yields an
            object with a readable ``data_stream`` (a GitPython commit in
            this script).
        file: Repository-relative path. Paths starting with ``"img"`` are
            looked up under ``static/``.

    Returns:
        The file's bytes, or ``None`` when the path is not present in the
        commit's tree (for example because that commit deleted the file).
    """
    if file.startswith("img"):
        file = f"static/{file}"
    try:
        blob = commit.tree / file
    except KeyError:
        # The tree lookup raises KeyError for a path the commit does not contain.
        return None
    return blob.data_stream.read()
def renormalize_yaml(path: str):
    """Rewrite ``path`` so that entries are separated by one blank line.

    A blank line is inserted before every line starting with ``"- name:"``
    except on the first line of the file. No blank line is added when the
    preceding line is already blank, so running this twice leaves the file
    unchanged.

    The result is written to ``<path>.bak`` first and then moved over the
    original file.
    """
    with open(path, encoding="utf-8") as f:
        lines = f.readlines()

    normalized_path = f"{path}.bak"
    with open(normalized_path, "w", encoding="utf-8") as normalized:
        for index, line in enumerate(lines):
            starts_entry = index > 0 and line.startswith("- name:")
            if starts_entry and lines[index - 1].strip():
                normalized.write("\n")
            normalized.write(line)
    # os.replace overwrites an existing target on every platform;
    # os.rename raises on Windows when ``path`` already exists.
    os.replace(normalized_path, path)
# Backfill: walk the git history of the team files and append every person who
# once appeared in a team file, but is no longer listed there, as an alumni
# entry of the current file.

# Number of oldest commits that are not inspected.
# NOTE(review): presumably the team files did not exist in a loadable form
# before that point -- confirm before changing.
SKIPPED_OLDEST_COMMITS = 1000

ref_teams = load_teams()
# city (team file name) -> person name -> (Person, image bytes or None)
missing_people = defaultdict(dict)
repo = git.Repo(".")

# Reverse the commit list so it is walked in the opposite order to
# iter_commits(); a later occurrence of a person overwrites an earlier one.
commits = list(repo.iter_commits())[::-1][SKIPPED_OLDEST_COMMITS:]
for commit in tqdm.tqdm(commits):
    for file in commit.stats.files:
        if not (file.startswith("teams") and file.endswith(".yml")):
            continue
        city = os.path.basename(file)
        current_team = ref_teams.get(city)
        if current_team is None:
            # The team file no longer exists, so there is nothing to append to.
            continue
        try:
            file_data = load_file_at_commit(commit, file)
        except KeyError:
            file_data = None
        if file_data is None:
            # The commit touched the file by deleting it; no content to read.
            continue
        team = load_team(city, io.BytesIO(file_data))
        for name, person in team.items():
            if current_team.get(name) is not None:
                continue
            img_data = None
            if person.img is not None:
                try:
                    img_data = load_file_at_commit(commit, person.img)
                except KeyError:
                    # Image referenced by the YAML but absent from this commit.
                    img_data = None
            missing_people[city][name] = (person, img_data)

for city, city_people in missing_people.items():
    yaml_path = f"teams/{city}"
    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    for name, (person, img_data) in city_people.items():
        entry = {
            "name": name,
            "alumni": True,
        }
        if person.img is not None:
            img_path = f"static/{person.img}"
            if not os.path.isfile(img_path) and img_data is not None:
                # Restore the historic image; never overwrite an existing file.
                os.makedirs(os.path.dirname(img_path), exist_ok=True)
                with open(img_path, "wb") as f:
                    f.write(img_data)
            if os.path.isfile(img_path):
                # Only reference an image that is actually present on disk.
                entry["img"] = person.img
        if person.role is not None:
            entry["role"] = person.role
        entry.update(person.metadata)
        data.append(entry)
    with open(yaml_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, sort_keys=False, allow_unicode=True)
    # Re-insert a blank line before each "- name:" entry in the dumped file.
    renormalize_yaml(yaml_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment