Created
March 3, 2026 19:22
-
-
Save scfrisby/2991629dec0b320f3be0872b3edc8c16 to your computer and use it in GitHub Desktop.
Drop in model replacement
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
stuart_ratings.py - drop-in ratings module for generate-predictions.py

Replaces compute_match_ratings() and compute_xg_ratings() with the blended
methodology from Stuart's Championship model.

Blend formula
-------------
    blended_att(team) = 0.50 * xGF/g + 0.50 * actual_GF/g
    blended_def(team) = 0.50 * xGA/g + 0.50 * actual_GA/g

    strength(team, venue) =
        0.40 * venue_PPG
      + 0.60 * last_6_form   (points from last 6 games / 6, on a 0-3 scale)
      + 0.10 * blended_xG_differential_bonus

Ratings are normalised to league average = 1.0 so they slot into
Ben's compute_match_lambdas() formula unchanged:

    lam_h = hr["home_attack"] * ar["away_defense"] * avg_home_goals
    lam_a = ar["away_attack"] * hr["home_defense"] * avg_away_goals

Drop-in usage
-------------
In generate-predictions.py, replace:

    match_ratings = compute_match_ratings(profiles, league_stats)
    ...
    ratings = blend_ratings(match_ratings, xg_ratings, xg_weight)

with:

    from stuart_ratings import compute_stuart_ratings
    ratings = compute_stuart_ratings(matches, fotmob_stats, league_stats)

Everything downstream (apply_overrides, compute_match_lambdas,
simulate_season, output) is untouched.

xG data format (FotMob stats JSON)
----------------------------------
    {
      "stats": {
        "<team_id>": {
          "expected_goals_team":          {"value": <float>},  // season xGF total
          "expected_goals_conceded_team": {"value": <float>},  // season xGA total
          "matchesPlayed": <int>
        }
      }
    }

If fotmob_stats is None or empty, falls back to actual goals only.
"""
from collections import defaultdict
# -- Weighting constants -------------------------------------------------------
SEASON_VS_XG_BLEND = 0.50  # 0.5 = equal weight to xG and actual goals per game
VENUE_PPG_WEIGHT = 0.40    # fraction of composite strength from home/away PPG
FORM_WEIGHT = 0.60         # fraction of composite strength from last-6 form
XG_DIFF_BONUS = 0.10       # multiplier on blended xG differential
FORM_WINDOW = 6            # number of recent matches for form calculation

# Empirically calibrated from Championship data: the mean of all home+away
# strength scores across the division. Used to convert composite strength
# into a multiplier relative to a league-average team.
NEUTRAL_STRENGTH = 1.57
| def _extract_xg_per_game(fotmob_stats): | |
| βββ | |
| Parse FotMob stats JSON into {team_id_str: (xgf_per_game, xga_per_game)}. | |
| Returns empty dict if data is missing or malformed. | |
| βββ | |
| xg = {} | |
| if not fotmob_stats: | |
| return xg | |
| for tid, team_stats in fotmob_stats.get(βstatsβ, {}).items(): | |
| xg_entry = team_stats.get(βexpected_goals_teamβ) | |
| xga_entry = team_stats.get(βexpected_goals_conceded_teamβ) | |
| mp = team_stats.get(βmatchesPlayedβ, 0) | |
| if not xg_entry or not xga_entry or mp == 0: | |
| continue | |
| xg[str(tid)] = ( | |
| xg_entry[βvalueβ] / mp, | |
| xga_entry[βvalueβ] / mp, | |
| ) | |
| return xg | |
| def _build_season_stats(matches): | |
| βββ | |
| Derive per-team season stats from finished matches. | |
| ``` | |
| Returns {team_id_str: { | |
| home_gf, home_ga, home_games, home_ppg, | |
| away_gf, away_ga, away_games, away_ppg, | |
| last6_pts | |
| }} | |
| """ | |
| finished = sorted( | |
| [m for m in matches if m["status"] == "FINISHED"], | |
| key=lambda m: m["utcDate"], | |
| ) | |
| raw = defaultdict(lambda: { | |
| "home_gf": 0, "home_ga": 0, "home_games": 0, "home_pts": 0, | |
| "away_gf": 0, "away_ga": 0, "away_games": 0, "away_pts": 0, | |
| "results": [], | |
| }) | |
| for m in finished: | |
| hid = str(m["homeTeamId"]) | |
| aid = str(m["awayTeamId"]) | |
| hg = m["homeGoals"] | |
| ag = m["awayGoals"] | |
| if hg is None or ag is None: | |
| continue | |
| raw[hid]["home_gf"] += hg | |
| raw[hid]["home_ga"] += ag | |
| raw[hid]["home_games"] += 1 | |
| raw[aid]["away_gf"] += ag | |
| raw[aid]["away_ga"] += hg | |
| raw[aid]["away_games"] += 1 | |
| if hg > ag: | |
| raw[hid]["home_pts"] += 3; raw[hid]["results"].append(3) | |
| raw[aid]["results"].append(0) | |
| elif hg == ag: | |
| raw[hid]["home_pts"] += 1; raw[hid]["results"].append(1) | |
| raw[aid]["away_pts"] += 1; raw[aid]["results"].append(1) | |
| else: | |
| raw[aid]["away_pts"] += 3; raw[aid]["results"].append(3) | |
| raw[hid]["results"].append(0) | |
| out = {} | |
| for tid, s in raw.items(): | |
| hg = s["home_games"]; ag = s["away_games"] | |
| out[tid] = { | |
| "home_gf": s["home_gf"], | |
| "home_ga": s["home_ga"], | |
| "home_games": hg, | |
| "home_ppg": s["home_pts"] / hg if hg else 1.0, | |
| "away_gf": s["away_gf"], | |
| "away_ga": s["away_ga"], | |
| "away_games": ag, | |
| "away_ppg": s["away_pts"] / ag if ag else 1.0, | |
| "last6_pts": sum(s["results"][-FORM_WINDOW:]), | |
| } | |
| return out | |
| ``` | |
def compute_stuart_ratings(matches, fotmob_stats, league_stats):
    """
    Build blended attack/defense ratings compatible with Ben's
    compute_match_lambdas() interface.

    Parameters
    ----------
    matches : list - Ben's matches.json match dicts
    fotmob_stats : dict - Ben's fotmob-stats.json, or None
    league_stats : dict - {"avg_home_goals": float, "avg_away_goals": float}

    Returns
    -------
    ratings : {team_id_str: {
        "home_attack", "home_defense",
        "away_attack", "away_defense"
    }}
    Each value is a blended goals-per-game figure divided by the relevant
    league-average goal rate (``avg_home_goals`` / ``avg_away_goals``);
    attack values are additionally scaled by the clamped strength
    multiplier. A rating falls back to 1.0 when its league average is 0.
    """
    season = _build_season_stats(matches)
    xg_map = _extract_xg_per_game(fotmob_stats)
    avg_h = league_stats["avg_home_goals"]
    avg_a = league_stats["avg_away_goals"]

    # -- Blended goals per game ------------------------------------------------
    def blended_per_game(tid, home_key, away_key, xg_index):
        """Blend actual per-game goals with xG per game (when available)."""
        s = season[tid]
        tot = s["home_games"] + s["away_games"]
        if tot == 0:
            return (avg_h + avg_a) / 2
        actual = (s[home_key] + s[away_key]) / tot
        if tid in xg_map:
            return (
                SEASON_VS_XG_BLEND * xg_map[tid][xg_index]
                + (1 - SEASON_VS_XG_BLEND) * actual
            )
        # No xG for this team: fall back to actual goals only.
        return actual

    # Computed once per team; both the league average and the per-team
    # ratings below read from these tables.
    blended_att = {
        tid: blended_per_game(tid, "home_gf", "away_gf", 0) for tid in season
    }
    blended_def = {
        tid: blended_per_game(tid, "home_ga", "away_ga", 1) for tid in season
    }

    league_avg_blend_att = (
        sum(blended_att.values()) / len(blended_att) if blended_att else 1.0
    )

    # -- Composite strength (our model's signal) -------------------------------
    def strength(tid, venue):
        s = season[tid]
        venue_ppg = s["home_ppg"] if venue == "home" else s["away_ppg"]
        # Normalised to a 0-3 scale. The divisor is always FORM_WINDOW, even
        # for teams that have played fewer matches than that.
        form = s["last6_pts"] / FORM_WINDOW
        # NOTE(review): this bonus is blended attack relative to the league
        # average, not attack minus defense - confirm that is the intent.
        xgd = blended_att[tid] - league_avg_blend_att
        return (
            VENUE_PPG_WEIGHT * venue_ppg
            + FORM_WEIGHT * form
            + XG_DIFF_BONUS * xgd
        )

    def strength_multiplier(tid, venue):
        # Relative to a league-average team, clamped to [0.5, 2.0] to
        # prevent extreme outliers from dominating.
        return max(0.5, min(2.0, strength(tid, venue) / NEUTRAL_STRENGTH))

    # -- Convert to Ben's normalised rating format -----------------------------
    #
    # Ben's lambda formula:
    #     lam_h = home_attack * away_defense * avg_home_goals
    #     lam_a = away_attack * home_defense * avg_away_goals
    #
    # We express attack/defense as fractions of league average so the scalar
    # multiplication by avg_home/away_goals produces realistic goal counts.
    # The composite strength score modulates attack via strength_multiplier().
    ratings = {}
    for tid in season:
        ba = blended_att[tid]
        bd = blended_def[tid]
        home_mult = strength_multiplier(tid, "home")
        away_mult = strength_multiplier(tid, "away")
        ratings[tid] = {
            "home_attack": (ba / avg_h) * home_mult if avg_h else 1.0,
            "away_attack": (ba / avg_a) * away_mult if avg_a else 1.0,
            "home_defense": (bd / avg_a) if avg_a else 1.0,
            "away_defense": (bd / avg_h) if avg_h else 1.0,
        }
    return ratings
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment