# scfrisby/stuart_ratings.py

"""
stuart_ratings.py — drop-in ratings module for generate-predictions.py

Replaces compute_match_ratings() and compute_xg_ratings() with the blended
methodology from Stuart's Championship model.

Blend formula
-------------

    blended_att(team) = 0.50 * xGF/g  +  0.50 * actual_GF/g
    blended_def(team) = 0.50 * xGA/g  +  0.50 * actual_GA/g

    strength(team, venue) =
          0.40 * venue_PPG
        + 0.60 * last_6_form  (points from last 6 games / 6, on a 0–3 scale)
        + 0.10 * blended_xG_differential_bonus

Ratings are normalised to league average = 1.0 so they slot into
Ben's compute_match_lambdas() formula unchanged:

    lam_h = hr["home_attack"] * ar["away_defense"] * avg_home_goals
    lam_a = ar["away_attack"] * hr["home_defense"] * avg_away_goals

Drop-in usage
-------------

In generate-predictions.py, replace:

    match_ratings = compute_match_ratings(profiles, league_stats)
    ...
    ratings = blend_ratings(match_ratings, xg_ratings, xg_weight)

with:

    from stuart_ratings import compute_stuart_ratings
    ratings = compute_stuart_ratings(matches, fotmob_stats, league_stats)

Everything downstream (apply_overrides, compute_match_lambdas,
simulate_season, output) is untouched.

xG data format (FotMob stats JSON)
----------------------------------

    {
      "stats": {
        "<team_id>": {
          "expected_goals_team":           {"value": <float>},  // season xGF total
          "expected_goals_conceded_team":  {"value": <float>},  // season xGA total
          "matchesPlayed": <int>
        }
      }
    }

If fotmob_stats is None or empty, falls back to actual goals only.
"""

from collections import defaultdict

# ── Weighting constants ───────────────────────────────────────────────────────

SEASON_VS_XG_BLEND = 0.50   # weight on xG per game; the remaining (1 - weight) goes to actual goals per game
VENUE_PPG_WEIGHT   = 0.40   # weight of home/away points-per-game in the composite strength
FORM_WEIGHT        = 0.60   # weight of last-FORM_WINDOW form in the composite strength
XG_DIFF_BONUS      = 0.10   # additive bonus: multiplier on (blended attack - league-average blended attack)

FORM_WINDOW        = 6      # number of recent matches for form calculation

# Empirically calibrated from Championship data: the mean of all home+away
# strength scores across the division. Used to convert composite strength
# into a multiplier relative to a league-average team.
NEUTRAL_STRENGTH   = 1.57

def _extract_xg_per_game(fotmob_stats):
“””
Parse FotMob stats JSON into {team_id_str: (xgf_per_game, xga_per_game)}.
Returns empty dict if data is missing or malformed.
“””
xg = {}
if not fotmob_stats:
return xg
for tid, team_stats in fotmob_stats.get(“stats”, {}).items():
xg_entry  = team_stats.get(“expected_goals_team”)
xga_entry = team_stats.get(“expected_goals_conceded_team”)
mp        = team_stats.get(“matchesPlayed”, 0)
if not xg_entry or not xga_entry or mp == 0:
continue
xg[str(tid)] = (
xg_entry[“value”]  / mp,
xga_entry[“value”] / mp,
)
return xg

def _build_season_stats(matches):
“””
Derive per-team season stats from finished matches.

```
Returns {team_id_str: {
    home_gf, home_ga, home_games, home_ppg,
    away_gf, away_ga, away_games, away_ppg,
    last6_pts
}}
"""
finished = sorted(
    [m for m in matches if m["status"] == "FINISHED"],
    key=lambda m: m["utcDate"],
)

raw = defaultdict(lambda: {
    "home_gf": 0, "home_ga": 0, "home_games": 0, "home_pts": 0,
    "away_gf": 0, "away_ga": 0, "away_games": 0, "away_pts": 0,
    "results": [],
})

for m in finished:
    hid = str(m["homeTeamId"])
    aid = str(m["awayTeamId"])
    hg  = m["homeGoals"]
    ag  = m["awayGoals"]
    if hg is None or ag is None:
        continue

    raw[hid]["home_gf"]    += hg
    raw[hid]["home_ga"]    += ag
    raw[hid]["home_games"] += 1
    raw[aid]["away_gf"]    += ag
    raw[aid]["away_ga"]    += hg
    raw[aid]["away_games"] += 1

    if hg > ag:
        raw[hid]["home_pts"] += 3; raw[hid]["results"].append(3)
        raw[aid]["results"].append(0)
    elif hg == ag:
        raw[hid]["home_pts"] += 1; raw[hid]["results"].append(1)
        raw[aid]["away_pts"] += 1; raw[aid]["results"].append(1)
    else:
        raw[aid]["away_pts"] += 3; raw[aid]["results"].append(3)
        raw[hid]["results"].append(0)

out = {}
for tid, s in raw.items():
    hg = s["home_games"]; ag = s["away_games"]
    out[tid] = {
        "home_gf":    s["home_gf"],
        "home_ga":    s["home_ga"],
        "home_games": hg,
        "home_ppg":   s["home_pts"] / hg if hg else 1.0,
        "away_gf":    s["away_gf"],
        "away_ga":    s["away_ga"],
        "away_games": ag,
        "away_ppg":   s["away_pts"] / ag if ag else 1.0,
        "last6_pts":  sum(s["results"][-FORM_WINDOW:]),
    }
return out
```

def compute_stuart_ratings(matches, fotmob_stats, league_stats):
    """
    Build blended attack/defense ratings compatible with Ben's
    compute_match_lambdas() interface.

    Parameters
    ----------
    matches       : list  — Ben's matches.json match dicts
    fotmob_stats  : dict  — Ben's fotmob-stats.json, or None
    league_stats  : dict  — {"avg_home_goals": float, "avg_away_goals": float}

    Returns
    -------
    ratings : {team_id_str: {
                  "home_attack", "home_defense",
                  "away_attack", "away_defense"
              }}
    Each value is a blended goals-per-game figure divided by the relevant
    league-average goals figure. The two attack ratings are additionally
    scaled by the composite-strength multiplier, clamped to [0.5, 2.0].
    A rating falls back to 1.0 when the league average it would be divided
    by is zero.
    """
    season = _build_season_stats(matches)
    xg_map = _extract_xg_per_game(fotmob_stats)

    avg_h = league_stats["avg_home_goals"]
    avg_a = league_stats["avg_away_goals"]

    # ── Blended goals per game ────────────────────────────────────────────────
    def blended(tid, home_key, away_key, xg_index):
        """Blend actual goals per game with xG per game for one team."""
        s = season[tid]
        tot = s["home_games"] + s["away_games"]
        if tot == 0:
            return (avg_h + avg_a) / 2
        actual = (s[home_key] + s[away_key]) / tot
        if tid in xg_map:
            return (SEASON_VS_XG_BLEND * xg_map[tid][xg_index]
                    + (1 - SEASON_VS_XG_BLEND) * actual)
        # No xG data for this team: use actual goals only.
        return actual

    # Computed once per team and reused by the strength and rating steps.
    att = {tid: blended(tid, "home_gf", "away_gf", 0) for tid in season}
    dfn = {tid: blended(tid, "home_ga", "away_ga", 1) for tid in season}

    league_avg_blend_att = sum(att.values()) / len(att) if att else 1.0

    # ── Composite strength (our model's signal) ───────────────────────────────
    def strength(tid, venue):
        """Weighted mix of venue PPG, recent form and the attack bonus."""
        s = season[tid]
        venue_ppg = s["home_ppg"] if venue == "home" else s["away_ppg"]
        # Always divided by FORM_WINDOW (0–3 scale), so a team with fewer
        # recorded results than the window scores lower on form.
        form = s["last6_pts"] / FORM_WINDOW
        # The bonus uses blended attack relative to the league mean only;
        # the defensive side does not enter this term.
        att_vs_league = att[tid] - league_avg_blend_att
        return (VENUE_PPG_WEIGHT * venue_ppg
                + FORM_WEIGHT * form
                + XG_DIFF_BONUS * att_vs_league)

    def multiplier(tid, venue):
        """Strength relative to NEUTRAL_STRENGTH, clamped to [0.5, 2.0]."""
        return max(0.5, min(2.0, strength(tid, venue) / NEUTRAL_STRENGTH))

    # ── Convert to Ben's normalised rating format ─────────────────────────────
    #
    # Ben's lambda formula:
    #   lam_h = home_attack  * away_defense * avg_home_goals
    #   lam_a = away_attack  * home_defense * avg_away_goals
    #
    # We express attack/defense as fractions of league average so the scalar
    # multiplication by avg_home/away_goals produces realistic goal counts.
    #
    # The composite strength score modulates the attack ratings via the
    # clamped multiplier, to prevent extreme outliers from dominating.
    ratings = {}
    for tid in season:
        ba = att[tid]
        bd = dfn[tid]

        home_mult = multiplier(tid, "home")
        away_mult = multiplier(tid, "away")

        ratings[tid] = {
            "home_attack":  (ba / avg_h) * home_mult if avg_h else 1.0,
            "away_attack":  (ba / avg_a) * away_mult if avg_a else 1.0,
            "home_defense": (bd / avg_a) if avg_a else 1.0,
            "away_defense": (bd / avg_h) if avg_h else 1.0,
        }

    return ratings
	"""
	stuart_ratings.py — drop-in ratings module for generate-predictions.py

	Replaces compute_match_ratings() and compute_xg_ratings() with the blended
	methodology from Stuart's Championship model.

	Blend formula
	-------------

	    blended_att(team) = 0.50 * xGF/g + 0.50 * actual_GF/g
	    blended_def(team) = 0.50 * xGA/g + 0.50 * actual_GA/g

	    strength(team, venue) =
	          0.40 * venue_PPG
	        + 0.60 * last_6_form (points from last 6 games / 6, on a 0–3 scale)
	        + 0.10 * blended_xG_differential_bonus

	Ratings are normalised to league average = 1.0 so they slot into
	Ben's compute_match_lambdas() formula unchanged:

	    lam_h = hr["home_attack"] * ar["away_defense"] * avg_home_goals
	    lam_a = ar["away_attack"] * hr["home_defense"] * avg_away_goals

	Drop-in usage
	-------------

	In generate-predictions.py, replace:

	    match_ratings = compute_match_ratings(profiles, league_stats)
	    ...
	    ratings = blend_ratings(match_ratings, xg_ratings, xg_weight)

	with:

	    from stuart_ratings import compute_stuart_ratings
	    ratings = compute_stuart_ratings(matches, fotmob_stats, league_stats)

	Everything downstream (apply_overrides, compute_match_lambdas,
	simulate_season, output) is untouched.

	xG data format (FotMob stats JSON)
	----------------------------------

	    {
	      "stats": {
	        "<team_id>": {
	          "expected_goals_team": {"value": <float>}, // season xGF total
	          "expected_goals_conceded_team": {"value": <float>}, // season xGA total
	          "matchesPlayed": <int>
	        }
	      }
	    }

	If fotmob_stats is None or empty, falls back to actual goals only.
	"""

	from collections import defaultdict

	# ── Weighting constants ───────────────────────────────────────────────────────

	SEASON_VS_XG_BLEND = 0.50 # weight on xG per game; the remaining (1 - weight) goes to actual goals per game
	VENUE_PPG_WEIGHT = 0.40 # weight of home/away points-per-game in the composite strength
	FORM_WEIGHT = 0.60 # weight of last-FORM_WINDOW form in the composite strength
	XG_DIFF_BONUS = 0.10 # additive bonus: multiplier on (blended attack - league-average blended attack)

	FORM_WINDOW = 6 # number of recent matches for form calculation

	# Empirically calibrated from Championship data: the mean of all home+away
	# strength scores across the division. Used to convert composite strength
	# into a multiplier relative to a league-average team.
	NEUTRAL_STRENGTH = 1.57

	def _extract_xg_per_game(fotmob_stats):
	“””
	Parse FotMob stats JSON into {team_id_str: (xgf_per_game, xga_per_game)}.
	Returns empty dict if data is missing or malformed.
	“””
	xg = {}
	if not fotmob_stats:
	return xg
	for tid, team_stats in fotmob_stats.get(“stats”, {}).items():
	xg_entry = team_stats.get(“expected_goals_team”)
	xga_entry = team_stats.get(“expected_goals_conceded_team”)
	mp = team_stats.get(“matchesPlayed”, 0)
	if not xg_entry or not xga_entry or mp == 0:
	continue
	xg[str(tid)] = (
	xg_entry[“value”] / mp,
	xga_entry[“value”] / mp,
	)
	return xg

	def _build_season_stats(matches):
	“””
	Derive per-team season stats from finished matches.

	```
	Returns {team_id_str: {
	home_gf, home_ga, home_games, home_ppg,
	away_gf, away_ga, away_games, away_ppg,
	last6_pts
	}}
	"""
	finished = sorted(
	[m for m in matches if m["status"] == "FINISHED"],
	key=lambda m: m["utcDate"],
	)

	raw = defaultdict(lambda: {
	"home_gf": 0, "home_ga": 0, "home_games": 0, "home_pts": 0,
	"away_gf": 0, "away_ga": 0, "away_games": 0, "away_pts": 0,
	"results": [],
	})

	for m in finished:
	hid = str(m["homeTeamId"])
	aid = str(m["awayTeamId"])
	hg = m["homeGoals"]
	ag = m["awayGoals"]
	if hg is None or ag is None:
	continue

	raw[hid]["home_gf"] += hg
	raw[hid]["home_ga"] += ag
	raw[hid]["home_games"] += 1
	raw[aid]["away_gf"] += ag
	raw[aid]["away_ga"] += hg
	raw[aid]["away_games"] += 1

	if hg > ag:
	raw[hid]["home_pts"] += 3; raw[hid]["results"].append(3)
	raw[aid]["results"].append(0)
	elif hg == ag:
	raw[hid]["home_pts"] += 1; raw[hid]["results"].append(1)
	raw[aid]["away_pts"] += 1; raw[aid]["results"].append(1)
	else:
	raw[aid]["away_pts"] += 3; raw[aid]["results"].append(3)
	raw[hid]["results"].append(0)

	out = {}
	for tid, s in raw.items():
	hg = s["home_games"]; ag = s["away_games"]
	out[tid] = {
	"home_gf": s["home_gf"],
	"home_ga": s["home_ga"],
	"home_games": hg,
	"home_ppg": s["home_pts"] / hg if hg else 1.0,
	"away_gf": s["away_gf"],
	"away_ga": s["away_ga"],
	"away_games": ag,
	"away_ppg": s["away_pts"] / ag if ag else 1.0,
	"last6_pts": sum(s["results"][-FORM_WINDOW:]),
	}
	return out
	```

	def compute_stuart_ratings(matches, fotmob_stats, league_stats):
	    """
	    Build blended attack/defense ratings compatible with Ben's
	    compute_match_lambdas() interface.

	    Parameters
	    ----------
	    matches : list — Ben's matches.json match dicts
	    fotmob_stats : dict — Ben's fotmob-stats.json, or None
	    league_stats : dict — {"avg_home_goals": float, "avg_away_goals": float}

	    Returns
	    -------
	    ratings : {team_id_str: {
	                  "home_attack", "home_defense",
	                  "away_attack", "away_defense"
	              }}
	    Each value is a blended goals-per-game figure divided by the relevant
	    league-average goals figure. The two attack ratings are additionally
	    scaled by the composite-strength multiplier, clamped to [0.5, 2.0].
	    A rating falls back to 1.0 when the league average it would be divided
	    by is zero.
	    """
	    season = _build_season_stats(matches)
	    xg_map = _extract_xg_per_game(fotmob_stats)

	    avg_h = league_stats["avg_home_goals"]
	    avg_a = league_stats["avg_away_goals"]

	    # ── Blended goals per game ────────────────────────────────────────────────
	    def blended(tid, home_key, away_key, xg_index):
	        """Blend actual goals per game with xG per game for one team."""
	        s = season[tid]
	        tot = s["home_games"] + s["away_games"]
	        if tot == 0:
	            return (avg_h + avg_a) / 2
	        actual = (s[home_key] + s[away_key]) / tot
	        if tid in xg_map:
	            return (SEASON_VS_XG_BLEND * xg_map[tid][xg_index]
	                    + (1 - SEASON_VS_XG_BLEND) * actual)
	        # No xG data for this team: use actual goals only.
	        return actual

	    # Computed once per team and reused by the strength and rating steps.
	    att = {tid: blended(tid, "home_gf", "away_gf", 0) for tid in season}
	    dfn = {tid: blended(tid, "home_ga", "away_ga", 1) for tid in season}

	    league_avg_blend_att = sum(att.values()) / len(att) if att else 1.0

	    # ── Composite strength (our model's signal) ───────────────────────────────
	    def strength(tid, venue):
	        """Weighted mix of venue PPG, recent form and the attack bonus."""
	        s = season[tid]
	        venue_ppg = s["home_ppg"] if venue == "home" else s["away_ppg"]
	        # Always divided by FORM_WINDOW (0–3 scale), so a team with fewer
	        # recorded results than the window scores lower on form.
	        form = s["last6_pts"] / FORM_WINDOW
	        # The bonus uses blended attack relative to the league mean only;
	        # the defensive side does not enter this term.
	        att_vs_league = att[tid] - league_avg_blend_att
	        return (VENUE_PPG_WEIGHT * venue_ppg
	                + FORM_WEIGHT * form
	                + XG_DIFF_BONUS * att_vs_league)

	    def multiplier(tid, venue):
	        """Strength relative to NEUTRAL_STRENGTH, clamped to [0.5, 2.0]."""
	        return max(0.5, min(2.0, strength(tid, venue) / NEUTRAL_STRENGTH))

	    # ── Convert to Ben's normalised rating format ─────────────────────────────
	    #
	    # Ben's lambda formula:
	    #   lam_h = home_attack * away_defense * avg_home_goals
	    #   lam_a = away_attack * home_defense * avg_away_goals
	    #
	    # We express attack/defense as fractions of league average so the scalar
	    # multiplication by avg_home/away_goals produces realistic goal counts.
	    #
	    # The composite strength score modulates the attack ratings via the
	    # clamped multiplier, to prevent extreme outliers from dominating.
	    ratings = {}
	    for tid in season:
	        ba = att[tid]
	        bd = dfn[tid]

	        home_mult = multiplier(tid, "home")
	        away_mult = multiplier(tid, "away")

	        ratings[tid] = {
	            "home_attack": (ba / avg_h) * home_mult if avg_h else 1.0,
	            "away_attack": (ba / avg_a) * away_mult if avg_a else 1.0,
	            "home_defense": (bd / avg_a) if avg_a else 1.0,
	            "away_defense": (bd / avg_h) if avg_h else 1.0,
	        }

	    return ratings
No results found