Skip to content

Instantly share code, notes, and snippets.

@3outeille
Created March 4, 2026 11:19
Show Gist options
  • Select an option

  • Save 3outeille/e6dad535e6774800cfe840f3dd194642 to your computer and use it in GitHub Desktop.

Select an option

Save 3outeille/e6dad535e6774800cfe840f3dd194642 to your computer and use it in GitHub Desktop.
Reproduce the ~0.5 OOM/year (and actual ~0.6-0.7 OOM/year) training compute trend using Epoch AI's public frontier models dataset.
"""
Reproduce the ~0.5 OOM/year (and actual ~0.6-0.7 OOM/year) training compute trend
using Epoch AI's public frontier models dataset.
Data source: https://epoch.ai/data/ai-models
License: Creative Commons Attribution (CC BY)
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from matplotlib.dates import date2num, num2date
import matplotlib.dates as mdates
# ─────────────────────────────────────────────
# 1. LOAD DATA
# ─────────────────────────────────────────────
# Epoch AI frontier models CSV (updated daily)
CSV_URL = "https://epoch.ai/data/generated/frontier_ai_models.csv"
raw = pd.read_csv(CSV_URL)
# Coerce the two columns we need; unparseable entries become NaT/NaN.
raw = raw.assign(
    date=pd.to_datetime(raw["Publication date"], errors="coerce"),
    compute=pd.to_numeric(raw["Training compute (FLOP)"], errors="coerce"),
)
# Keep rows with both fields, restrict to the deep-learning era (2010+),
# and order chronologically.
df = raw.dropna(subset=["date", "compute"])
df = df[df["date"] >= pd.Timestamp("2010-01-01")].copy()
df = df.sort_values("date")
print(f"Loaded {len(df)} frontier models with compute estimates")
print(f"Date range: {df['date'].min().date()} to {df['date'].max().date()}")
print(f"Compute range: {df['compute'].min():.1e} to {df['compute'].max():.1e} FLOP")
# ─────────────────────────────────────────────
# 2. FIT EXPONENTIAL TREND (log-linear regression)
# ─────────────────────────────────────────────
# Regress log10(compute) on calendar time expressed in fractional years:
# the fitted slope is then directly in orders of magnitude per year.
df["year_frac"] = df["date"].dt.year + df["date"].dt.dayofyear / 365.25
df["log10_compute"] = np.log10(df["compute"])
fit = stats.linregress(df["year_frac"], df["log10_compute"])
slope, intercept = fit.slope, fit.intercept
r, p, se = fit.rvalue, fit.pvalue, fit.stderr
# The slope is OOM/year; exponentiating gives the multiplicative factor.
growth_per_year = 10**slope
print(f"\n{'='*50}")
print(f"REGRESSION RESULTS (frontier models, 2010+)")
print(f"{'='*50}")
print(f"Slope: {slope:.3f} OOM/year")
print(f"Growth factor: {growth_per_year:.2f}x / year")
print(f"R²: {r**2:.3f}")
print(f"{'='*50}")
print(f"\nFor comparison:")
print(f" Aschenbrenner's claim: ~0.5 OOM/year (~3.16x/year)")
print(f" Epoch AI's published: ~0.6-0.7 OOM/year (~4-5x/year)")
print(f" This regression: ~{slope:.2f} OOM/year (~{growth_per_year:.1f}x/year)")
# ─────────────────────────────────────────────
# 3. PLOT
# ─────────────────────────────────────────────
fig, ax = plt.subplots(figsize=(14, 8))
# --- Scatter: all frontier models ---
ax.scatter(
    df["date"],
    df["compute"],
    s=40,
    alpha=0.6,
    zorder=3,
    color="#4A90D9",
    edgecolors="white",
    linewidth=0.5,
    label="Frontier models (Epoch AI)",
)
# --- Annotate notable models ---
# A model gets a label if any of these names appears (case-insensitively)
# as a substring of its "Model" field.
notable = ["GPT-2", "GPT-3", "GPT-4", "Gemini Ultra", "Llama 3.1 405B",
"Claude 3 Opus", "Grok-3", "AlexNet", "AlphaGo Master"]
notable_lower = [n.lower() for n in notable]
for _, row in df.iterrows():
    name = row["Model"]
    if any(needle in str(name).lower() for needle in notable_lower):
        ax.annotate(
            name,
            (row["date"], row["compute"]),
            textcoords="offset points",
            xytext=(10, 5),
            fontsize=7.5,
            color="#333",
            arrowprops=dict(arrowstyle="-", color="#999", lw=0.5),
        )
# --- Trend line: actual regression ---
def _frac_year_to_timestamp(year_frac):
    """Convert a fractional year (e.g. 2022.6) to a pandas Timestamp.

    Approximate: the fractional part is scaled by 365.25 days, so the
    result can drift by up to a day around leap years — fine for plotting.
    (Previously this conversion was duplicated inline for both trend lines.)
    """
    return pd.Timestamp(f"{int(year_frac)}-01-01") + pd.Timedelta(
        days=(year_frac % 1) * 365.25
    )

# Evaluate the fitted log-linear model over the observed date range.
x_line = np.linspace(df["year_frac"].min(), df["year_frac"].max(), 200)
y_line = 10 ** (slope * x_line + intercept)
dates_line = [_frac_year_to_timestamp(y) for y in x_line]
ax.plot(
dates_line, y_line,
color="#E74C3C", linewidth=2.5, linestyle="-",
label=f"Best fit: {slope:.2f} OOM/yr ({growth_per_year:.1f}x/yr)",
zorder=2
)
# --- Trend line: Aschenbrenner's 0.5 OOM/year from GPT-4 ---
gpt4_date_frac = 2022.6  # Aug 2022
gpt4_compute = 1e25  # ~10^25 FLOP estimate
asch_slope = 0.5
# Anchor the 0.5 OOM/yr line so it passes through (GPT-4 date, GPT-4 compute).
asch_intercept = np.log10(gpt4_compute) - asch_slope * gpt4_date_frac
x_asch = np.linspace(2019, df["year_frac"].max(), 200)
y_asch = 10 ** (asch_slope * x_asch + asch_intercept)
dates_asch = [_frac_year_to_timestamp(y) for y in x_asch]
ax.plot(
dates_asch, y_asch,
color="#F39C12", linewidth=2, linestyle="--",
label=f"Aschenbrenner: 0.50 OOM/yr (3.2x/yr), anchored at GPT-4",
zorder=2
)
# --- GPT-4 anchor point ---
# Highlight the anchor of Aschenbrenner's trend line with a star marker.
gpt4_date = pd.Timestamp("2022-08-01")
ax.scatter(
    [gpt4_date],
    [gpt4_compute],
    marker="*",
    s=120,
    color="#F39C12",
    edgecolors="black",
    linewidth=1.5,
    zorder=5,
)
ax.annotate(
    "GPT-4\n(anchor point)",
    (gpt4_date, gpt4_compute),
    textcoords="offset points",
    xytext=(-60, -30),
    fontsize=9,
    fontweight="bold",
    color="#F39C12",
    arrowprops=dict(arrowstyle="->", color="#F39C12", lw=1.5),
)
# --- Formatting ---
ax.set_yscale("log")
ax.set_title(
    "Training Compute of Frontier AI Models\n"
    "Epoch AI data vs. Aschenbrenner's 0.5 OOM/year trend",
    fontsize=15, fontweight="bold"
)
ax.set_xlabel("Publication Date", fontsize=13)
ax.set_ylabel("Training Compute (FLOP)", fontsize=13)
ax.legend(fontsize=10, loc="upper left")
for which, alpha in (("major", 0.3), ("minor", 0.1)):
    ax.grid(True, which=which, alpha=alpha)
ax.xaxis.set_major_locator(mdates.YearLocator(2))
ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))
# Mirror the left axis on the right as a log "OOM scale" with 10^i tick labels.
ax2 = ax.twinx()
ax2.set_yscale("log")
ax2.set_ylim(*ax.get_ylim())
exponents = range(8, 28, 2)
ax2.set_yticks([10.0**e for e in exponents])
ax2.set_yticklabels([f"10^{e}" for e in exponents])
ax2.set_ylabel("OOM scale", fontsize=11, color="#666")
ax2.tick_params(colors="#666")
plt.tight_layout()
plt.savefig("training_compute_trend.png", dpi=150, bbox_inches="tight")
plt.show()
# ─────────────────────────────────────────────
# 4. PRINT SUMMARY TABLE (like Aschenbrenner's)
# ─────────────────────────────────────────────
# Project compute forward from GPT-4 at 0.5 OOM/year in two-year steps.
print(f"\n{'='*70}")
print("ASCHENBRENNER'S TABLE REPRODUCED (0.5 OOM/year from GPT-4)")
print(f"{'='*70}")
print(f"{'Year':<8} {'OOMs above GPT-4':<20} {'Compute (FLOP)':<18} {'~H100-eq':<12}")
# ~3.3e17 FLOP per H100-hour, assume ~2000 hrs training = ~6.6e20 FLOP per H100
flop_per_h100 = 6.6e20
for offset in range(0, 10, 2):
    year = 2022 + offset
    ooms = 0.5 * offset
    compute = gpt4_compute * 10**ooms
    h100_eq = compute / flop_per_h100
    print(f"{year:<8} +{ooms:<18.1f} {compute:<18.1e} ~{h100_eq:,.0f}")
@3outeille
Copy link
Author

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment