Skip to content

Instantly share code, notes, and snippets.

@nocollier
Created September 24, 2025 20:03
Show Gist options
  • Select an option

  • Save nocollier/336d665d58dc7275dd4d5190ed79d0d0 to your computer and use it in GitHub Desktop.

Select an option

Save nocollier/336d665d58dc7275dd4d5190ed79d0d0 to your computer and use it in GitHub Desktop.
An analysis of the variance of land area in CMIP6 models
"""
What kind of land area spread is there among CMIP6 models?
This analysis will remove some models:
- For no 'areacella': EC-Earth3-LR, FGOALS-f3-L, NESM3
- For anomalously low values: EC-Earth3, EC-Earth3-Veg, EC-Earth3P, EC-Earth3P-HR
land_area relative_error [%]
count 74
mean 1.489046e+14 -0.064018
std 3.064177e+12 2.056495
min 1.445632e+14 -2.977698
25% 1.470748e+14 -1.292094
50% 1.487755e+14 -0.150658
75% 1.497160e+14 0.480537
max 1.663004e+14 11.610992
"""
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from intake_esgf import ESGFCatalog
# Models whose land area lies more than this many standard deviations from the
# sample mean are treated as outliers and dropped from the analysis.
Z_SCORE_THRESHOLD = 3 # Beyond this, we consider you an outlier
# Reference total land area of Earth in m^2 used for the relative-error column.
# NOTE(review): informal "Internet value" — source unverified.
EARTH_LAND_AREA = 1.49e14 # Internet value I found
def compute_land_area(ds: xr.Dataset) -> float:
    """Return the total land area [m^2] implied by a model's land fraction.

    The area is the cell-area-weighted sum of the land fraction, i.e.
    sum(sftlf / 100 * areacella) over all grid cells. If the dataset has no
    'areacella' variable, a warning is issued and NaN is returned so that the
    caller can drop the entry downstream.
    """
    if "areacella" not in ds:
        warnings.warn(f"areacella not found for {ds.attrs['source_id']}")
        return np.nan
    # sftlf is expected to be a percentage (max ~100); warn when it is not,
    # since the 0.01 conversion below would then be wrong.
    if not np.allclose(ds["sftlf"].max(), 100.0):
        warnings.warn(
            f"sftlf for {ds.attrs['source_id']} not in the expected range [{float(ds['sftlf'].min().values)},{float(ds['sftlf'].max().values)}]"
        )
    # Convert percent to fraction, weight by cell area, and reduce over the
    # spatial dimensions of areacella.
    land_fraction = ds["sftlf"] * 0.01
    weighted = land_fraction * ds["areacella"]
    return float(weighted.sum(ds["areacella"].dims).values)
# Locate one land-fraction (sftlf) dataset per model and compute its land area.
cat = ESGFCatalog().search(variable_id="sftlf").remove_ensembles()
cat.df = cat.df.groupby("source_id").first().reset_index()
rows = [
    {"source_id": ds.attrs["source_id"], "land_area": compute_land_area(ds)}
    for ds in cat.to_dataset_dict().values()
]
df = pd.DataFrame(rows).dropna()

# Some areas are anomalously off; let's remove them via a simple z-score test.
z_scores = ((df["land_area"] - df["land_area"].mean()) / df["land_area"].std()).abs()
outliers = df[z_scores > Z_SCORE_THRESHOLD]
if not outliers.empty:
    warnings.warn(f"Dropping some models as they are outliers:\n{outliers}")
    df = df.drop(index=outliers.index)

# Relative error against the published land area of ~1.49e14 m^2.
df["relative_error [%]"] = (df["land_area"] - EARTH_LAND_AREA) / EARTH_LAND_AREA * 100

# Report the per-model values and summary statistics, then save a histogram.
print(df.sort_values("land_area").to_string())
print(df.describe())
df.hist()
plt.savefig("land_area.png")
plt.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment