Created
September 24, 2025 20:03
-
-
Save nocollier/336d665d58dc7275dd4d5190ed79d0d0 to your computer and use it in GitHub Desktop.
An analysis on the variance of land area in CMIP6 models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| What kind of land area spread is there among CMIP6 models? | |
| This analysis will remove some models: | |
| - For no 'areacella': EC-Earth3-LR, FGOALS-f3-L, NESM3 | |
| - For anomalously low values: EC-Earth3, EC-Earth3-Veg, EC-Earth3P, EC-Earth3P-HR | |
| land_area relative_error [%] | |
| count 74 | |
| mean 1.489046e+14 -0.064018 | |
| std 3.064177e+12 2.056495 | |
| min 1.445632e+14 -2.977698 | |
| 25% 1.470748e+14 -1.292094 | |
| 50% 1.487755e+14 -0.150658 | |
| 75% 1.497160e+14 0.480537 | |
| max 1.663004e+14 11.610992 | |
| """ | |
| import warnings | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
| import pandas as pd | |
| import xarray as xr | |
| from intake_esgf import ESGFCatalog | |
| Z_SCORE_THRESHOLD = 3 # Beyond this, we consider you an outlier | |
| EARTH_LAND_AREA = 1.49e14 # Internet value I found | |
| def compute_land_area(ds: xr.Dataset) -> float: | |
| if "areacella" not in ds: | |
| warnings.warn(f"areacella not found for {ds.attrs['source_id']}") | |
| return np.nan | |
| if not np.allclose(ds["sftlf"].max(), 100.0): | |
| warnings.warn( | |
| f"sftlf for {ds.attrs['source_id']} not in the expected range [{float(ds['sftlf'].min().values)},{float(ds['sftlf'].max().values)}]" | |
| ) | |
| land_area = float( | |
| (ds["sftlf"] * 0.01 * ds["areacella"]).sum(ds["areacella"].dims).values | |
| ) | |
| return land_area | |
| # Find all the land fraction datasets and then compute the land areas | |
| cat = ESGFCatalog().search(variable_id="sftlf").remove_ensembles() | |
| cat.df = cat.df.groupby("source_id").first().reset_index() | |
| dsd = cat.to_dataset_dict() | |
| df = pd.DataFrame( | |
| [ | |
| {"source_id": ds.attrs["source_id"], "land_area": compute_land_area(ds)} | |
| for _, ds in dsd.items() | |
| ] | |
| ).dropna() | |
| # Some are anomalously off, lets remove then | |
| Z = np.abs((df["land_area"] - df["land_area"].mean()) / df["land_area"].std()) | |
| drops = df[Z > Z_SCORE_THRESHOLD] | |
| if not drops.empty: | |
| warnings.warn(f"Dropping some models as they are outliers:\n{drops}") | |
| df = df.drop(index=drops.index) | |
| # Add in a relative error based on the published value of 149 [km2] | |
| df["relative_error [%]"] = (df["land_area"] - EARTH_LAND_AREA) / EARTH_LAND_AREA * 100 | |
| # Results | |
| print(df.sort_values("land_area").to_string()) | |
| print(df.describe()) | |
| df.hist() | |
| plt.savefig("land_area.png") | |
| plt.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment