Skip to content

Instantly share code, notes, and snippets.

@mdsumner
Last active January 8, 2026 20:20
Show Gist options
  • Select an option

  • Save mdsumner/974f6adb2380c482d11f168ada45e16f to your computer and use it in GitHub Desktop.

Select an option

Save mdsumner/974f6adb2380c482d11f168ada45e16f to your computer and use it in GitHub Desktop.

In these CMEMs ZARR there's evidence of the dummy "crs" var used via "grid_mapping" in NetCDF, but there's no such var.

https://data.marine.copernicus.eu/product/SEALEVEL_GLO_PHY_L4_MY_008_047/description

curl -s "https://s3.waw3-1.cloudferro.com/mdl-arco-time-045/arco/SEALEVEL_GLO_PHY_L4_MY_008_047/cmems_obs-sl_glo_phy-ssh_my_allsat-demo-l4-duacs-0.125deg_P1D-i_202511/timeChunked.zarr/.zmetadata" | grep -i "grid_mapping\|crs\|spatial_ref"
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",
      "grid_mapping": "crs",


curl -s "https://s3.waw3-1.cloudferro.com/mdl-arco-time-045/arco/SEALEVEL_GLO_PHY_L4_MY_008_047/cmems_obs-sl_glo_phy-ssh_my_allsat-demo-l4-duacs-0.125deg_P1D-i_202511/timeChunked.zarr/.zmetadata" | grep -i "\\.zarray"
    "adt/.zarray": {
    "err_sla/.zarray": {
    "err_ugosa/.zarray": {
    "err_vgosa/.zarray": {
    "flag_ice/.zarray": {
    "lat_bnds/.zarray": {
    "latitude/.zarray": {
    "lon_bnds/.zarray": {
    "longitude/.zarray": {
    "nv/.zarray": {
    "sla/.zarray": {
    "time/.zarray": {
    "tpa_correction/.zarray": {
    "ugos/.zarray": {
    "ugosa/.zarray": {
    "vgos/.zarray": {
    "vgosa/.zarray": {

it's funny because I very clearly remember earlier products by AVISO that shipped in transposed Mercator grid but with rectilinear long lat arrays (ewk), there was a brief happy time when CF provided crs and this stuff just worked from the NetCDFs.

@mdsumner
Copy link
Author

mdsumner commented Jan 8, 2026

here's more on the context around the grid_mapping attributes

curl -s "https://s3.waw3-1.cloudferro.com/mdl-arco-time-045/arco/SEALEVEL_GLO_PHY_L4_MY_008_047/cmems_obs-sl_glo_phy-ssh_my_allsat-demo-l4-duacs-0.125deg_P1D-i_202511/timeChunked.zarr/.zmetadata" | grep -i "grid_mapping\|crs\|spatial_ref" -A 2 -B 2
      "comment": "The absolute dynamic topography is the sea surface height above geoid; the adt is obtained as follows: adt=sla+mdt where mdt is the mean dynamic topography; see the product user manual for details",
      "coordinates": "longitude latitude",
      "grid_mapping": "crs",
      "long_name": "Absolute dynamic topography",
      "scale_factor": 0.0001,
--
      "comment": "The formal mapping error represents a purely theoretical mapping error. It mainly traduces errors induced by the constellation sampling capability and consistency with the spatial/temporal scales considered, as described in Le Traon et al (1998) or Ducet et al (2000)",
      "coordinates": "longitude latitude",
      "grid_mapping": "crs",
      "long_name": "Formal mapping error",
      "scale_factor": 0.0001,
--
      "comment": "The formal mapping error represents a purely theoretical mapping error. It mainly traduces errors induced by the constellation sampling capability and consistency with the spatial/temporal scales considered, as described in Le Traon et al (1998) or Ducet et al (2000)",
      "coordinates": "longitude latitude",
      "grid_mapping": "crs",
      "long_name": "Formal mapping error on zonal geostrophic velocity anomalies",
      "scale_factor": 0.0001,
--
      "comment": "The formal mapping error represents a purely theoretical mapping error. It mainly traduces errors induced by the constellation sampling capability and consistency with the spatial/temporal scales considered, as described in Le Traon et al (1998) or Ducet et al (2000)",
      "coordinates": "longitude latitude",
      "grid_mapping": "crs",
      "long_name": "Formal mapping error on meridional geostrophic velocity anomalies",
      "scale_factor": 0.0001,
--
        1

@mdsumner
Copy link
Author

mdsumner commented Jan 8, 2026

here's a fairly thorough python script to create a subset, augment a fix for the unused crs setting and show that GDAL picks that up

#!/usr/bin/env python3
"""
Proof of concept: Add CF-compliant grid_mapping to CMEMS Zarr
"""

import xarray as xr
import numpy as np
import json
from pathlib import Path

# %% Open remote Zarr

ZARR_URL = "https://s3.waw3-1.cloudferro.com/mdl-arco-time-045/arco/SEALEVEL_GLO_PHY_L4_NRT_008_046/cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D_202506/timeChunked.zarr"

print(f"Opening: {ZARR_URL}")
ds = xr.open_zarr(ZARR_URL)

print(f"Variables: {list(ds.data_vars)}")
print(f"Dimensions: {dict(ds.sizes)}")

# %% Take small subset

ds_subset = ds[["sla"]].isel(time=slice(0, 2)).sel(
    latitude=slice(-10, 10),
    longitude=slice(100, 120)
)
print(f"Subset dims: {dict(ds_subset.sizes)}")

# %% Check original - no CRS

print("\nOriginal 'sla' attributes:")
for k, v in ds_subset["sla"].attrs.items():
    print(f"  {k}: {v}")

print(f"\n'grid_mapping' present: {'grid_mapping' in ds_subset['sla'].attrs}")
print(f"'crs' variable present: {'crs' in ds_subset}")

# %% Write original (no CRS)

out_original = Path("cmems_original.zarr")
print(f"\nWriting original to {out_original}...")
ds_subset.load().to_zarr(out_original, mode="w")

# %% Define WGS84 WKT

WGS84_WKT = """GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic System 1984 ensemble",
        MEMBER["World Geodetic System 1984 (Transit)"],
        MEMBER["World Geodetic System 1984 (G730)"],
        MEMBER["World Geodetic System 1984 (G873)"],
        MEMBER["World Geodetic System 1984 (G1150)"],
        MEMBER["World Geodetic System 1984 (G1674)"],
        MEMBER["World Geodetic System 1984 (G1762)"],
        MEMBER["World Geodetic System 1984 (G2139)"],
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]],
        ENSEMBLEACCURACY[2.0]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    USAGE[
        SCOPE["Horizontal component of 3D system."],
        AREA["World."],
        BBOX[-90,-180,90,180]],
    ID["EPSG",4326]]"""

# %% Add CRS variable and grid_mapping attribute

ds_fixed = ds_subset.copy()

# Create scalar CRS variable with CF attributes
crs_var = xr.DataArray(
    data=np.int32(0),
    attrs={
        "grid_mapping_name": "latitude_longitude",
        "crs_wkt": WGS84_WKT,
        "semi_major_axis": 6378137.0,
        "inverse_flattening": 298.257223563,
        "longitude_of_prime_meridian": 0.0,
        "spatial_ref": WGS84_WKT,
    }
)

ds_fixed["crs"] = crs_var

# Add grid_mapping attribute to data variable
ds_fixed["sla"].attrs["grid_mapping"] = "crs"

print("\nFixed 'sla' attributes:")
for k, v in ds_fixed["sla"].attrs.items():
    print(f"  {k}: {v}")

print(f"\n'crs' variable present: {'crs' in ds_fixed}")
print(f"'crs' attrs: {list(ds_fixed['crs'].attrs.keys())}")

# %% Write fixed version

out_fixed = Path("cmems_with_crs.zarr")
print(f"\nWriting fixed to {out_fixed}...")
ds_fixed.load().to_zarr(out_fixed, mode="w")

# %% Compare .zmetadata

print("\n" + "=" * 50)
print("Comparing .zmetadata files")
print("=" * 50)

with open(out_original / ".zmetadata") as f:
    meta_orig = json.load(f)

with open(out_fixed / ".zmetadata") as f:
    meta_fixed = json.load(f)


orig_str = json.dumps(meta_orig)
fixed_str = json.dumps(meta_fixed)

print(f"\nOriginal:")
print(f"  'grid_mapping' in metadata: {'grid_mapping' in orig_str}")
print(f"  'crs/.zattrs' key exists: {'crs/.zattrs' in meta_orig['metadata']}")

print(f"\nFixed:")
print(f"  'grid_mapping' in metadata: {'grid_mapping' in fixed_str}")
print(f"  'crs/.zattrs' key exists: {'crs/.zattrs' in meta_fixed['metadata']}")

# Show CRS attrs
if "crs/.zattrs" in meta_fixed["metadata"]:
    crs_attrs = meta_fixed["metadata"]["crs/.zattrs"]
    print(f"\nCRS variable attributes:")
    for k, v in crs_attrs.items():
        v_str = str(v)[:50] + "..." if len(str(v)) > 50 else str(v)
        print(f"  {k}: {v_str}")

# %% Test with GDAL

print("\n" + "=" * 50)
print("GDAL test")
print("=" * 50)

print(f'\ngdalinfo ZARR:"{out_original}"')
print(f'gdalinfo ZARR:"{out_fixed}"')

try:
    from osgeo import gdal
    
    print("\nOriginal:")
    gd = gdal.Open(f'ZARR:"{out_original}"')
    if gd:
        proj = gd.GetProjection()
        print(f"  CRS: {proj[:60]}..." if proj else "  CRS: NONE")
        gd = None
    
    print("\nFixed:")
    gd = gdal.Open(f'ZARR:"{out_fixed}"')
    if gd:
        proj = gd.GetProjection()
        print(f"  CRS: {proj[:60]}..." if proj else "  CRS: NONE")
        gd = None
except ImportError:
    print("\n(Install osgeo/GDAL bindings or test with gdalinfo CLI)")

print("\nDone!")
Opening: https://s3.waw3-1.cloudferro.com/mdl-arco-time-045/arco/SEALEVEL_GLO_PHY_L4_NRT_008_046/cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.125deg_P1D_202506/timeChunked.zarr
Variables: ['adt', 'err_sla', 'err_ugosa', 'err_vgosa', 'flag_ice', 'sla', 'ugos', 'ugosa', 'vgos', 'vgosa']
Dimensions: {'time': 557, 'latitude': 1440, 'longitude': 2880, 'nv': 2}
Subset dims: {'time': 2, 'latitude': 160, 'longitude': 160}

Original 'sla' attributes:
  ancillary_variables: err_sla
  comment: The sea level anomaly is the sea surface height above mean sea surface; it is referenced to the [1993, 2012] period; see the product user manual for details
  grid_mapping: crs
  long_name: Sea level anomaly
  standard_name: sea_surface_height_above_sea_level
  units: m

'grid_mapping' present: True
'crs' variable present: False

Writing original to cmems_original.zarr...

Fixed 'sla' attributes:
  ancillary_variables: err_sla
  comment: The sea level anomaly is the sea surface height above mean sea surface; it is referenced to the [1993, 2012] period; see the product user manual for details
  grid_mapping: crs
  long_name: Sea level anomaly
  standard_name: sea_surface_height_above_sea_level
  units: m

'crs' variable present: True
'crs' attrs: ['grid_mapping_name', 'crs_wkt', 'semi_major_axis', 'inverse_flattening', 'longitude_of_prime_meridian', 'spatial_ref']

Writing fixed to cmems_with_crs.zarr...

==================================================
Comparing .zmetadata files
==================================================

Original:
  'grid_mapping' in metadata: True
  'crs/.zattrs' key exists: False

Fixed:
  'grid_mapping' in metadata: True
  'crs/.zattrs' key exists: True

CRS variable attributes:
  _ARRAY_DIMENSIONS: []
  crs_wkt: GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic Sys...
  grid_mapping_name: latitude_longitude
  inverse_flattening: 298.257223563
  longitude_of_prime_meridian: 0.0
  semi_major_axis: 6378137.0
  spatial_ref: GEOGCRS["WGS 84",
    ENSEMBLE["World Geodetic Sys...

==================================================
GDAL test
==================================================

gdalinfo ZARR:"cmems_original.zarr"
gdalinfo ZARR:"cmems_with_crs.zarr"

Original:
  CRS: NONE

Fixed:
  CRS: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,2...

Done!

gdalinfo --version
GDAL 3.13.0dev-07547f7da1, released 2026/01/06

uv pip list | grep -E 'xarray|zarr'
Using Python 3.10.12 environment ...
xarray                    2025.6.1
zarr                      2.18.3

@mdsumner
Copy link
Author

mdsumner commented Jan 8, 2026

here is a candidate fix-up function

import zarr
import json
from pathlib import Path

def fix_zarr_crs(zarr_path, crs="EPSG:4326"):
    """Add missing CRS variable to a Zarr store (in-place).
    
    Parameters
    ----------
    zarr_path : str or Path
        Path to the Zarr store
    crs : str
        CRS identifier (e.g., "EPSG:4326", "EPSG:3857")
    """
    import pyproj
    
    zarr_path = Path(zarr_path)
    
    # Get CRS info from pyproj
    crs_obj = pyproj.CRS(crs)
    
    # Build attributes
    crs_attrs = {
        "_ARRAY_DIMENSIONS": [],
        "crs_wkt": crs_obj.to_wkt(),
        "spatial_ref": crs_obj.to_wkt(),
    }
    
    # Add grid_mapping_name and ellipsoid params for geographic CRS
    if crs_obj.is_geographic:
        crs_attrs["grid_mapping_name"] = "latitude_longitude"
        ellipsoid = crs_obj.ellipsoid
        if ellipsoid:
            crs_attrs["semi_major_axis"] = ellipsoid.semi_major_metre
            crs_attrs["inverse_flattening"] = ellipsoid.inverse_flattening
        prime_meridian = crs_obj.prime_meridian
        if prime_meridian:
            crs_attrs["longitude_of_prime_meridian"] = prime_meridian.longitude
    
    # For projected CRS, extract projection params
    elif crs_obj.is_projected:
        cf = crs_obj.to_cf()
        crs_attrs.update(cf)  # to_cf() gives CF-compatible dict
    
    # Write crs/.zarray (scalar int)
    crs_zarray = {
        "chunks": [],
        "compressor": None,
        "dtype": "<i4",
        "fill_value": 0,
        "filters": None,
        "order": "C",
        "shape": [],
        "zarr_format": 2
    }
    
    crs_dir = zarr_path / "crs"
    crs_dir.mkdir(exist_ok=True)
    
    (crs_dir / ".zarray").write_text(json.dumps(crs_zarray))
    (crs_dir / ".zattrs").write_text(json.dumps(crs_attrs))
    (crs_dir / "0").write_bytes(b'\x00\x00\x00\x00')
    
    # Update .zmetadata if it exists
    zmeta_path = zarr_path / ".zmetadata"
    if zmeta_path.exists():
        meta = json.loads(zmeta_path.read_text())
        meta["metadata"]["crs/.zarray"] = crs_zarray
        meta["metadata"]["crs/.zattrs"] = crs_attrs
        zmeta_path.write_text(json.dumps(meta, indent=2))
    
    print(f"Added CRS ({crs}) to {zarr_path}")


# Quick test
if __name__ == "__main__":
    import pyproj
    
    # Show what pyproj gives us
    for epsg in ["EPSG:4326", "EPSG:3857", "EPSG:6931"]:
        crs = pyproj.CRS(epsg)
        print(f"\n{epsg}:")
        print(f"  is_geographic: {crs.is_geographic}")
        print(f"  is_projected: {crs.is_projected}")
        if crs.ellipsoid:
            print(f"  semi_major: {crs.ellipsoid.semi_major_metre}")
            print(f"  inv_flat: {crs.ellipsoid.inverse_flattening}")
        print(f"  to_cf(): {list(crs.to_cf().keys())}")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment