Skip to content

Instantly share code, notes, and snippets.

@j08lue
Last active January 15, 2026 20:07
Show Gist options
  • Select an option

  • Save j08lue/9a8157cbe5c6ba55ddc7f6dc8607999d to your computer and use it in GitHub Desktop.

Select an option

Save j08lue/9a8157cbe5c6ba55ddc7f6dc8607999d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "86b7f248-da30-4768-a3f5-efa10c417653",
"metadata": {},
"source": [
"# Download and process Sentinel-2 L2A products to CPM and upload to S3\n",
"\n",
"One-off script, using command-line interfaces of third-party software\n",
"\n",
"## Setup\n",
"\n",
"1. Install `s5cmd`\n",
"2. Set up your `.aws/credentials` and `.aws/config` with profiles for CDSE and your target S3 configuration\n",
"3. Install [eopf-cpm](https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm) and [eopf-stac](https://github.com/EOPF-Sample-Service/eopf-stac) (NB: eopf-stac requires [this revision](https://github.com/EOPF-Sample-Service/eopf-stac/pull/65))\n",
"\n",
"## Usage\n",
"\n",
"1. Adapt the CDSE STAC query parameters to your needs\n",
"2. Change the target S3 configuration, if necessary\n",
"3. Run the script top-to-bottom. NB: there is no retry logic, so any failing command aborts the run\n",
"4. Note that the STAC JSON files are not deleted locally after upload; they are used to identify products that have already been completed"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5d51e0e7-a68a-4556-aaa3-fe7058aa57c2",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import shutil\n",
"import subprocess\n",
"from pathlib import Path\n",
"\n",
"from pystac_client import Client"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b134e49-b30a-46a9-a80e-ce8047eee6a6",
"metadata": {},
"outputs": [],
"source": [
"catalog = Client.open(\"https://stac.dataspace.copernicus.eu/v1/\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e442ebe9-2cd1-4d5c-88df-30a968332baa",
"metadata": {},
"outputs": [],
"source": [
"# Define your temporal range\n",
"daterange = {\"interval\": [\"2021-09-14T10:25:31Z\", \"2022-01-03T23:59:59Z\"]}\n",
"\n",
"# Define your search with CQL2 syntax\n",
"search = catalog.search(filter_lang=\"cql2-json\", filter={\n",
" \"op\": \"and\",\n",
" \"args\": [\n",
" {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n",
" {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"sentinel-2-l2a\"]},\n",
" {\"op\": \"=\", \"args\": [{\"property\": \"grid:code\"}, \"MGRS-28RBS\"]}\n",
" ]\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c41811d5-96e1-43c4-b9e4-f18fce80a6d1",
"metadata": {},
"outputs": [],
"source": [
"items = list(search.items())\n",
"len(items)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22240a26-6011-40e4-aff1-389dca0a88ea",
"metadata": {},
"outputs": [],
"source": [
"CDSE_S3_ENDPOINT_URL = \"https://eodata.dataspace.copernicus.eu/\"\n",
"CDSE_S3_PROFILE = \"cdse\"\n",
"TARGET_S3_ENDPOINT_URL = \"https://s3.de.io.cloud.ovh.net/\"\n",
"TARGET_S3_PROFILE = \"eopf\"\n",
"TARGET_PREFIX = \"s3://esa-zarr-sentinel-explorer-fra/cpm-manual/\" # keep trailing slash\n",
"# TARGET_ASSET_HREF_BASE = \"https://s3.explorer.eopf.copernicus.eu/esa-zarr-sentinel-explorer-fra/cpm-manual/\" # keep trailing slash"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d7b9c770-0269-4264-87bd-56a6b018e7d8",
"metadata": {},
"outputs": [],
"source": [
"def get_safe_root(item):\n",
" asset_href = next(iter(item.assets.values())).href\n",
" return asset_href.split(\".SAFE\")[0] + \".SAFE\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80f1e870-e7fb-4a27-bb2b-fbb88f2b052e",
"metadata": {},
"outputs": [],
"source": [
"ROOT_DIR = Path(\"./cumbre-vieja\")\n",
"\n",
"for item in items:\n",
" prefix = get_safe_root(item)\n",
" print(prefix)\n",
"\n",
" safe_dir = ROOT_DIR / \"safe\" / Path(prefix).name\n",
" cpm_dir = ROOT_DIR / \"cpm\" / (Path(prefix).stem + \".zarr\")\n",
" stac_file = cpm_dir.parent / cpm_dir.with_suffix(\".json\").name\n",
" cmp_uri = f\"{TARGET_PREFIX}{cpm_dir.name}/\"\n",
" stac_uri = f\"{TARGET_PREFIX}{stac_file.name}\"\n",
"\n",
" # Check whether local product exists\n",
" if stac_file.exists():\n",
" print(f\"STAC file {stac_file} exists. Skipping.\")\n",
" continue\n",
" \n",
" # Check whether remote product exists (currently disabled by the `if False` guard below)\n",
" if False:\n",
" try:\n",
" subprocess.run(\n",
" [\n",
" \"s5cmd\", \n",
" \"--profile\", TARGET_S3_PROFILE,\n",
" \"--endpoint-url\", TARGET_S3_ENDPOINT_URL, \n",
" \"head\", stac_uri\n",
" ], \n",
" check=True)\n",
" print(f\"STAC file {stac_uri} exists. Skipping.\")\n",
" continue\n",
" except:\n",
" pass\n",
" \n",
" # Download SAFE from CDSE\n",
" if not safe_dir.exists():\n",
" print(f\"Downloading {safe_dir.name}\")\n",
" safe_dir.mkdir(parents=True)\n",
" try:\n",
" subprocess.run(\n",
" [\n",
" \"s5cmd\", \n",
" \"--profile\", CDSE_S3_PROFILE,\n",
" \"--endpoint-url\", CDSE_S3_ENDPOINT_URL, \n",
" \"sync\", f\"{prefix}/*\", str(safe_dir.absolute())\n",
" ], \n",
" check=True)\n",
" except:\n",
" shutil.rmtree(safe_dir)\n",
" raise\n",
" \n",
" # Produce CPM Zarr\n",
" if not cpm_dir.exists():\n",
" print(f\"Producing {cpm_dir.name}\")\n",
" cpm_dir.mkdir(parents=True)\n",
" try:\n",
" subprocess.run([\"eopf\", \"convert\", str(safe_dir), str(cpm_dir)], check=True)\n",
" shutil.rmtree(safe_dir)\n",
" except:\n",
" shutil.rmtree(cpm_dir)\n",
" raise\n",
"\n",
" # Produce STAC item metadata\n",
" if not stac_file.exists():\n",
" print(f\"Producing {stac_file.name}\")\n",
" subprocess.run(\n",
" [\n",
" \"eopf-stac\", str(cpm_dir), \n",
" \"--output-file\", str(stac_file),\n",
" # \"--asset-href-base\", TARGET_ASSET_HREF_BASE,\n",
" ],\n",
" check=True\n",
" )\n",
" \n",
" # Upload CPM Zarr product\n",
" print(f\"Uploading {cpm_dir.name}\")\n",
" subprocess.run(\n",
" [\n",
" \"s5cmd\",\n",
" \"--profile\", TARGET_S3_PROFILE,\n",
" \"--endpoint-url\", TARGET_S3_ENDPOINT_URL, \n",
" \"sync\", f\"{cpm_dir.absolute()}/\", cmp_uri,\n",
" ], \n",
" check=True)\n",
" subprocess.run(\n",
" [\n",
" \"s5cmd\",\n",
" \"--profile\", TARGET_S3_PROFILE,\n",
" \"--endpoint-url\", TARGET_S3_ENDPOINT_URL, \n",
" \"cp\", str(stac_file.absolute()), stac_uri\n",
" ], \n",
" check=True)\n",
" # Delete the CPM dir, keep the local STAC file as a token of completion\n",
" shutil.rmtree(cpm_dir)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment