Last active
January 15, 2026 20:07
-
-
Save j08lue/9a8157cbe5c6ba55ddc7f6dc8607999d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "86b7f248-da30-4768-a3f5-efa10c417653", | |
| "metadata": {}, | |
| "source": [ | |
| "# Download and process Sentinel-2 L2A products to CPM and upload to S3\n", | |
| "\n", | |
| "One-off script, using command-line interfaces of third-party software\n", | |
| "\n", | |
| "## Setup\n", | |
| "\n", | |
| "1. Install `s5cmd`\n", | |
| "2. Set up your `.aws/credentials` and `.aws/config` with profiles for CDSE and your target S3 configuration\n", | |
| "3. Install [eopf-cpm](https://gitlab.eopf.copernicus.eu/cpm/eopf-cpm) and [eopf-stac](https://github.com/EOPF-Sample-Service/eopf-stac) (nb: requires [this revision](https://github.com/EOPF-Sample-Service/eopf-stac/pull/65))\n", | |
| "\n", | |
| "## Usage\n", | |
| "\n", | |
| "1. Adapt the CDSE STAC query parameters to your needs\n", | |
| "2. Change the target S3 configuration, if necessary\n", | |
| "3. Run script top-to-bottom - NB: no retry logic\n", | |
| "4. Note that the STAC JSON files are not deleted locally after upload; they are used to identify which products have been completed" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "5d51e0e7-a68a-4556-aaa3-fe7058aa57c2", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import os\n", | |
| "import shutil\n", | |
| "import subprocess\n", | |
| "from pathlib import Path\n", | |
| "\n", | |
| "from pystac_client import Client" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "4b134e49-b30a-46a9-a80e-ce8047eee6a6", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "catalog = Client.open(\"https://stac.dataspace.copernicus.eu/v1/\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "e442ebe9-2cd1-4d5c-88df-30a968332baa", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Define your temporal range\n", | |
| "daterange = {\"interval\": [\"2021-09-14T10:25:31Z\", \"2022-01-03T23:59:59Z\"]}\n", | |
| "\n", | |
| "# Define your search with CQL2 syntax\n", | |
| "search = catalog.search(filter_lang=\"cql2-json\", filter={\n", | |
| " \"op\": \"and\",\n", | |
| " \"args\": [\n", | |
| " {\"op\": \"anyinteracts\", \"args\": [{\"property\": \"datetime\"}, daterange]},\n", | |
| " {\"op\": \"=\", \"args\": [{\"property\": \"collection\"}, \"sentinel-2-l2a\"]},\n", | |
| " {\"op\": \"=\", \"args\": [{\"property\": \"grid:code\"}, \"MGRS-28RBS\"]}\n", | |
| " ]\n", | |
| "})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "c41811d5-96e1-43c4-b9e4-f18fce80a6d1", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "items = list(search.items())\n", | |
| "len(items)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "22240a26-6011-40e4-aff1-389dca0a88ea", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "CDSE_S3_ENDPOINT_URL = \"https://eodata.dataspace.copernicus.eu/\"\n", | |
| "CDSE_S3_PROFILE = \"cdse\"\n", | |
| "TARGET_S3_ENDPOINT_URL = \"https://s3.de.io.cloud.ovh.net/\"\n", | |
| "TARGET_S3_PROFILE = \"eopf\"\n", | |
| "TARGET_PREFIX = \"s3://esa-zarr-sentinel-explorer-fra/cpm-manual/\" # keep trailing slash\n", | |
| "# TARGET_ASSET_HREF_BASE = \"https://s3.explorer.eopf.copernicus.eu/esa-zarr-sentinel-explorer-fra/cpm-manual/\" # keep trailing slash" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "d7b9c770-0269-4264-87bd-56a6b018e7d8", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def get_safe_root(item):\n", | |
| "    \"\"\"Return the S3 prefix of the .SAFE product root for a STAC item.\"\"\"\n", | |
| "    first_asset = next(iter(item.assets.values()))\n", | |
| "    root, _sep, _rest = first_asset.href.partition(\".SAFE\")\n", | |
| "    return root + \".SAFE\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "80f1e870-e7fb-4a27-bb2b-fbb88f2b052e", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "ROOT_DIR = Path(\"./cumbre-vieja\")\n", | |
| "\n", | |
| "# Flip to True to also skip products whose STAC file already exists in the target bucket\n", | |
| "CHECK_REMOTE_EXISTS = False\n", | |
| "\n", | |
| "for item in items:\n", | |
| "    prefix = get_safe_root(item)\n", | |
| "    print(prefix)\n", | |
| "\n", | |
| "    safe_dir = ROOT_DIR / \"safe\" / Path(prefix).name\n", | |
| "    cpm_dir = ROOT_DIR / \"cpm\" / (Path(prefix).stem + \".zarr\")\n", | |
| "    stac_file = cpm_dir.parent / cpm_dir.with_suffix(\".json\").name\n", | |
| "    cpm_uri = f\"{TARGET_PREFIX}{cpm_dir.name}/\"  # NOTE: fixed typo, was cmp_uri\n", | |
| "    stac_uri = f\"{TARGET_PREFIX}{stac_file.name}\"\n", | |
| "\n", | |
| "    # The local STAC file doubles as the completion marker for a product\n", | |
| "    if stac_file.exists():\n", | |
| "        print(f\"STAC file {stac_file} exists. Skipping.\")\n", | |
| "        continue\n", | |
| "\n", | |
| "    # Check whether remote product exists\n", | |
| "    if CHECK_REMOTE_EXISTS:\n", | |
| "        try:\n", | |
| "            subprocess.run(\n", | |
| "                [\n", | |
| "                    \"s5cmd\",\n", | |
| "                    \"--profile\", TARGET_S3_PROFILE,\n", | |
| "                    \"--endpoint-url\", TARGET_S3_ENDPOINT_URL,\n", | |
| "                    \"head\", stac_uri\n", | |
| "                ],\n", | |
| "                check=True)\n", | |
| "            print(f\"STAC file {stac_uri} exists. Skipping.\")\n", | |
| "            continue\n", | |
| "        except subprocess.CalledProcessError:\n", | |
| "            # head returned non-zero: not present remotely, fall through and process\n", | |
| "            pass\n", | |
| "\n", | |
| "    # Download SAFE from CDSE\n", | |
| "    if not safe_dir.exists():\n", | |
| "        print(f\"Downloading {safe_dir.name}\")\n", | |
| "        safe_dir.mkdir(parents=True)\n", | |
| "        try:\n", | |
| "            subprocess.run(\n", | |
| "                [\n", | |
| "                    \"s5cmd\",\n", | |
| "                    \"--profile\", CDSE_S3_PROFILE,\n", | |
| "                    \"--endpoint-url\", CDSE_S3_ENDPOINT_URL,\n", | |
| "                    \"sync\", f\"{prefix}/*\", str(safe_dir.absolute())\n", | |
| "                ],\n", | |
| "                check=True)\n", | |
| "        except BaseException:\n", | |
| "            # Remove the partial download so a retry starts clean, then re-raise\n", | |
| "            shutil.rmtree(safe_dir)\n", | |
| "            raise\n", | |
| "\n", | |
| "    # Produce CPM Zarr\n", | |
| "    if not cpm_dir.exists():\n", | |
| "        print(f\"Producing {cpm_dir.name}\")\n", | |
| "        cpm_dir.mkdir(parents=True)\n", | |
| "        try:\n", | |
| "            subprocess.run([\"eopf\", \"convert\", str(safe_dir), str(cpm_dir)], check=True)\n", | |
| "            shutil.rmtree(safe_dir)\n", | |
| "        except BaseException:\n", | |
| "            # Remove the partial conversion so a retry starts clean, then re-raise\n", | |
| "            shutil.rmtree(cpm_dir)\n", | |
| "            raise\n", | |
| "\n", | |
| "    # Produce STAC item metadata\n", | |
| "    if not stac_file.exists():\n", | |
| "        print(f\"Producing {stac_file.name}\")\n", | |
| "        subprocess.run(\n", | |
| "            [\n", | |
| "                \"eopf-stac\", str(cpm_dir),\n", | |
| "                \"--output-file\", str(stac_file),\n", | |
| "                # \"--asset-href-base\", TARGET_ASSET_HREF_BASE,\n", | |
| "            ],\n", | |
| "            check=True\n", | |
| "        )\n", | |
| "\n", | |
| "    # Upload CPM Zarr product\n", | |
| "    print(f\"Uploading {cpm_dir.name}\")\n", | |
| "    subprocess.run(\n", | |
| "        [\n", | |
| "            \"s5cmd\",\n", | |
| "            \"--profile\", TARGET_S3_PROFILE,\n", | |
| "            \"--endpoint-url\", TARGET_S3_ENDPOINT_URL,\n", | |
| "            \"sync\", f\"{cpm_dir.absolute()}/\", cpm_uri,\n", | |
| "        ],\n", | |
| "        check=True)\n", | |
| "    subprocess.run(\n", | |
| "        [\n", | |
| "            \"s5cmd\",\n", | |
| "            \"--profile\", TARGET_S3_PROFILE,\n", | |
| "            \"--endpoint-url\", TARGET_S3_ENDPOINT_URL,\n", | |
| "            \"cp\", str(stac_file.absolute()), stac_uri\n", | |
| "        ],\n", | |
| "        check=True)\n", | |
| "    # Delete the CPM dir; keep the local STAC file as a token of completion\n", | |
| "    shutil.rmtree(cpm_dir)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.7" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment