Skip to content

Instantly share code, notes, and snippets.

@isaaccorley
Last active November 18, 2025 03:01
Show Gist options
  • Select an option

  • Save isaaccorley/3cf87bfe9a8f41fff429aafb45decdab to your computer and use it in GitHub Desktop.

Select an option

Save isaaccorley/3cf87bfe9a8f41fff429aafb45decdab to your computer and use it in GitHub Desktop.
Convert AEF GTI to STAC-GeoParquet
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "087ffb66",
"metadata": {},
"outputs": [],
"source": [
"!pip install polars rustac 'geopandas[all]' shapely"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "fc4a8c06",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2025-11-17 20:58:32-- https://data.source.coop/tge-labs/aef/v1/annual/aef_index.parquet\n",
"Resolving data.source.coop (data.source.coop)... 52.89.55.49, 52.43.57.187, 34.213.93.21\n",
"Connecting to data.source.coop (data.source.coop)|52.89.55.49|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 66440580 (63M) [binary/octet-stream]\n",
"Saving to: ‘aef_index.parquet.1’\n",
"\n",
"aef_index.parquet.1 100%[===================>] 63.36M 30.7MB/s in 2.1s \n",
"\n",
"2025-11-17 20:58:35 (30.7 MB/s) - ‘aef_index.parquet.1’ saved [66440580/66440580]\n",
"\n"
]
}
],
"source": [
"!wget https://data.source.coop/tge-labs/aef/v1/annual/aef_index.parquet"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1bdd0edb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>fid</th>\n",
" <th>crs</th>\n",
" <th>path</th>\n",
" <th>year</th>\n",
" <th>utm_zone</th>\n",
" <th>utm_west</th>\n",
" <th>utm_south</th>\n",
" <th>utm_east</th>\n",
" <th>utm_north</th>\n",
" <th>wgs84_west</th>\n",
" <th>wgs84_south</th>\n",
" <th>wgs84_east</th>\n",
" <th>wgs84_north</th>\n",
" <th>geom</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>235127</td>\n",
" <td>EPSG:32760</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2024</td>\n",
" <td>60S</td>\n",
" <td>500000.0</td>\n",
" <td>9180800.0</td>\n",
" <td>581920.0</td>\n",
" <td>9262720.0</td>\n",
" <td>177.000000</td>\n",
" <td>-7.411146</td>\n",
" <td>177.742334</td>\n",
" <td>-6.669545</td>\n",
" <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>235005</td>\n",
" <td>EPSG:32760</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2023</td>\n",
" <td>60S</td>\n",
" <td>500000.0</td>\n",
" <td>9180800.0</td>\n",
" <td>581920.0</td>\n",
" <td>9262720.0</td>\n",
" <td>177.000000</td>\n",
" <td>-7.411146</td>\n",
" <td>177.742334</td>\n",
" <td>-6.669545</td>\n",
" <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>234529</td>\n",
" <td>EPSG:32760</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2019</td>\n",
" <td>60S</td>\n",
" <td>500000.0</td>\n",
" <td>9180800.0</td>\n",
" <td>581920.0</td>\n",
" <td>9262720.0</td>\n",
" <td>177.000000</td>\n",
" <td>-7.411146</td>\n",
" <td>177.742334</td>\n",
" <td>-6.669545</td>\n",
" <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>234765</td>\n",
" <td>EPSG:32760</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2021</td>\n",
" <td>60S</td>\n",
" <td>500000.0</td>\n",
" <td>9180800.0</td>\n",
" <td>581920.0</td>\n",
" <td>9262720.0</td>\n",
" <td>177.000000</td>\n",
" <td>-7.411146</td>\n",
" <td>177.742334</td>\n",
" <td>-6.669545</td>\n",
" <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>234651</td>\n",
" <td>EPSG:32760</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2020</td>\n",
" <td>60S</td>\n",
" <td>500000.0</td>\n",
" <td>9180800.0</td>\n",
" <td>581920.0</td>\n",
" <td>9262720.0</td>\n",
" <td>177.000000</td>\n",
" <td>-7.411146</td>\n",
" <td>177.742334</td>\n",
" <td>-6.669545</td>\n",
" <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235158</th>\n",
" <td>157584</td>\n",
" <td>EPSG:32701</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2020</td>\n",
" <td>1S</td>\n",
" <td>254240.0</td>\n",
" <td>8197760.0</td>\n",
" <td>336160.0</td>\n",
" <td>8279680.0</td>\n",
" <td>-179.299810</td>\n",
" <td>-16.295521</td>\n",
" <td>-178.527845</td>\n",
" <td>-15.548574</td>\n",
" <td>POLYGON ((-179.29981 -16.28857, -179.2615 -16....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235159</th>\n",
" <td>157887</td>\n",
" <td>EPSG:32701</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2024</td>\n",
" <td>1S</td>\n",
" <td>172320.0</td>\n",
" <td>8115840.0</td>\n",
" <td>254240.0</td>\n",
" <td>8197760.0</td>\n",
" <td>-180.000000</td>\n",
" <td>-17.028515</td>\n",
" <td>-179.299810</td>\n",
" <td>-16.279795</td>\n",
" <td>POLYGON ((-180 -16.27979, -180 -17.01949, -179...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235160</th>\n",
" <td>157888</td>\n",
" <td>EPSG:32701</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2024</td>\n",
" <td>1S</td>\n",
" <td>172320.0</td>\n",
" <td>8197760.0</td>\n",
" <td>254240.0</td>\n",
" <td>8279680.0</td>\n",
" <td>-180.000000</td>\n",
" <td>-16.288572</td>\n",
" <td>-179.291402</td>\n",
" <td>-15.540066</td>\n",
" <td>POLYGON ((-180 -15.54007, -180 -16.27979, -179...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235161</th>\n",
" <td>157897</td>\n",
" <td>EPSG:32701</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2024</td>\n",
" <td>1S</td>\n",
" <td>254240.0</td>\n",
" <td>8115840.0</td>\n",
" <td>336160.0</td>\n",
" <td>8197760.0</td>\n",
" <td>-179.308664</td>\n",
" <td>-17.035797</td>\n",
" <td>-178.533455</td>\n",
" <td>-16.288572</td>\n",
" <td>POLYGON ((-179.30866 -17.02851, -179.27021 -17...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>235162</th>\n",
" <td>157898</td>\n",
" <td>EPSG:32701</td>\n",
" <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n",
" <td>2024</td>\n",
" <td>1S</td>\n",
" <td>254240.0</td>\n",
" <td>8197760.0</td>\n",
" <td>336160.0</td>\n",
" <td>8279680.0</td>\n",
" <td>-179.299810</td>\n",
" <td>-16.295521</td>\n",
" <td>-178.527845</td>\n",
" <td>-15.548574</td>\n",
" <td>POLYGON ((-179.29981 -16.28857, -179.2615 -16....</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>235163 rows × 14 columns</p>\n",
"</div>"
],
"text/plain": [
" fid crs path \\\n",
"0 235127 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"1 235005 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"2 234529 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"3 234765 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"4 234651 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"... ... ... ... \n",
"235158 157584 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"235159 157887 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"235160 157888 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"235161 157897 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"235162 157898 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n",
"\n",
" year utm_zone utm_west utm_south utm_east utm_north wgs84_west \\\n",
"0 2024 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n",
"1 2023 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n",
"2 2019 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n",
"3 2021 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n",
"4 2020 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n",
"... ... ... ... ... ... ... ... \n",
"235158 2020 1S 254240.0 8197760.0 336160.0 8279680.0 -179.299810 \n",
"235159 2024 1S 172320.0 8115840.0 254240.0 8197760.0 -180.000000 \n",
"235160 2024 1S 172320.0 8197760.0 254240.0 8279680.0 -180.000000 \n",
"235161 2024 1S 254240.0 8115840.0 336160.0 8197760.0 -179.308664 \n",
"235162 2024 1S 254240.0 8197760.0 336160.0 8279680.0 -179.299810 \n",
"\n",
" wgs84_south wgs84_east wgs84_north \\\n",
"0 -7.411146 177.742334 -6.669545 \n",
"1 -7.411146 177.742334 -6.669545 \n",
"2 -7.411146 177.742334 -6.669545 \n",
"3 -7.411146 177.742334 -6.669545 \n",
"4 -7.411146 177.742334 -6.669545 \n",
"... ... ... ... \n",
"235158 -16.295521 -178.527845 -15.548574 \n",
"235159 -17.028515 -179.299810 -16.279795 \n",
"235160 -16.288572 -179.291402 -15.540066 \n",
"235161 -17.035797 -178.533455 -16.288572 \n",
"235162 -16.295521 -178.527845 -15.548574 \n",
"\n",
" geom \n",
"0 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n",
"1 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n",
"2 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n",
"3 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n",
"4 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n",
"... ... \n",
"235158 POLYGON ((-179.29981 -16.28857, -179.2615 -16.... \n",
"235159 POLYGON ((-180 -16.27979, -180 -17.01949, -179... \n",
"235160 POLYGON ((-180 -15.54007, -180 -16.27979, -179... \n",
"235161 POLYGON ((-179.30866 -17.02851, -179.27021 -17... \n",
"235162 POLYGON ((-179.29981 -16.28857, -179.2615 -16.... \n",
"\n",
"[235163 rows x 14 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import geopandas as gpd\n",
"\n",
"path = \"aef_index.parquet\"\n",
"gdf = gpd.read_parquet(path)\n",
"gdf"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b067fc77",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'e_tag': '1ded93e-643d5a4877812-4207e8', 'version': None}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import polars as pl\n",
"import pyarrow as pa\n",
"import pyarrow.parquet as pq\n",
"import rustac\n",
"import shapely\n",
"from shapely import wkb\n",
"\n",
"\n",
"def make_geom(row: dict) -> bytes:\n",
" geom = shapely.box(\n",
" row[\"wgs84_west\"],\n",
" row[\"wgs84_south\"],\n",
" row[\"wgs84_east\"],\n",
" row[\"wgs84_north\"],\n",
" )\n",
" return wkb.dumps(geom, hex=False)\n",
"\n",
"\n",
"def create_assets_map(assets_data_col):\n",
" assets_list = []\n",
" for asset_struct in assets_data_col.to_pylist():\n",
" assets_map = {\"data\": asset_struct}\n",
" assets_list.append(assets_map)\n",
" return pa.array(\n",
" assets_list,\n",
" type=pa.map_(\n",
" pa.string(),\n",
" pa.struct([\n",
" pa.field(\"href\", pa.string()),\n",
" pa.field(\"type\", pa.string()),\n",
" pa.field(\"roles\", pa.list_(pa.string())),\n",
" ]),\n",
" ),\n",
" )\n",
"\n",
"\n",
"path = \"aef_index.parquet\"\n",
"table = pq.read_table(path)\n",
"\n",
"new_cols = []\n",
"for field, col in zip(table.schema, table.columns):\n",
" if isinstance(field.type, pa.ExtensionType):\n",
" new_cols.append(col.cast(field.type.storage_type))\n",
" else:\n",
" new_cols.append(col)\n",
"\n",
"clean_table = pa.Table.from_arrays(\n",
" new_cols,\n",
" schema=pa.schema([\n",
" pa.field(\n",
" field.name,\n",
" field.type.storage_type\n",
" if isinstance(field.type, pa.ExtensionType)\n",
" else field.type,\n",
" )\n",
" for field in table.schema\n",
" ]),\n",
")\n",
"\n",
"df = pl.from_arrow(clean_table)\n",
"df = df.with_columns([\n",
" pl.struct([\n",
" pl.col(\"wgs84_west\").alias(\"xmin\"),\n",
" pl.col(\"wgs84_south\").alias(\"ymin\"),\n",
" pl.col(\"wgs84_east\").alias(\"xmax\"),\n",
" pl.col(\"wgs84_north\").alias(\"ymax\"),\n",
" ]).alias(\"bbox\"),\n",
" pl.struct([\"wgs84_west\", \"wgs84_south\", \"wgs84_east\", \"wgs84_north\"])\n",
" .map_elements(make_geom, return_dtype=pl.Binary)\n",
" .alias(\"geometry\"),\n",
"])\n",
"\n",
"df_lazy = df.lazy()\n",
"\n",
"asset_struct = pl.struct([\n",
" pl.col(\"path\").alias(\"href\"),\n",
" pl.lit(\"image/tiff; application=geotiff\").alias(\"type\"),\n",
" pl.lit([\"data\"]).alias(\"roles\"),\n",
"])\n",
"\n",
"df_stac_lazy = df_lazy.with_columns([\n",
" pl.col(\"fid\").cast(pl.String).alias(\"id\"),\n",
" pl.lit(\"Feature\").alias(\"type\"),\n",
" pl.lit(\"1.0.0\").alias(\"stac_version\"),\n",
" pl.col(\"bbox\"),\n",
" pl.col(\"geometry\"),\n",
" pl.concat_str([\n",
" pl.col(\"year\").cast(pl.String),\n",
" pl.lit(\"-01-01T00:00:00Z\"),\n",
" ]).alias(\"datetime\"),\n",
" pl.col(\"crs\").alias(\"proj:epsg\"),\n",
" asset_struct.alias(\"assets_data\"),\n",
"])\n",
"\n",
"df_stac = df_stac_lazy.collect()\n",
"df_stac = df_stac.with_columns([\n",
" pl.col(\"geometry\").cast(pl.Binary),\n",
"])\n",
"\n",
"stac_columns = [\n",
" \"id\",\n",
" \"type\",\n",
" \"stac_version\",\n",
" \"geometry\",\n",
" \"bbox\",\n",
" \"datetime\",\n",
" \"proj:epsg\",\n",
" \"assets_data\",\n",
"]\n",
"df_stac = df_stac.select(stac_columns)\n",
"\n",
"table = df_stac.to_arrow()\n",
"\n",
"new_arrays = []\n",
"new_fields = []\n",
"for i, field in enumerate(table.schema):\n",
" col = table.column(i)\n",
" if field.name == \"assets_data\":\n",
" assets_map_col = create_assets_map(col)\n",
" new_fields.append(\n",
" pa.field(\n",
" \"assets\",\n",
" pa.map_(\n",
" pa.string(),\n",
" pa.struct([\n",
" pa.field(\"href\", pa.string()),\n",
" pa.field(\"type\", pa.string()),\n",
" pa.field(\"roles\", pa.list_(pa.string())),\n",
" ]),\n",
" ),\n",
" )\n",
" )\n",
" new_arrays.append(assets_map_col)\n",
" elif field.name == \"bbox\":\n",
" if not pa.types.is_struct(field.type):\n",
" bbox_data = col.to_pylist()\n",
" bbox_structs = [\n",
" {\n",
" \"xmin\": float(row[0]),\n",
" \"ymin\": float(row[1]),\n",
" \"xmax\": float(row[2]),\n",
" \"ymax\": float(row[3]),\n",
" }\n",
" if isinstance(row, (list, tuple)) and len(row) == 4\n",
" else row\n",
" for row in bbox_data\n",
" ]\n",
" col = pa.array(\n",
" bbox_structs,\n",
" type=pa.struct([\n",
" pa.field(\"xmin\", pa.float64()),\n",
" pa.field(\"ymin\", pa.float64()),\n",
" pa.field(\"xmax\", pa.float64()),\n",
" pa.field(\"ymax\", pa.float64()),\n",
" ]),\n",
" )\n",
" new_fields.append(\n",
" pa.field(\n",
" \"bbox\",\n",
" pa.struct([\n",
" pa.field(\"xmin\", pa.float64()),\n",
" pa.field(\"ymin\", pa.float64()),\n",
" pa.field(\"xmax\", pa.float64()),\n",
" pa.field(\"ymax\", pa.float64()),\n",
" ]),\n",
" )\n",
" )\n",
" new_arrays.append(col)\n",
" elif field.name == \"geometry\":\n",
" if not pa.types.is_binary(field.type):\n",
" col = col.cast(pa.binary())\n",
" new_fields.append(pa.field(\"geometry\", pa.binary()))\n",
" new_arrays.append(col)\n",
" elif field.name != \"assets_data\":\n",
" if pa.types.is_large_string(field.type):\n",
" col = col.cast(pa.string())\n",
" new_fields.append(pa.field(field.name, pa.string()))\n",
" new_arrays.append(col)\n",
" else:\n",
" new_fields.append(field)\n",
" new_arrays.append(col)\n",
"\n",
"table = pa.Table.from_arrays(new_arrays, schema=pa.schema(new_fields))\n",
"\n",
"metadata = {\n",
" b\"geo\": b\"\"\"{\n",
" \"version\": \"1.1.0\",\n",
" \"primary_column\": \"geometry\",\n",
" \"columns\": {\n",
" \"geometry\": {\n",
" \"encoding\": \"WKB\",\n",
" \"geometry_types\": [\"Polygon\"]\n",
" }\n",
" }\n",
" }\"\"\",\n",
" b\"stac_version\": b\"1.0.0\",\n",
"}\n",
"table = table.replace_schema_metadata(metadata)\n",
"assert len(df_stac) == len(table)\n",
"\n",
"items = rustac.from_arrow(table)\n",
"await rustac.write(\"aef-index-stac-geoparquet.parquet\", items)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "torchgeo",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment