Last active
November 18, 2025 03:01
-
-
Save isaaccorley/3cf87bfe9a8f41fff429aafb45decdab to your computer and use it in GitHub Desktop.
Convert AEF GTI to STAC-GeoParquet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "087ffb66", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "!pip install polars rustac 'geopandas[all]' shapely" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "fc4a8c06", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "--2025-11-17 20:58:32-- https://data.source.coop/tge-labs/aef/v1/annual/aef_index.parquet\n", | |
| "Resolving data.source.coop (data.source.coop)... 52.89.55.49, 52.43.57.187, 34.213.93.21\n", | |
| "Connecting to data.source.coop (data.source.coop)|52.89.55.49|:443... connected.\n", | |
| "HTTP request sent, awaiting response... 200 OK\n", | |
| "Length: 66440580 (63M) [binary/octet-stream]\n", | |
| "Saving to: ‘aef_index.parquet.1’\n", | |
| "\n", | |
| "aef_index.parquet.1 100%[===================>] 63.36M 30.7MB/s in 2.1s \n", | |
| "\n", | |
| "2025-11-17 20:58:35 (30.7 MB/s) - ‘aef_index.parquet.1’ saved [66440580/66440580]\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "!wget -O aef_index.parquet https://data.source.coop/tge-labs/aef/v1/annual/aef_index.parquet" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "1bdd0edb", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>fid</th>\n", | |
| " <th>crs</th>\n", | |
| " <th>path</th>\n", | |
| " <th>year</th>\n", | |
| " <th>utm_zone</th>\n", | |
| " <th>utm_west</th>\n", | |
| " <th>utm_south</th>\n", | |
| " <th>utm_east</th>\n", | |
| " <th>utm_north</th>\n", | |
| " <th>wgs84_west</th>\n", | |
| " <th>wgs84_south</th>\n", | |
| " <th>wgs84_east</th>\n", | |
| " <th>wgs84_north</th>\n", | |
| " <th>geom</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>235127</td>\n", | |
| " <td>EPSG:32760</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2024</td>\n", | |
| " <td>60S</td>\n", | |
| " <td>500000.0</td>\n", | |
| " <td>9180800.0</td>\n", | |
| " <td>581920.0</td>\n", | |
| " <td>9262720.0</td>\n", | |
| " <td>177.000000</td>\n", | |
| " <td>-7.411146</td>\n", | |
| " <td>177.742334</td>\n", | |
| " <td>-6.669545</td>\n", | |
| " <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>235005</td>\n", | |
| " <td>EPSG:32760</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2023</td>\n", | |
| " <td>60S</td>\n", | |
| " <td>500000.0</td>\n", | |
| " <td>9180800.0</td>\n", | |
| " <td>581920.0</td>\n", | |
| " <td>9262720.0</td>\n", | |
| " <td>177.000000</td>\n", | |
| " <td>-7.411146</td>\n", | |
| " <td>177.742334</td>\n", | |
| " <td>-6.669545</td>\n", | |
| " <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>234529</td>\n", | |
| " <td>EPSG:32760</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2019</td>\n", | |
| " <td>60S</td>\n", | |
| " <td>500000.0</td>\n", | |
| " <td>9180800.0</td>\n", | |
| " <td>581920.0</td>\n", | |
| " <td>9262720.0</td>\n", | |
| " <td>177.000000</td>\n", | |
| " <td>-7.411146</td>\n", | |
| " <td>177.742334</td>\n", | |
| " <td>-6.669545</td>\n", | |
| " <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>234765</td>\n", | |
| " <td>EPSG:32760</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2021</td>\n", | |
| " <td>60S</td>\n", | |
| " <td>500000.0</td>\n", | |
| " <td>9180800.0</td>\n", | |
| " <td>581920.0</td>\n", | |
| " <td>9262720.0</td>\n", | |
| " <td>177.000000</td>\n", | |
| " <td>-7.411146</td>\n", | |
| " <td>177.742334</td>\n", | |
| " <td>-6.669545</td>\n", | |
| " <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>234651</td>\n", | |
| " <td>EPSG:32760</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2020</td>\n", | |
| " <td>60S</td>\n", | |
| " <td>500000.0</td>\n", | |
| " <td>9180800.0</td>\n", | |
| " <td>581920.0</td>\n", | |
| " <td>9262720.0</td>\n", | |
| " <td>177.000000</td>\n", | |
| " <td>-7.411146</td>\n", | |
| " <td>177.742334</td>\n", | |
| " <td>-6.669545</td>\n", | |
| " <td>POLYGON ((177 -7.41115, 177.03712 -7.41114, 17...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>235158</th>\n", | |
| " <td>157584</td>\n", | |
| " <td>EPSG:32701</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2020</td>\n", | |
| " <td>1S</td>\n", | |
| " <td>254240.0</td>\n", | |
| " <td>8197760.0</td>\n", | |
| " <td>336160.0</td>\n", | |
| " <td>8279680.0</td>\n", | |
| " <td>-179.299810</td>\n", | |
| " <td>-16.295521</td>\n", | |
| " <td>-178.527845</td>\n", | |
| " <td>-15.548574</td>\n", | |
| " <td>POLYGON ((-179.29981 -16.28857, -179.2615 -16....</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>235159</th>\n", | |
| " <td>157887</td>\n", | |
| " <td>EPSG:32701</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2024</td>\n", | |
| " <td>1S</td>\n", | |
| " <td>172320.0</td>\n", | |
| " <td>8115840.0</td>\n", | |
| " <td>254240.0</td>\n", | |
| " <td>8197760.0</td>\n", | |
| " <td>-180.000000</td>\n", | |
| " <td>-17.028515</td>\n", | |
| " <td>-179.299810</td>\n", | |
| " <td>-16.279795</td>\n", | |
| " <td>POLYGON ((-180 -16.27979, -180 -17.01949, -179...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>235160</th>\n", | |
| " <td>157888</td>\n", | |
| " <td>EPSG:32701</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2024</td>\n", | |
| " <td>1S</td>\n", | |
| " <td>172320.0</td>\n", | |
| " <td>8197760.0</td>\n", | |
| " <td>254240.0</td>\n", | |
| " <td>8279680.0</td>\n", | |
| " <td>-180.000000</td>\n", | |
| " <td>-16.288572</td>\n", | |
| " <td>-179.291402</td>\n", | |
| " <td>-15.540066</td>\n", | |
| " <td>POLYGON ((-180 -15.54007, -180 -16.27979, -179...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>235161</th>\n", | |
| " <td>157897</td>\n", | |
| " <td>EPSG:32701</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2024</td>\n", | |
| " <td>1S</td>\n", | |
| " <td>254240.0</td>\n", | |
| " <td>8115840.0</td>\n", | |
| " <td>336160.0</td>\n", | |
| " <td>8197760.0</td>\n", | |
| " <td>-179.308664</td>\n", | |
| " <td>-17.035797</td>\n", | |
| " <td>-178.533455</td>\n", | |
| " <td>-16.288572</td>\n", | |
| " <td>POLYGON ((-179.30866 -17.02851, -179.27021 -17...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>235162</th>\n", | |
| " <td>157898</td>\n", | |
| " <td>EPSG:32701</td>\n", | |
| " <td>s3://us-west-2.opendata.source.coop/tge-labs/a...</td>\n", | |
| " <td>2024</td>\n", | |
| " <td>1S</td>\n", | |
| " <td>254240.0</td>\n", | |
| " <td>8197760.0</td>\n", | |
| " <td>336160.0</td>\n", | |
| " <td>8279680.0</td>\n", | |
| " <td>-179.299810</td>\n", | |
| " <td>-16.295521</td>\n", | |
| " <td>-178.527845</td>\n", | |
| " <td>-15.548574</td>\n", | |
| " <td>POLYGON ((-179.29981 -16.28857, -179.2615 -16....</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>235163 rows × 14 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " fid crs path \\\n", | |
| "0 235127 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "1 235005 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "2 234529 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "3 234765 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "4 234651 EPSG:32760 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "... ... ... ... \n", | |
| "235158 157584 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "235159 157887 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "235160 157888 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "235161 157897 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "235162 157898 EPSG:32701 s3://us-west-2.opendata.source.coop/tge-labs/a... \n", | |
| "\n", | |
| " year utm_zone utm_west utm_south utm_east utm_north wgs84_west \\\n", | |
| "0 2024 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n", | |
| "1 2023 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n", | |
| "2 2019 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n", | |
| "3 2021 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n", | |
| "4 2020 60S 500000.0 9180800.0 581920.0 9262720.0 177.000000 \n", | |
| "... ... ... ... ... ... ... ... \n", | |
| "235158 2020 1S 254240.0 8197760.0 336160.0 8279680.0 -179.299810 \n", | |
| "235159 2024 1S 172320.0 8115840.0 254240.0 8197760.0 -180.000000 \n", | |
| "235160 2024 1S 172320.0 8197760.0 254240.0 8279680.0 -180.000000 \n", | |
| "235161 2024 1S 254240.0 8115840.0 336160.0 8197760.0 -179.308664 \n", | |
| "235162 2024 1S 254240.0 8197760.0 336160.0 8279680.0 -179.299810 \n", | |
| "\n", | |
| " wgs84_south wgs84_east wgs84_north \\\n", | |
| "0 -7.411146 177.742334 -6.669545 \n", | |
| "1 -7.411146 177.742334 -6.669545 \n", | |
| "2 -7.411146 177.742334 -6.669545 \n", | |
| "3 -7.411146 177.742334 -6.669545 \n", | |
| "4 -7.411146 177.742334 -6.669545 \n", | |
| "... ... ... ... \n", | |
| "235158 -16.295521 -178.527845 -15.548574 \n", | |
| "235159 -17.028515 -179.299810 -16.279795 \n", | |
| "235160 -16.288572 -179.291402 -15.540066 \n", | |
| "235161 -17.035797 -178.533455 -16.288572 \n", | |
| "235162 -16.295521 -178.527845 -15.548574 \n", | |
| "\n", | |
| " geom \n", | |
| "0 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n", | |
| "1 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n", | |
| "2 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n", | |
| "3 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n", | |
| "4 POLYGON ((177 -7.41115, 177.03712 -7.41114, 17... \n", | |
| "... ... \n", | |
| "235158 POLYGON ((-179.29981 -16.28857, -179.2615 -16.... \n", | |
| "235159 POLYGON ((-180 -16.27979, -180 -17.01949, -179... \n", | |
| "235160 POLYGON ((-180 -15.54007, -180 -16.27979, -179... \n", | |
| "235161 POLYGON ((-179.30866 -17.02851, -179.27021 -17... \n", | |
| "235162 POLYGON ((-179.29981 -16.28857, -179.2615 -16.... \n", | |
| "\n", | |
| "[235163 rows x 14 columns]" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import geopandas as gpd\n", | |
| "\n", | |
| "path = \"aef_index.parquet\"\n", | |
| "gdf = gpd.read_parquet(path)\n", | |
| "gdf" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "b067fc77", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'e_tag': '1ded93e-643d5a4877812-4207e8', 'version': None}" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import polars as pl\n", | |
| "import pyarrow as pa\n", | |
| "import pyarrow.parquet as pq\n", | |
| "import rustac\n", | |
| "import shapely\n", | |
| "from shapely import wkb\n", | |
| "\n", | |
| "\n", | |
| "def make_geom(row: dict) -> bytes:\n", | |
| " geom = shapely.box(\n", | |
| " row[\"wgs84_west\"],\n", | |
| " row[\"wgs84_south\"],\n", | |
| " row[\"wgs84_east\"],\n", | |
| " row[\"wgs84_north\"],\n", | |
| " )\n", | |
| " return wkb.dumps(geom, hex=False)\n", | |
| "\n", | |
| "\n", | |
| "def create_assets_map(assets_data_col):\n", | |
| "    assets_list = []\n", | |
| "    for asset_struct in assets_data_col.to_pylist():\n", | |
| "        assets_map = {\"data\": asset_struct}\n", | |
| "        assets_list.append(assets_map)\n", | |
| "    return pa.array(\n", | |
| "        assets_list,\n", | |
| "        type=pa.map_(\n", | |
| "            pa.string(),\n", | |
| "            pa.struct([\n", | |
| "                pa.field(\"href\", pa.string()),\n", | |
| "                pa.field(\"type\", pa.string()),\n", | |
| "                pa.field(\"roles\", pa.list_(pa.string())),\n", | |
| "            ]),\n", | |
| "        ),\n", | |
| "    )\n", | |
| "\n", | |
| "\n", | |
| "path = \"aef_index.parquet\"\n", | |
| "table = pq.read_table(path)\n", | |
| "\n", | |
| "new_cols = []\n", | |
| "for field, col in zip(table.schema, table.columns):\n", | |
| "    if isinstance(field.type, pa.ExtensionType):\n", | |
| "        new_cols.append(col.cast(field.type.storage_type))\n", | |
| "    else:\n", | |
| "        new_cols.append(col)\n", | |
| "\n", | |
| "clean_table = pa.Table.from_arrays(\n", | |
| " new_cols,\n", | |
| " schema=pa.schema([\n", | |
| " pa.field(\n", | |
| " field.name,\n", | |
| " field.type.storage_type\n", | |
| " if isinstance(field.type, pa.ExtensionType)\n", | |
| " else field.type,\n", | |
| " )\n", | |
| " for field in table.schema\n", | |
| " ]),\n", | |
| ")\n", | |
| "\n", | |
| "df = pl.from_arrow(clean_table)\n", | |
| "df = df.with_columns([\n", | |
| " pl.struct([\n", | |
| " pl.col(\"wgs84_west\").alias(\"xmin\"),\n", | |
| " pl.col(\"wgs84_south\").alias(\"ymin\"),\n", | |
| " pl.col(\"wgs84_east\").alias(\"xmax\"),\n", | |
| " pl.col(\"wgs84_north\").alias(\"ymax\"),\n", | |
| " ]).alias(\"bbox\"),\n", | |
| " pl.struct([\"wgs84_west\", \"wgs84_south\", \"wgs84_east\", \"wgs84_north\"])\n", | |
| " .map_elements(make_geom, return_dtype=pl.Binary)\n", | |
| " .alias(\"geometry\"),\n", | |
| "])\n", | |
| "\n", | |
| "df_lazy = df.lazy()\n", | |
| "\n", | |
| "asset_struct = pl.struct([\n", | |
| " pl.col(\"path\").alias(\"href\"),\n", | |
| " pl.lit(\"image/tiff; application=geotiff\").alias(\"type\"),\n", | |
| " pl.lit([\"data\"]).alias(\"roles\"),\n", | |
| "])\n", | |
| "\n", | |
| "df_stac_lazy = df_lazy.with_columns([\n", | |
| " pl.col(\"fid\").cast(pl.String).alias(\"id\"),\n", | |
| " pl.lit(\"Feature\").alias(\"type\"),\n", | |
| " pl.lit(\"1.0.0\").alias(\"stac_version\"),\n", | |
| " pl.col(\"bbox\"),\n", | |
| " pl.col(\"geometry\"),\n", | |
| " pl.concat_str([\n", | |
| " pl.col(\"year\").cast(pl.String),\n", | |
| " pl.lit(\"-01-01T00:00:00Z\"),\n", | |
| " ]).alias(\"datetime\"),\n", | |
| " pl.col(\"crs\").str.strip_prefix(\"EPSG:\").cast(pl.Int64).alias(\"proj:epsg\"),\n", | |
| " asset_struct.alias(\"assets_data\"),\n", | |
| "])\n", | |
| "\n", | |
| "df_stac = df_stac_lazy.collect()\n", | |
| "df_stac = df_stac.with_columns([\n", | |
| " pl.col(\"geometry\").cast(pl.Binary),\n", | |
| "])\n", | |
| "\n", | |
| "stac_columns = [\n", | |
| " \"id\",\n", | |
| " \"type\",\n", | |
| " \"stac_version\",\n", | |
| " \"geometry\",\n", | |
| " \"bbox\",\n", | |
| " \"datetime\",\n", | |
| " \"proj:epsg\",\n", | |
| " \"assets_data\",\n", | |
| "]\n", | |
| "df_stac = df_stac.select(stac_columns)\n", | |
| "\n", | |
| "table = df_stac.to_arrow()\n", | |
| "\n", | |
| "new_arrays = []\n", | |
| "new_fields = []\n", | |
| "for i, field in enumerate(table.schema):\n", | |
| "    col = table.column(i)\n", | |
| "    if field.name == \"assets_data\":\n", | |
| "        assets_map_col = create_assets_map(col)\n", | |
| "        new_fields.append(\n", | |
| "            pa.field(\n", | |
| "                \"assets\",\n", | |
| "                pa.map_(\n", | |
| "                    pa.string(),\n", | |
| "                    pa.struct([\n", | |
| "                        pa.field(\"href\", pa.string()),\n", | |
| "                        pa.field(\"type\", pa.string()),\n", | |
| "                        pa.field(\"roles\", pa.list_(pa.string())),\n", | |
| "                    ]),\n", | |
| "                ),\n", | |
| "            )\n", | |
| "        )\n", | |
| "        new_arrays.append(assets_map_col)\n", | |
| "    elif field.name == \"bbox\":\n", | |
| "        if not pa.types.is_struct(field.type):\n", | |
| "            bbox_data = col.to_pylist()\n", | |
| "            bbox_structs = [\n", | |
| "                {\n", | |
| "                    \"xmin\": float(row[0]),\n", | |
| "                    \"ymin\": float(row[1]),\n", | |
| "                    \"xmax\": float(row[2]),\n", | |
| "                    \"ymax\": float(row[3]),\n", | |
| "                }\n", | |
| "                if isinstance(row, (list, tuple)) and len(row) == 4\n", | |
| "                else row\n", | |
| "                for row in bbox_data\n", | |
| "            ]\n", | |
| "            col = pa.array(\n", | |
| "                bbox_structs,\n", | |
| "                type=pa.struct([\n", | |
| "                    pa.field(\"xmin\", pa.float64()),\n", | |
| "                    pa.field(\"ymin\", pa.float64()),\n", | |
| "                    pa.field(\"xmax\", pa.float64()),\n", | |
| "                    pa.field(\"ymax\", pa.float64()),\n", | |
| "                ]),\n", | |
| "            )\n", | |
| "        new_fields.append(\n", | |
| "            pa.field(\n", | |
| "                \"bbox\",\n", | |
| "                pa.struct([\n", | |
| "                    pa.field(\"xmin\", pa.float64()),\n", | |
| "                    pa.field(\"ymin\", pa.float64()),\n", | |
| "                    pa.field(\"xmax\", pa.float64()),\n", | |
| "                    pa.field(\"ymax\", pa.float64()),\n", | |
| "                ]),\n", | |
| "            )\n", | |
| "        )\n", | |
| "        new_arrays.append(col)\n", | |
| "    elif field.name == \"geometry\":\n", | |
| "        if not pa.types.is_binary(field.type):\n", | |
| "            col = col.cast(pa.binary())\n", | |
| "        new_fields.append(pa.field(\"geometry\", pa.binary()))\n", | |
| "        new_arrays.append(col)\n", | |
| "    elif field.name != \"assets_data\":\n", | |
| "        if pa.types.is_large_string(field.type):\n", | |
| "            col = col.cast(pa.string())\n", | |
| "            new_fields.append(pa.field(field.name, pa.string()))\n", | |
| "            new_arrays.append(col)\n", | |
| "        else:\n", | |
| "            new_fields.append(field)\n", | |
| "            new_arrays.append(col)\n", | |
| "\n", | |
| "table = pa.Table.from_arrays(new_arrays, schema=pa.schema(new_fields))\n", | |
| "\n", | |
| "metadata = {\n", | |
| " b\"geo\": b\"\"\"{\n", | |
| " \"version\": \"1.1.0\",\n", | |
| " \"primary_column\": \"geometry\",\n", | |
| " \"columns\": {\n", | |
| " \"geometry\": {\n", | |
| " \"encoding\": \"WKB\",\n", | |
| " \"geometry_types\": [\"Polygon\"]\n", | |
| " }\n", | |
| " }\n", | |
| " }\"\"\",\n", | |
| " b\"stac_version\": b\"1.0.0\",\n", | |
| "}\n", | |
| "table = table.replace_schema_metadata(metadata)\n", | |
| "assert len(df_stac) == len(table)\n", | |
| "\n", | |
| "items = rustac.from_arrow(table)\n", | |
| "await rustac.write(\"aef-index-stac-geoparquet.parquet\", items)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "torchgeo", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.12" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment