Skip to content

Instantly share code, notes, and snippets.

@fbriol
Created May 21, 2019 15:25
Show Gist options
  • Select an option

  • Save fbriol/026f9cbe38a60ecceeefa8fe899368ca to your computer and use it in GitHub Desktop.

Select an option

Save fbriol/026f9cbe38a60ecceeefa8fe899368ca to your computer and use it in GitHub Desktop.
Example, quick, of geographical indexes.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# install with conda: conda install pyindex -c fbriol\n",
"import pyindex.core as core"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n"
]
}
],
"source": [
"# 1D-Spatial index\n",
"axis = core.Axis(np.arange(-180, 179,1), is_circle=True) # We have longitudes that can define a circle\n",
"print(axis.is_circle) # But this axis is not a circle."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-180.0 178.0\n"
]
}
],
"source": [
"print(axis.front(), axis.back())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"axis = core.Axis(np.arange(-180, 180,1), is_circle=True) # We have longitudes that can define a circle\n",
"print(axis.is_circle) # Here we have a circle."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([180, 180, 270], dtype=int64)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# In this case, requests can be made without regard to angular values if they are expressed in degrees.\n",
"axis.find_index([360, 359.5, -270])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24.2 s ± 190 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"# Queries are super fast (24s for a billion values)\n",
"%timeit len(axis.find_index(np.arange(0, 360, 1/3000000.0)))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"False\n"
]
}
],
"source": [
"# The axis also works when the values are not spaced regularly.\n",
"MERCATOR_LATITUDES = np.array([\n",
" -89.000000, -88.908818, -88.809323, -88.700757, -88.582294, -88.453032,\n",
" -88.311987, -88.158087, -87.990161, -87.806932, -87.607008, -87.388869,\n",
" -87.150861, -86.891178, -86.607851, -86.298736, -85.961495, -85.593582,\n",
" -85.192224, -84.754402, -84.276831, -83.755939, -83.187844, -82.568330,\n",
" -81.892820, -81.156357, -80.353575, -79.478674, -78.525397, -77.487013,\n",
" -76.356296, -75.125518, -73.786444, -72.330344, -70.748017, -69.029837,\n",
" -67.165823, -65.145744, -62.959262, -60.596124, -58.046413, -55.300856,\n",
" -52.351206, -49.190700, -45.814573, -42.220632, -38.409866, -34.387043,\n",
" -30.161252, -25.746331, -21.161107, -16.429384, -11.579629, -6.644331,\n",
" -1.659041, 3.338836, 8.311423, 13.221792, 18.035297, 22.720709,\n",
" 27.251074, 31.604243, 35.763079, 39.715378, 43.453560, 46.974192,\n",
" 50.277423, 53.366377, 56.246554, 58.925270, 61.411164, 63.713764,\n",
" 65.843134, 67.809578, 69.623418, 71.294813, 72.833637, 74.249378,\n",
" 75.551083, 76.747318, 77.846146, 78.855128, 79.781321, 80.631294,\n",
" 81.411149, 82.126535, 82.782681, 83.384411, 83.936179, 84.442084,\n",
" 84.905904, 85.331111, 85.720897, 86.078198, 86.405707, 86.705898,\n",
" 86.981044, 87.233227, 87.464359, 87.676195, 87.870342, 88.048275,\n",
" 88.211348, 88.360799, 88.497766, 88.623291, 88.738328, 88.843755,\n",
" 88.940374\n",
" ], dtype=np.float64)\n",
"axis = core.Axis(MERCATOR_LATITUDES)\n",
"print(axis.is_regular())"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 54, 3, 104, 54, -1], dtype=int64)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"axis.find_index([-1.659041, -88.700757, 88.497766, 0, -90])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on Axis in module pyindex.core object:\n",
"\n",
"class Axis(pybind11_builtins.pybind11_object)\n",
" | A coordinate axis is a Variable that specifies one of the coordinates\n",
" | of a variable's values.\n",
" | \n",
" | Mathematically it is a vector function :math:`f` from index space to\n",
" | :math:`S_n`:\n",
" | \n",
" | :math:`f(i,j,k,...) \\to (S_1, S_2, ... S_n)`\n",
" | \n",
" | where :math:`i, j, k` are integers, and :math:`S` is the set of reals\n",
" | (:math:`\\mathbb{R}`).\n",
" | \n",
" | Method resolution order:\n",
" | Axis\n",
" | pybind11_builtins.pybind11_object\n",
" | builtins.object\n",
" | \n",
" | Methods defined here:\n",
" | \n",
" | __eq__(...)\n",
" | __eq__(self: pyindex.core.Axis, arg0: pyindex.core.Axis) -> bool\n",
" | \n",
" | __getitem__(...)\n",
" | __getitem__(*args, **kwargs)\n",
" | Overloaded function.\n",
" | \n",
" | 1. __getitem__(self: pyindex.core.Axis, arg0: int) -> float\n",
" | \n",
" | 2. __getitem__(self: pyindex.core.Axis, arg0: slice) -> numpy.ndarray[float64]\n",
" | \n",
" | __getstate__(...)\n",
" | __getstate__(self: pyindex.core.Axis) -> tuple\n",
" | \n",
" | __init__(...)\n",
" | __init__(*args, **kwargs)\n",
" | Overloaded function.\n",
" | \n",
" | 1. __init__(self: pyindex.core.Axis, values: numpy.ndarray[float64], is_circle: bool = False, is_radian: bool = False) -> None\n",
" | \n",
" | \n",
" | Create a coordinate axis from values.\n",
" | \n",
" | Args:\n",
" | values (numpy.ndarray): Axis values.\n",
" | is_circle (bool, optional): True, if the axis can represent a\n",
" | circle. Defaults to ``false``.\n",
" | is_radian (bool, optional): True, if the coordinate system is radian.\n",
" | Defaults to ``false``.\n",
" | \n",
" | \n",
" | 2. __init__(self: pyindex.core.Axis, start: float, stop: float, step: float, is_circle: bool = False, is_radian: bool = False) -> None\n",
" | \n",
" | \n",
" | Create a coordinate axis from evenly spaced numbers over a specified\n",
" | interval.\n",
" | \n",
" | Args:\n",
" | start (float): The first value of the axis.\n",
" | stop (float): The last value of the axis.\n",
" | num (int): Number of samples in the axis.\n",
" | is_circle (bool, optional): True, if the axis can represent a circle.\n",
" | Defaults to ``false``.\n",
" | is_radian (bool, optional): True, if the coordinate system is radian.\n",
" | Defaults to ``false``.\n",
" | \n",
" | __len__(...)\n",
" | __len__(self: pyindex.core.Axis) -> int\n",
" | \n",
" | __ne__(...)\n",
" | __ne__(self: pyindex.core.Axis, arg0: pyindex.core.Axis) -> bool\n",
" | \n",
" | __setstate__(...)\n",
" | __setstate__(self: pyindex.core.Axis, arg0: tuple) -> None\n",
" | \n",
" | back(...)\n",
" | back(self: pyindex.core.Axis) -> float\n",
" | \n",
" | \n",
" | Get the last value of this axis\n",
" | \n",
" | Return:\n",
" | float: The last value\n",
" | \n",
" | find_index(...)\n",
" | find_index(self: pyindex.core.Axis, coordinates: numpy.ndarray[float64], bounded: bool = False) -> numpy.ndarray[int64]\n",
" | \n",
" | \n",
" | Given coordinate positions, find what grid elements contains them, or is\n",
" | closest to them.\n",
" | \n",
" | Args:\n",
" | coordinates (numpy.ndarray): Positions in this coordinate system\n",
" | bounded (bool, optional): True if you want to obtain the closest value to\n",
" | a coordinate outside the axis definition range.\n",
" | \n",
" | Return:\n",
" | numpy.ndarray: index of the grid points containing them or -1 if the\n",
" | ``bounded`` parameter is set to false and if one of the searched indexes\n",
" | is out of the definition range of the axis, otherwise the index of the\n",
" | closest value of the coordinate is returned.\n",
" | \n",
" | front(...)\n",
" | front(self: pyindex.core.Axis) -> float\n",
" | \n",
" | \n",
" | Get the first value of this axis\n",
" | \n",
" | Return:\n",
" | float: The first value\n",
" | \n",
" | increment(...)\n",
" | increment(self: pyindex.core.Axis) -> float\n",
" | \n",
" | \n",
" | Get increment value if is_regular()\n",
" | \n",
" | Raises:\n",
" | RuntimeError: if this instance does not represent a regular axis\n",
" | Return:\n",
" | float: Increment value\n",
" | \n",
" | is_regular(...)\n",
" | is_regular(self: pyindex.core.Axis) -> bool\n",
" | \n",
" | \n",
" | Check if this axis values are spaced regularly\n",
" | \n",
" | Return:\n",
" | bool: True if this axis values are spaced regularly\n",
" | \n",
" | max_value(...)\n",
" | max_value(self: pyindex.core.Axis) -> float\n",
" | \n",
" | \n",
" | Get the maximum coordinate value.\n",
" | \n",
" | Return:\n",
" | float: The maximum coordinate value.\n",
" | \n",
" | min_value(...)\n",
" | min_value(self: pyindex.core.Axis) -> float\n",
" | \n",
" | \n",
" | Get the minimum coordinate value.\n",
" | \n",
" | Return:\n",
" | float: The minimum coordinate value.\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Data descriptors defined here:\n",
" | \n",
" | is_circle\n",
" | Test if this axis represents a circle.\n",
" | \n",
" | Return:\n",
" | bool: True if this axis represents a circle\n",
" | \n",
" | ----------------------------------------------------------------------\n",
" | Static methods inherited from pybind11_builtins.pybind11_object:\n",
" | \n",
" | __new__(*args, **kwargs) from pybind11_builtins.pybind11_type\n",
" | Create and return a new object. See help(type) for accurate signature.\n",
"\n",
"None\n"
]
}
],
"source": [
"# I wrote a little bit of doc.\n",
"print(help(axis))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"del axis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SpatialIndex for cartesian values\n",
"\n",
"Boost doesn't implement dynamic typing of the number of dimensions. We have to do it at compilation, but my C++ class is a template and it' s easy to extend it to several dimensions. In my example, you can play with 5-dimensional spaces.\n",
"\n",
"However, the requests are very fast. By cons Python data must be copied into the memory space of the tree.\n"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['RTree2D', 'RTree3D', 'RTree4D', 'RTree5D', '__doc__', '__loader__', '__name__', '__package__', '__spec__']\n"
]
}
],
"source": [
"print(dir(core.cartesian))"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"tree = core.cartesian.RTree2D()\n",
"# Wikipedia example: https://fr.wikipedia.org/wiki/Arbre_kd#/media/File:Arbre_kd_noeuds_espace.svg\n",
"coordinates = np.array([[2, 3], [5, 4], [9, 6], [4, 7], [8, 1], [7, 2]])"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# The preferred method to use to optimize queries and data insertion.\n",
"tree.packing(coordinates)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pyindex.core.cartesian.RTree2D at 0x7f9274080768>"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pickle\n",
"pickle.loads(pickle.dumps(tree))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0]]\n"
]
}
],
"source": [
"dist, index = tree.query(np.array([[2.1, 1.1]]), k=1, num_threads=1)\n",
"print(index)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# We check that the requested point is enclosed by its neighbours.\n",
"dist, index = tree.query(np.array([[2.1, 1.1]]), k=1, within=True, num_threads=1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### SpatialIndex for geodetic values\n",
"\n",
"Here the space is limited to three dimensions: longitude, latitude and altitude. The conversion of LLA coordinates into ECEF Cartesian coordinates is managed by the class. To do this, a geodetic system must be defined. By default, we consider WGS-84."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"system = core.geodetic.System()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(6378137.0, 0.0033528106647474805)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"system.semi_major_axis, system.flattening"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"tree = core.geodetic.RTree() # or core.geodetic.RTree(system=None) or core.geodetic.RTree(system=system)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"lon = np.random.uniform(-180.0, 180.0, 2048*4096)\n",
"lat = np.random.uniform(-90.0, 90.0, 2048*4096)\n",
"alt = np.random.uniform(-10000, 100000, 2048*4096)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"tree.packing(np.asarray((lon, lat, alt)).T)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8388608"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(tree)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on method query in module pyindex.core.geodetic:\n",
"\n",
"query(...) method of pyindex.core.geodetic.RTree instance\n",
" query(self: pyindex.core.geodetic.RTree, coordinates: numpy.ndarray[float64], k: int = 4, within: bool = False, num_threads: int = 0) -> numpy.ndarray[float64]\n",
" \n",
" \n",
" Search for the nearest K nearest neighbors of a given point.\n",
" \n",
" Args:\n",
" coordinates (numpy.ndarray): A matrix ``(n, 2)`` to search points defined\n",
" by their longitudes and latitudes or a matrix ``(n, 3)`` to search\n",
" points defined by their longitudes, latitudes and altitudes.\n",
" k (int, optional): The number of nearest neighbors to be used for\n",
" calculating the interpolated value. Defaults to ``4``.\n",
" within (bool, optional): If true, the method ensures that the neighbors\n",
" found are located within the point of interest. Defaults to\n",
" ``false``.\n",
" num_threads (int, optional): The number of threads to use for the\n",
" computation. If 0 all CPUs are used. If 1 is given, no parallel\n",
" computing code is used at all, which is useful for debugging.\n",
" Defaults to ``0``.\n",
" Return:\n",
" tuple: A tuple containing a matrix describing for each provided position,\n",
" the distance, in meters, between the provided position and the found\n",
" neighbors and a matrix containing the values of the different neighbors\n",
" found for all provided positions.\n",
"\n"
]
}
],
"source": [
"help(tree.query)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"distance, index = tree.query([[lon[0], lat[0], alt[0]]], k=1)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([[0.]]), array([[0.]]))"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"distance, index"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"distance, index = tree.query([[lon[0], lat[0], alt[0]]], k=12)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0. , 9977.11397628, 9695.09425101, 11720.39071635,\n",
" 12801.98829248, 13112.11839679, 13377.86490828, 13869.59962806,\n",
" 8665.04763757, 12346.95727459, 16629.36295914, 13646.6701299 ]])"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# distance in meters for the WGS-84\n",
"distance"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[-5.18015000e+01, 3.68493323e+01, 3.02074448e+03],\n",
" [-1.04178038e+02, 3.92099274e+01, -8.65378579e+03],\n",
" [-7.98099204e+01, -8.52560463e+01, 5.56068082e+03],\n",
" ...,\n",
" [ 1.67551240e+02, -7.92561058e+01, 5.78649597e+04],\n",
" [ 9.81198620e+01, 7.24864674e+01, 9.21785563e+04],\n",
" [ 8.77325256e+01, -7.31085097e+01, 3.57823628e+04]])"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coordinates = np.asarray((\n",
" np.random.uniform(-180.0, 180.0, 10000),\n",
" np.random.uniform(-90.0, 90.0, 10000),\n",
" np.random.uniform(-10000, 100000, 10000))).T\n",
"coordinates"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.23 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%timeit tree.query(coordinates)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment