Skip to content

Instantly share code, notes, and snippets.

@eHorn96
Created January 9, 2023 21:26
Show Gist options
  • Select an option

  • Save eHorn96/ed0de2ae0150fb51f7e46dc2d5d4fde8 to your computer and use it in GitHub Desktop.

Select an option

Save eHorn96/ed0de2ae0150fb51f7e46dc2d5d4fde8 to your computer and use it in GitHub Desktop.
Preprocessing
def _lanes_to_float(value):
    """Convert one raw OSM ``lanes`` value to a float (NaN when unparsable)."""
    if isinstance(value, list):
        # Several tagged values on one edge: use their sum.
        return sum(float(item) for item in value)
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return np.nan
    # Already numeric (or NaN): pass through unchanged.
    return value


def _highway_pair(value):
    """Split one raw ``highway`` value into a ``[u_highway, v_highway]`` pair."""
    if isinstance(value, str):
        return [value, value]
    if isinstance(value, list):
        return [value[0], value[1]]
    if isinstance(value, float):
        # A float here is the NaN of a missing tag.
        return ["unclassified", "unclassified"]
    raise TypeError(
        f"Could not unnest value with type: {type(value)}"
    )


def graph_download(coords:list,
                   root_dir:str,
                   name:str,simplified=False,
                   distance=9000,
                   keep_geometries=False,
                   nominatim_endpoint='http://localhost:8088/',
                   network_type='drive'):
    """Download a street network, label its nodes and save it as ``.npz``.

    The graph around ``coords`` is fetched with OSMnx, its node and edge
    tables are cleaned, every node close to a row of the module-level
    ``unfaelle`` table is labelled 1, and the resulting feature arrays are
    written to ``<root_dir>/<name>/raw/<name>.npz``. Intermediate
    ``edges.csv`` / ``nodes.csv`` files are written to ``root_dir``.

    Relies on names defined elsewhere in the module: ``unfaelle``,
    ``filter``, ``ang_gen`` and ``dir_gen``.

    Args:
        coords: Centre point handed to ``ox.graph_from_point``; its first
            two entries are also forwarded to ``filter``.
        root_dir: Directory receiving the CSV files and the ``.npz`` tree.
        name: Dataset name used for the ``.npz`` sub-directory and file.
        simplified: Stored as the ``simplified`` graph attribute when the
            graph is rebuilt from cached CSVs. The download itself always
            uses ``simplify=False``.
        distance: ``dist`` for ``ox.graph_from_point``; ``2 * distance`` is
            forwarded to ``filter``.
        keep_geometries: Forwarded to ``ox.graph_to_gdfs`` as
            ``node_geometry`` and ``fill_edge_geometry``.
        nominatim_endpoint: Base URL of the Nominatim instance queried for
            nodes without a ``highway`` tag.
        network_type: ``network_type`` for ``ox.graph_from_point``.

    Returns:
        None. Results are written to disk and a summary is printed.
    """
    graph_attrs = {'crs': 'epsg:4326',
                   'simplified': simplified
                   }
    cols_dtypes = {"u":int,"v":int,
                   "oneway":bool,
                   "maxspeed":float,
                   "reversed":bool,
                   "length":float,
                   "lanes":float,
                   "access":bool,
                   "bridge":bool,
                   "width":float,
                   "tunnel":bool,
                   "u_highway":str,
                   "v_highway":str
                   }
    fp = osp.join(root_dir)
    raw_fp = osp.join(fp,'raw')
    # Join with a separator so the CSVs land inside root_dir even when it
    # has no trailing slash (plain string concatenation did not).
    edges_csv = osp.join(fp,'edges.csv')
    nodes_csv = osp.join(fp,'nodes.csv')
    # NOTE(review): the cache is looked up in <root_dir>/raw but the CSVs
    # are written to <root_dir>, so this branch is normally taken on every
    # call. Confirm the cached branch works before aligning the two.
    if not os.path.exists(raw_fp +'/edges.csv'):
        os.makedirs(fp,exist_ok=True)
        print("Files don't exist in path. Downloading through Overpass.")
        G = ox.graph_from_point(coords,
                                dist=distance,
                                dist_type="network",
                                network_type=network_type,
                                simplify=False,
                                truncate_by_edge=True)
        G = ox.speed.add_edge_speeds(G)
        nodes,edges = ox.graph_to_gdfs(G,
                                       nodes=True,
                                       node_geometry=keep_geometries,
                                       fill_edge_geometry=keep_geometries)
        # 'maxspeed' is filtered like the rest so an area without that tag
        # does not raise KeyError.
        useless_columns = ['junction','geometry','ref','name',
                           'osmid','area','est_width','maxspeed']
        useless_columns_edges = [i for i in useless_columns if
                                 i in edges.columns]
        edges.drop(useless_columns_edges,axis=1,inplace=True)
        # Columns are reassigned rather than edited through
        # `edges.col.method(inplace=True)`, which does not write back to the
        # frame under pandas Copy-on-Write.
        if "tunnel" in edges.columns:
            edges["tunnel"] = (edges["tunnel"]
                               .replace("building_passage",1)
                               .fillna(0)
                               .replace("yes",1)
                               .replace("no",0))
        if "bridge" in edges.columns:
            edges["bridge"] = (edges["bridge"]
                               .fillna(0)
                               .replace("yes",1)
                               .replace("no",0))
        if "access" in edges.columns:
            # Values covering less than 2.5 % of the edges become 'yes'.
            counts = edges["access"].value_counts()
            rare = counts.index[counts < len(edges)*0.025]
            edges["access"] = np.where(edges["access"].isin(rare), 'yes',
                                       edges["access"])
            edges["access"] = (edges["access"]
                               .fillna(1)
                               .replace("no",0)
                               .replace("yes",1))
        if "width" in edges.columns:
            # Keep the first run of digits of each width string.
            edges["width"] = edges["width"].str.extract(
                r'(\d+)',expand=False).astype(float)
        if "highway" in nodes.columns:
            def fillhighway(row):
                # A float here is the NaN of a missing tag: ask Nominatim
                # for the type of the feature at the node position.
                if isinstance(row.highway,float):
                    res_obj = requests.get(nominatim_endpoint + f'reverse.php?lat={row.y}&lon={row.x}&format=jsonv2&extratags=1&zoom=17').json()
                    return res_obj["type"]
                else:
                    return row["highway"]
            nodes["highway"] = nodes.apply(fillhighway,axis=1)
            # Node types covering less than 2.5 % of the nodes become
            # 'residential'.
            counts = nodes["highway"].value_counts()
            rare = counts.index[counts < (len(nodes)*0.025)]
            nodes["highway"] = np.where(nodes["highway"].isin(rare),
                                        'residential',
                                        nodes["highway"])
        if "lanes" in edges.columns:
            edges["lanes"] = [_lanes_to_float(l)
                              for l in tqdm(edges["lanes"],desc = "lanes")]
            edges["lanes"] = edges["lanes"].astype(np.float32)
        edge_highways = [_highway_pair(h)
                         for h in tqdm(edges["highway"].to_list())]
        edges['u_highway'] = [pair[0] for pair in edge_highways]
        edges['v_highway'] = [pair[1] for pair in edge_highways]
        edges.drop('highway',axis=1,inplace=True)
        # `x == np.nan` is always False, so missing index entries are
        # detected with isna() instead.
        assert not edges.index.to_frame(index=False).isna().to_numpy().any(), \
            "edge index contains NaN entries"
        if 'ref' in nodes.columns:
            nodes.drop("ref",axis=1,inplace=True)
        edges["u_highway"] = edges["u_highway"].astype("category")
        edges["v_highway"] = edges["v_highway"].astype("category")
    else:
        print("Found csv files.")
        nodes = gpd.read_file(nodes_csv,
                              layer = 'nodes').convert_dtypes().set_index('osmid')
        edges = gpd.read_file(edges_csv,
                              layer='edges').convert_dtypes().set_index(
                                  ['u','v','key'])
        assert nodes.index.is_unique and edges.index.is_unique
        G = ox.graph_from_gdfs(nodes,edges, graph_attrs)

    # Node labels: 1 for every node close to a selected `unfaelle` row.
    y = np.zeros(len(nodes),dtype=np.int8)
    unf = unfaelle[unfaelle.apply(filter, args = (coords[0],coords[1],2*distance), axis=1)]
    unf = unf[["YGCSWGS84","XGCSWGS84"]].to_numpy()
    pts = nodes[['y','x']].to_numpy()
    tree = spatial.KDTree(pts)
    for point in tqdm(unf,'Labeling'):
        neighbour_dist, neighbour_idx = tree.query(point,k=5)
        # Threshold is in the units of the node coordinates (presumably
        # degrees - TODO confirm); the original note says it was tuned for
        # Germany.
        y[neighbour_idx[neighbour_dist <= 6e-3]] = 1

    edge_attr_ang = np.array(
        [ang_gen(idx[0], idx[1], G=G) for idx in edges.index],
        dtype=np.float32)
    # Rows for which ang_gen produced NaN are set to -1 in all three columns.
    nan_rows = np.where(np.isnan(edge_attr_ang))[0]
    edge_attr_ang[nan_rows] = np.array([-1,-1,-1],dtype=np.float32)
    edge_attr_dir = np.array(
        [dir_gen(idx[0], idx[1], G=G) for idx in edges.index],
        dtype=np.float32)

    if "geometry" in edges.columns:
        edges.drop("geometry",axis=1,inplace=True)
    if "geometry" in nodes.columns:
        nodes.drop("geometry",axis=1,inplace=True)
    # Round-trip through CSV so the index levels come back as plain columns
    # ('u', 'v', 'key', 'osmid').
    edges.to_csv(edges_csv)
    nodes.to_csv(nodes_csv)
    edges = pd.read_csv(edges_csv,dtype=cols_dtypes)
    nodes = pd.read_csv(nodes_csv)

    # One-hot encode the highway classes with an encoder fitted on u_highway.
    benc_highway = OneHotEncoder(return_df=True,drop_invariant=False)
    benc_highway.fit(edges['u_highway'].to_numpy())
    edges_encoded_u = benc_highway.transform(
        edges['u_highway'].to_numpy())
    edges_encoded_v = benc_highway.transform(
        edges['v_highway'].to_numpy())
    nodes_encoded = benc_highway.transform(
        nodes['highway'].to_numpy())
    edges = pd.concat([edges,
                       edges_encoded_u,
                       edges_encoded_v],
                      axis = 1)
    nodes = pd.concat([nodes,
                       nodes_encoded],
                      axis=1)

    # Translate the (u, v) osmid pairs into row positions within `nodes`
    # with one index lookup instead of scanning all nodes per edge.
    node_positions = pd.Index(nodes['osmid'])
    endpoints = edges[['u','v']].to_numpy()
    edge_index = np.stack(
        [node_positions.get_indexer(endpoints[:,0]),
         node_positions.get_indexer(endpoints[:,1])],
        axis=1).astype(np.int32)
    if (edge_index < 0).any():
        raise ValueError("Edge endpoint not found among the node osmids.")

    # Columns 1 and 2 of the reloaded node table are taken as coordinates.
    # A separate name keeps the `coords` argument intact.
    node_coords = torch.from_numpy(
        nodes.iloc[:,1:3].to_numpy()).to(torch.float)

    edges_imputer = IterativeImputer()
    nodes_imputer = IterativeImputer()
    edges.drop(['u',
                'v',
                'key',
                'u_highway',
                'v_highway'],
               axis=1,inplace=True)
    edges = edges.apply(pd.to_numeric,errors='coerce')
    edges = edges_imputer.fit_transform(edges)
    edges = np.concatenate([edges,edge_attr_ang,edge_attr_dir],axis=1)
    nodes.drop(['y','x','osmid','highway'],axis=1,inplace=True)
    nodes = nodes.apply(pd.to_numeric,errors='coerce')
    nodes = nodes_imputer.fit_transform(nodes)

    print(
        f"Number of nodes: {len(nodes)}"
    )
    print(
        f"Number of node features: {len(nodes[0])}"
    )
    print(
        f"Number of edges: {len(edges)}"
    )
    print(
        f"Positive Class relative frequency: {len(y[y==1])/len(y)}"
    )
    out_path = f'{osp.join(fp,name,"raw",name)}.npz'
    # np.savez does not create missing parent directories.
    os.makedirs(osp.dirname(out_path),exist_ok=True)
    np.savez(out_path,
             x=nodes,
             y=y,
             edge_attr=edges,edge_index=edge_index,
             edge_attr_ang=edge_attr_ang,
             edge_attr_dir=edge_attr_dir,
             coordinates=node_coords)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment