Created
January 9, 2023 21:26
-
-
Save eHorn96/ed0de2ae0150fb51f7e46dc2d5d4fde8 to your computer and use it in GitHub Desktop.
Preprocessing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _encode_flag(series, mapping, missing):
    """Return ``series`` with ``mapping`` applied and NaN replaced by ``missing``."""
    return series.replace(mapping).fillna(missing)


def _lane_count(value):
    """Turn one raw ``lanes`` value into a number (lists are summed)."""
    if isinstance(value, list):
        return sum(float(item) for item in value)
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            # Non-numeric strings are treated as missing and imputed later.
            return np.nan
    return value


def _highway_pair(value):
    """Split one raw edge ``highway`` value into a ``[u_highway, v_highway]`` pair."""
    if isinstance(value, str):
        return [value, value]
    if isinstance(value, list):
        if not value:
            return ["unclassified", "unclassified"]
        # A one-element list used to raise IndexError on value[1].
        return [value[0], value[1] if len(value) > 1 else value[0]]
    if isinstance(value, float):
        # NaN (missing tag) arrives as a float.
        return ["unclassified", "unclassified"]
    raise TypeError(
        f"Could not unnest value with type: {type(value)}"
    )


def graph_download(coords:list,
                   root_dir:str,
                   name:str,simplified=False,
                   distance=9000,
                   keep_geometries=False,
                   nominatim_endpoint='http://localhost:8088/',
                   network_type='drive'):
    """Build node/edge feature arrays for the street network around a point.

    If ``<root_dir>/raw/edges.csv`` does not exist, the graph is fetched with
    ``ox.graph_from_point`` and its node/edge tables are cleaned; otherwise
    the cached ``nodes.csv``/``edges.csv`` in ``<root_dir>/raw`` are loaded.
    Nodes are then labelled from the module-level ``unfaelle`` table, edge
    attributes are computed with ``ang_gen``/``dir_gen``, highway classes are
    one-hot encoded, missing values are imputed, and everything is written to
    ``<root_dir>/<name>/raw/<name>.npz``.

    Relies on names defined elsewhere in this module: ``unfaelle``,
    ``filter``, ``ang_gen`` and ``dir_gen``.

    Args:
        coords: Centre point handed to ``ox.graph_from_point``; ``coords[0]``
            and ``coords[1]`` are also passed to ``filter``.
        root_dir: Base directory for the CSV cache and the ``.npz`` output.
        name: Dataset name used for the output sub-directory and file name.
        simplified: Stored in the graph attributes only.
            NOTE(review): the download itself always uses ``simplify=False``
            -- confirm this is intended.
        distance: ``dist`` for ``ox.graph_from_point``; ``2 * distance`` is
            passed to ``filter``.
        keep_geometries: Forwarded as ``node_geometry`` and
            ``fill_edge_geometry`` to ``ox.graph_to_gdfs``.
        nominatim_endpoint: Base URL of the reverse-geocoding service used to
            fill missing node ``highway`` values.
        network_type: ``network_type`` for ``ox.graph_from_point``.

    Returns:
        None. The result is the ``.npz`` file and the two CSV files.

    Raises:
        TypeError: If an edge ``highway`` value is not a str, list or float.
        ValueError: If an edge endpoint has no matching node ``osmid``.
    """
    graph_attrs = {'crs': 'epsg:4326',
                   'simplified': simplified
                   }
    cols_dtypes = {"u":int,"v":int,
                   "oneway":bool,
                   "maxspeed":float,
                   "reversed":bool,
                   "length":float,
                   "lanes":float,
                   "access":bool,
                   "bridge":bool,
                   "width":float,
                   "tunnel":bool,
                   "u_highway":str,
                   "v_highway":str
                   }
    fp = os.fspath(root_dir)
    raw_fp = osp.join(fp, 'raw')
    # The CSVs live where the cache check looks. Previously they were written
    # to ``fp + 'edges.csv'`` (no path separator), so the check never matched.
    edges_csv = osp.join(raw_fp, 'edges.csv')
    nodes_csv = osp.join(raw_fp, 'nodes.csv')
    os.makedirs(raw_fp, exist_ok=True)

    if not os.path.exists(edges_csv):
        print("Files don't exist in path. Downloading through Overpass.")
        G = ox.graph_from_point(coords,
                                dist=distance,
                                dist_type="network",
                                network_type=network_type,
                                simplify=False,
                                truncate_by_edge=True)
        G = ox.speed.add_edge_speeds(G)
        nodes, edges = ox.graph_to_gdfs(G,
                                        nodes=True,
                                        node_geometry=keep_geometries,
                                        fill_edge_geometry=keep_geometries)

        # Columns that are not used as features; absent ones are skipped.
        edges = edges.drop(
            columns=['junction', 'geometry', 'ref', 'name',
                     'osmid', 'area', 'est_width', 'maxspeed'],
            errors='ignore')

        # Column assignment instead of chained ``inplace=True`` calls, which
        # do not write back to the frame under pandas Copy-on-Write.
        if "tunnel" in edges.columns:
            edges["tunnel"] = _encode_flag(
                edges["tunnel"],
                {"building_passage": 1, "yes": 1, "no": 0},
                missing=0)
        if "bridge" in edges.columns:
            edges["bridge"] = _encode_flag(
                edges["bridge"], {"yes": 1, "no": 0}, missing=0)
        if "access" in edges.columns:
            # Access values seen on < 2.5 % of edges are folded into 'yes'.
            access_counts = edges["access"].value_counts()
            rare_access = access_counts.index[
                access_counts < len(edges)*0.025]
            edges["access"] = np.where(edges["access"].isin(rare_access),
                                       'yes',
                                       edges["access"])
            edges["access"] = _encode_flag(
                edges["access"], {"no": 0, "yes": 1}, missing=1)
        if "width" in edges.columns:
            # Keep only the leading integer part of the width tag.
            edges["width"] = edges["width"].str.extract(
                r'(\d+)', expand=False).astype(float)

        if "highway" in nodes.columns:
            def fillhighway(row):
                # A missing highway tag is NaN, i.e. a float.
                if isinstance(row.highway, float):
                    res_obj = requests.get(nominatim_endpoint + f'reverse.php?lat={row.y}&lon={row.x}&format=jsonv2&extratags=1&zoom=17').json()
                    # NOTE(review): when the response has no "type" key
                    # (presumably an error payload), fall back to the same
                    # class used for rare values instead of raising KeyError.
                    return res_obj.get("type", "residential")
                return row["highway"]

            nodes["highway"] = nodes.apply(fillhighway, axis=1)
            # Highway classes seen on < 2.5 % of nodes become 'residential'.
            highway_counts = nodes["highway"].value_counts()
            rare_highways = highway_counts.index[
                highway_counts < (len(nodes)*0.025)]
            nodes["highway"] = np.where(nodes["highway"].isin(rare_highways),
                                        'residential',
                                        nodes["highway"])

        if "lanes" in edges.columns:
            edges["lanes"] = np.array(
                [_lane_count(value)
                 for value in tqdm(edges["lanes"], desc="lanes")],
                dtype=np.float32)

        highway_pairs = pd.DataFrame(
            [_highway_pair(value)
             for value in tqdm(edges["highway"].to_list())],
            columns=['u_highway', 'v_highway'],
            index=edges.index)
        edges['u_highway'] = highway_pairs['u_highway'].astype("category")
        edges['v_highway'] = highway_pairs['v_highway'].astype("category")
        edges = edges.drop(columns='highway')

        # ``== np.nan`` is always False, so the old check could never fail.
        assert not edges.index.to_frame().isna().to_numpy().any()
        nodes = nodes.drop(columns="ref", errors='ignore')
    else:
        print("Found csv files.")
        nodes = gpd.read_file(nodes_csv,
                              layer='nodes').convert_dtypes().set_index('osmid')
        edges = gpd.read_file(edges_csv,
                              layer='edges').convert_dtypes().set_index(
                                  ['u', 'v', 'key'])

    assert nodes.index.is_unique and edges.index.is_unique
    G = ox.graph_from_gdfs(nodes, edges, graph_attrs)

    # --- Node labels: 1 if an accident lies close to the node -------------
    y = np.zeros(len(nodes), dtype=np.int8)
    unf = unfaelle[unfaelle.apply(filter, args = (coords[0],coords[1],2*distance), axis=1)]
    unf = unf[["YGCSWGS84", "XGCSWGS84"]].to_numpy()
    tree = spatial.KDTree(nodes[['y', 'x']].to_numpy())
    # Threshold in the units of the y/x columns (presumably degrees, given
    # the epsg:4326 graph CRS); the original comment says "Tuned for Germany".
    match_radius = 6e-3
    for accident in tqdm(unf, 'Labeling'):
        # Separate local names so the ``distance`` parameter is not shadowed.
        neighbor_dist, neighbor_idx = tree.query(accident, k=5)
        y[neighbor_idx[neighbor_dist <= match_radius]] = 1

    # --- Geometric edge attributes ----------------------------------------
    edge_attr_ang = np.array(
        [ang_gen(idx[0], idx[1], G=G) for idx in edges.index],
        dtype=np.float32)
    # Rows where ang_gen produced NaN are replaced by (-1, -1, -1).
    edge_attr_ang[np.isnan(edge_attr_ang).any(axis=1)] = np.array(
        [-1, -1, -1], dtype=np.float32)
    edge_attr_dir = np.array(
        [dir_gen(idx[0], idx[1], G=G) for idx in edges.index],
        dtype=np.float32)

    # --- CSV round trip: cache the tables and flatten the indexes ---------
    edges = edges.drop(columns="geometry", errors='ignore')
    nodes = nodes.drop(columns="geometry", errors='ignore')
    edges.to_csv(edges_csv)
    nodes.to_csv(nodes_csv)
    edges = pd.read_csv(edges_csv, dtype=cols_dtypes)
    nodes = pd.read_csv(nodes_csv)

    # --- One-hot encode highway classes (fitted on u_highway only) --------
    benc_highway = OneHotEncoder(return_df=True, drop_invariant=False)
    benc_highway.fit(edges['u_highway'].to_numpy())
    edges_encoded_u = benc_highway.transform(edges['u_highway'].to_numpy())
    edges_encoded_v = benc_highway.transform(edges['v_highway'].to_numpy())
    nodes_encoded = benc_highway.transform(nodes['highway'].to_numpy())
    edges = pd.concat([edges, edges_encoded_u, edges_encoded_v], axis=1)
    nodes = pd.concat([nodes, nodes_encoded], axis=1)

    # --- Edge index: row position of each endpoint in ``nodes`` -----------
    # One indexed lookup per column instead of scanning all nodes per edge.
    osmid_index = pd.Index(nodes['osmid'])
    source_pos = osmid_index.get_indexer(edges['u'])
    target_pos = osmid_index.get_indexer(edges['v'])
    if (source_pos < 0).any() or (target_pos < 0).any():
        raise ValueError("Edge endpoint without a matching node osmid.")
    # Shape (num_edges, 2), as before.
    edge_index = np.stack([source_pos, target_pos], axis=1).astype(np.int32)

    # Columns 1 and 2 of the re-read node table, as in the original code
    # (presumably y and x -- TODO confirm the column order).
    coordinates = torch.from_numpy(
        nodes.iloc[:, 1:3].to_numpy()).to(torch.float)

    # --- Impute missing values --------------------------------------------
    edges_imputer = IterativeImputer()
    nodes_imputer = IterativeImputer()
    edges = edges.drop(['u', 'v', 'key', 'u_highway', 'v_highway'], axis=1)
    edges = edges.apply(pd.to_numeric, errors='coerce')
    edges = edges_imputer.fit_transform(edges)
    edges = np.concatenate([edges, edge_attr_ang, edge_attr_dir], axis=1)
    nodes = nodes.drop(['y', 'x', 'osmid', 'highway'], axis=1)
    nodes = nodes.apply(pd.to_numeric, errors='coerce')
    nodes = nodes_imputer.fit_transform(nodes)

    print(
        f"Number of nodes: {len(nodes)}"
    )
    print(
        f"Number of node features: {len(nodes[0])}"
    )
    print(
        f"Number of edges: {len(edges)}"
    )
    print(
        f"Positive Class relative frequency: {len(y[y==1])/len(y)}"
    )

    npz_path = f'{osp.join(fp,name,"raw",name)}.npz'
    # np.savez does not create missing parent directories.
    os.makedirs(osp.dirname(npz_path), exist_ok=True)
    np.savez(npz_path,
             x=nodes,
             y=y,
             edge_attr=edges, edge_index=edge_index,
             edge_attr_ang=edge_attr_ang,
             edge_attr_dir=edge_attr_dir,
             coordinates=coordinates)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment