added neighborhood and transit info to routes

raceconditionrunning · Jul 18, 2024 · aa4bbdb · aa4bbdb
1 parent 2f8b6b7
commit aa4bbdb
Show file tree

Hide file tree

Showing 7 changed files with 7,103 additions and 0 deletions.
diff --git a/locs/extract_locs_info.py b/locs/extract_locs_info.py
@@ -0,0 +1,56 @@
+import pandas as pd
+import geojson
+import gpxpy
+import os
+from shapely import distance
+from shapely.geometry import shape, Point
+
+ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
+
+TRANSPORT_FILES = {
+    "Light Rail": f"{ROOT}/locs/seattle_city_raw_data/sound_transit_light_rail.csv",
+    "Ferry": f"{ROOT}/locs/seattle_city_raw_data/ferry.csv",
+    "Bus": f"{ROOT}/locs/seattle_city_raw_data/bus.csv",
+}
+LOCS_CSV_FILE = f"{ROOT}/locs/db.csv"
+
+df = pd.read_csv(LOCS_CSV_FILE)
+assert "id" in list(df), "db.csv does not contain location ids"
+
+def near_enough(p1, p2, threshold=0.005): #0.005 ~= 0.3 miles or 6 minutes of walking
+    return distance(p1, p2) < threshold
+
+STOPS = {}
+
+for system_name, file_name in TRANSPORT_FILES.items():
+    system_df = pd.read_csv(TRANSPORT_FILES[system_name])
+    for index, row in system_df.iterrows():
+        STOPS[row['stop_name']] = {
+            'lat': row['stop_lat'],
+            'lon': row['stop_lon'],
+            'system': system_name,
+        }
+
+# construct point based on lon/lat returned by geocoder
+
+# check each polygon to see if it contains the point
+
+## add new columns if not existing in csv
+for col_name in ["transit"]:
+    if col_name not in df:
+        df[col_name] = ""
+
+for index, row in df.iterrows():
+    id = row["id"]
+
+    loc_point = Point(row['long'], row['lat'])
+
+    for stop_name, stop_dict in STOPS.items():
+        if near_enough(loc_point, Point(stop_dict['lon'], stop_dict['lat'])):
+            df.at[index, "transit"] = f"{stop_dict["system"]} to {stop_name} stop"
+            break
+
+
+df.to_csv(LOCS_CSV_FILE + "_new")
+
+
diff --git a/locs/seattle_city_raw_data/Neighborhood_Map_Atlas_Neighborhoods.geojson b/locs/seattle_city_raw_data/Neighborhood_Map_Atlas_Neighborhoods.geojson
diff --git a/locs/seattle_city_raw_data/bus.csv b/locs/seattle_city_raw_data/bus.csv
diff --git a/locs/seattle_city_raw_data/ferry.csv b/locs/seattle_city_raw_data/ferry.csv
@@ -0,0 +1,20 @@
+stop_id,stop_name,stop_lat,stop_lon
+1,Anacortes,48.506483,-122.678254
+10,Friday Harbor,48.535066,-123.014750
+11,Coupeville,48.159745,-122.672678
+12,Kingston,47.796299,-122.496454
+13,Lopez Island,48.570421,-122.883628
+14,Mukilteo,47.948826,-122.304313
+15,Orcas Island,48.597930,-122.944101
+16,Point Defiance,47.305550,-122.514242
+17,Port Townsend,48.112664,-122.760505
+18,Shaw Island,48.584021,-122.929298
+20,Southworth,47.511925,-122.499543
+21,Tahlequah,47.333086,-122.507054
+22,Vashon Island,47.508431,-122.464058
+3,Bainbridge Island,47.623094,-122.510809
+4,Bremerton,47.562533,-122.625146
+5,Clinton,47.974564,-122.352037
+7,Seattle,47.602597,-122.337635
+8,Edmonds,47.812542,-122.382900
+9,Fauntleroy,47.523178,-122.393113
diff --git a/locs/seattle_city_raw_data/sound_transit_light_rail.csv b/locs/seattle_city_raw_data/sound_transit_light_rail.csv
diff --git a/routes/db.csv_new b/routes/db.csv_new
diff --git a/routes/extract_route_info.py b/routes/extract_route_info.py
@@ -0,0 +1,92 @@
+import pandas as pd
+import geojson
+import gpxpy
+import os
+from shapely import distance
+from shapely.geometry import shape, Point
+
+ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))
+
+NEIGHBORHOOD_FILE = f"{ROOT}/locs/seattle_city_raw_data/Neighborhood_Map_Atlas_Neighborhoods.geojson"
+ROUTES_CSV_FILE = f"{ROOT}/routes/db.csv"
+
+TRANSPORT_FILES = {
+    "Light Rail": f"{ROOT}/locs/seattle_city_raw_data/sound_transit_light_rail.csv",
+    "Ferry": f"{ROOT}/locs/seattle_city_raw_data/ferry.csv",
+    "Bus": f"{ROOT}/locs/seattle_city_raw_data/bus.csv",
+}
+LOCS_CSV_FILE = f"{ROOT}/locs/db.csv"
+
+STOPS = {}
+for system_name, file_name in TRANSPORT_FILES.items():
+    system_df = pd.read_csv(TRANSPORT_FILES[system_name])
+    for index, row in system_df.iterrows():
+        STOPS[row['stop_name']] = {
+            'lat': row['stop_lat'],
+            'lon': row['stop_lon'],
+            'system': system_name,
+        }
+
+df = pd.read_csv(ROUTES_CSV_FILE)
+assert "id" in list(df), "db.csv does not contain route ids"
+
+NEIGHBORHOOD_POLYGONS = {}
+with open(NEIGHBORHOOD_FILE) as f:
+    gj = geojson.load(f)
+
+for n_obj in gj['features']:
+    n_lname = n_obj["properties"]["L_HOOD"]
+    n_sname = n_obj["properties"]["S_HOOD"]
+    n_shape = shape(n_obj["geometry"])
+    NEIGHBORHOOD_POLYGONS[(n_sname, n_lname)] = n_shape
+
+# determine if this stop is "close enough"
+def near_enough(p1, p2, threshold=0.005): #0.005 ~= 0.3 miles or 6 minutes of walking
+    return distance(p1, p2) < threshold
+
+## add new columns if not existing in csv
+for col_name in ["neighborhoods", "coarse_neighborhoods", "start_neighborhood", "end_neighborhood", "transit"]:
+    if col_name not in df:
+        df[col_name] = ""
+
+for index, row in df.iterrows():
+    id = row["id"]
+    gpx_file = open(f"{ROOT}/routes/gpx/{id}.gpx", 'r')
+
+    gpx = gpxpy.parse(gpx_file)
+
+    route_neighborhoods = []
+    coarse_route_neighborhoods = []
+    for track in gpx.tracks:
+        for segment in track.segments:
+            for point in segment.points:
+                p = Point(point.longitude, point.latitude)
+                for (n_name, n_coarse_name), n_shape in NEIGHBORHOOD_POLYGONS.items():
+                    # import pdb;pdb.set_trace()
+                    if n_shape.contains(p):
+                        route_neighborhoods.append(n_name)
+                        coarse_route_neighborhoods.append(n_coarse_name)
+
+    if len(route_neighborhoods) == 0:
+        route_neighborhoods.append("non-Seattle")
+    df.at[index, "start_neighborhood"] = route_neighborhoods[0]
+    if row['type'] in ["Loop", "OB"]:
+        df.at[index, "end_neighborhood"] = route_neighborhoods[0]
+    elif row['type'] in ["P2P"]:
+        df.at[index, "end_neighborhood"] = route_neighborhoods[-1]
+    # hacky, to allow saving list as a csv column
+    df.at[index, "neighborhoods"] = ";".join(list(set(route_neighborhoods)))
+    df.at[index, "coarse_neighborhoods"] = ";".join(list(set(coarse_route_neighborhoods)))
+
+    start = gpx.tracks[0].segments[0].points[0]
+    start_point = Point(start.longitude, start.latitude)
+
+    for stop_name, stop_dict in STOPS.items():
+        if near_enough(start_point, Point(stop_dict['lon'], stop_dict['lat'])):
+            df.at[index, "transit"] = f"{stop_dict['system']} to {stop_name} stop"
+            break
+
+
+df.to_csv(ROUTES_CSV_FILE + "_new")
+
+