Skip to content

Commit

Permalink
added neighborhood and transit info to routes
Browse files Browse the repository at this point in the history
  • Loading branch information
Margaret Li committed Jul 18, 2024
1 parent 2f8b6b7 commit aa4bbdb
Show file tree
Hide file tree
Showing 7 changed files with 7,103 additions and 0 deletions.
56 changes: 56 additions & 0 deletions locs/extract_locs_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pandas as pd
import geojson
import gpxpy
import os
from shapely import distance
from shapely.geometry import shape, Point

# Repository root: the parent of the directory that contains this script.
ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir))

# Directory that holds the per-system stop CSVs.
_RAW_DATA_DIR = f"{ROOT}/locs/seattle_city_raw_data"

# Transit system name -> path of the CSV listing that system's stops.
TRANSPORT_FILES = {
    "Light Rail": f"{_RAW_DATA_DIR}/sound_transit_light_rail.csv",
    "Ferry": f"{_RAW_DATA_DIR}/ferry.csv",
    "Bus": f"{_RAW_DATA_DIR}/bus.csv",
}

# CSV of locations that this script reads.
LOCS_CSV_FILE = f"{ROOT}/locs/db.csv"

# Load the locations table; the script refuses to continue without an "id" column.
df = pd.read_csv(LOCS_CSV_FILE)
assert "id" in df.columns, "db.csv does not contain location ids"

def near_enough(p1, p2, threshold=0.005):
    """Return whether ``p1`` and ``p2`` are strictly closer than ``threshold``.

    The separation comes from shapely's ``distance``, so it is expressed in
    the coordinate units of the two geometries. The default of 0.005 is the
    author's estimate of ~0.3 miles, or about 6 minutes of walking.
    """
    separation = distance(p1, p2)
    return separation < threshold

# Stop name -> {'lat', 'lon', 'system'} for every stop in every transit CSV.
# Keyed by name only, so a later row with the same stop_name replaces an
# earlier one.
STOPS = {}

for system_name, file_name in TRANSPORT_FILES.items():
    system_df = pd.read_csv(file_name)
    stop_columns = zip(
        system_df['stop_name'], system_df['stop_lat'], system_df['stop_lon']
    )
    for stop_name, stop_lat, stop_lon in stop_columns:
        STOPS[stop_name] = {
            'lat': stop_lat,
            'lon': stop_lon,
            'system': system_name,
        }

# construct point based on lon/lat returned by geocoder

# check each polygon to see if it contains the point

## make sure every output column exists before rows are filled in
for wanted in ("transit",):
    if wanted in df:
        continue
    df[wanted] = ""

# Tag each location with a transit stop that lies within near_enough() of it.
for index, row in df.iterrows():
    # x is longitude and y is latitude, matching the stop points built below.
    loc_point = Point(row['long'], row['lat'])

    for stop_name, stop_dict in STOPS.items():
        if near_enough(loc_point, Point(stop_dict['lon'], stop_dict['lat'])):
            # Single quotes inside the f-string: reusing the enclosing double
            # quotes only parses on Python 3.12+.
            df.at[index, "transit"] = f"{stop_dict['system']} to {stop_name} stop"
            # The first match in STOPS iteration order wins, not the closest stop.
            break


# The result is written next to the input as "<db.csv>_new"; the input file
# itself is not overwritten.
df.to_csv(LOCS_CSV_FILE + "_new")


Large diffs are not rendered by default.

6,517 changes: 6,517 additions & 0 deletions locs/seattle_city_raw_data/bus.csv

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions locs/seattle_city_raw_data/ferry.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
stop_id,stop_name,stop_lat,stop_lon
1,Anacortes,48.506483,-122.678254
10,Friday Harbor,48.535066,-123.014750
11,Coupeville,48.159745,-122.672678
12,Kingston,47.796299,-122.496454
13,Lopez Island,48.570421,-122.883628
14,Mukilteo,47.948826,-122.304313
15,Orcas Island,48.597930,-122.944101
16,Point Defiance,47.305550,-122.514242
17,Port Townsend,48.112664,-122.760505
18,Shaw Island,48.584021,-122.929298
20,Southworth,47.511925,-122.499543
21,Tahlequah,47.333086,-122.507054
22,Vashon Island,47.508431,-122.464058
3,Bainbridge Island,47.623094,-122.510809
4,Bremerton,47.562533,-122.625146
5,Clinton,47.974564,-122.352037
7,Seattle,47.602597,-122.337635
8,Edmonds,47.812542,-122.382900
9,Fauntleroy,47.523178,-122.393113
159 changes: 159 additions & 0 deletions locs/seattle_city_raw_data/sound_transit_light_rail.csv

Large diffs are not rendered by default.

158 changes: 158 additions & 0 deletions routes/db.csv_new

Large diffs are not rendered by default.

92 changes: 92 additions & 0 deletions routes/extract_route_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import pandas as pd
import geojson
import gpxpy
import os
from shapely import distance
from shapely.geometry import shape, Point

# Repository root: the parent of the directory that contains this script.
ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), os.pardir))

# Directory that holds the raw city data files used below.
_RAW_DATA_DIR = f"{ROOT}/locs/seattle_city_raw_data"

# GeoJSON whose features carry the neighborhood polygons.
NEIGHBORHOOD_FILE = f"{_RAW_DATA_DIR}/Neighborhood_Map_Atlas_Neighborhoods.geojson"
# CSV of routes that this script reads.
ROUTES_CSV_FILE = f"{ROOT}/routes/db.csv"

# Transit system name -> path of the CSV listing that system's stops.
TRANSPORT_FILES = {
    "Light Rail": f"{_RAW_DATA_DIR}/sound_transit_light_rail.csv",
    "Ferry": f"{_RAW_DATA_DIR}/ferry.csv",
    "Bus": f"{_RAW_DATA_DIR}/bus.csv",
}
LOCS_CSV_FILE = f"{ROOT}/locs/db.csv"

# Stop name -> {'lat', 'lon', 'system'} for every stop in every transit CSV.
# Keyed by name only, so a later row with the same stop_name replaces an
# earlier one.
STOPS = {}
for system_name, file_name in TRANSPORT_FILES.items():
    system_df = pd.read_csv(file_name)
    stop_columns = zip(
        system_df['stop_name'], system_df['stop_lat'], system_df['stop_lon']
    )
    for stop_name, stop_lat, stop_lon in stop_columns:
        STOPS[stop_name] = {
            'lat': stop_lat,
            'lon': stop_lon,
            'system': system_name,
        }

# Load the routes table; the script refuses to continue without an "id" column.
df = pd.read_csv(ROUTES_CSV_FILE)
assert "id" in df.columns, "db.csv does not contain route ids"

with open(NEIGHBORHOOD_FILE) as f:
    gj = geojson.load(f)

# (S_HOOD, L_HOOD) property pair -> shapely geometry of that feature.
# The main loop treats the first element as the fine-grained name and the
# second as the coarse one.
NEIGHBORHOOD_POLYGONS = {
    (feature["properties"]["S_HOOD"], feature["properties"]["L_HOOD"]): shape(
        feature["geometry"]
    )
    for feature in gj['features']
}

def near_enough(p1, p2, threshold=0.005):
    """Decide whether a stop is "close enough": distance strictly below ``threshold``.

    The separation comes from shapely's ``distance``, so it is expressed in
    the coordinate units of the two geometries. The default of 0.005 is the
    author's estimate of ~0.3 miles, or about 6 minutes of walking.
    """
    separation = distance(p1, p2)
    return separation < threshold

## make sure every output column exists before rows are filled in
for wanted in (
    "neighborhoods",
    "coarse_neighborhoods",
    "start_neighborhood",
    "end_neighborhood",
    "transit",
):
    if wanted in df:
        continue
    df[wanted] = ""

# Annotate every route with the neighborhoods its GPX points fall in and with
# a transit stop that lies within near_enough() of its first point.
for index, row in df.iterrows():
    route_id = row["id"]
    # Context manager so the GPX file handle is closed once parsing is done.
    with open(f"{ROOT}/routes/gpx/{route_id}.gpx", 'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    # Every point of every segment of every track, in file order.
    route_points = [
        point
        for track in gpx.tracks
        for segment in track.segments
        for point in segment.points
    ]

    # One entry per (point, containing polygon) hit, in route order.
    route_neighborhoods = []
    coarse_route_neighborhoods = []
    for point in route_points:
        p = Point(point.longitude, point.latitude)
        for (n_name, n_coarse_name), n_shape in NEIGHBORHOOD_POLYGONS.items():
            if n_shape.contains(p):
                route_neighborhoods.append(n_name)
                coarse_route_neighborhoods.append(n_coarse_name)

    if not route_neighborhoods:
        # No point fell inside any known polygon.
        route_neighborhoods.append("non-Seattle")
    df.at[index, "start_neighborhood"] = route_neighborhoods[0]
    if row['type'] in ["Loop", "OB"]:
        # These route types are recorded as ending where they start.
        df.at[index, "end_neighborhood"] = route_neighborhoods[0]
    elif row['type'] in ["P2P"]:
        df.at[index, "end_neighborhood"] = route_neighborhoods[-1]
    # hacky, to allow saving list as a csv column
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # joined string is the same on every run (set ordering is not).
    df.at[index, "neighborhoods"] = ";".join(dict.fromkeys(route_neighborhoods))
    df.at[index, "coarse_neighborhoods"] = ";".join(
        dict.fromkeys(coarse_route_neighborhoods)
    )

    if not route_points:
        # A GPX without any points has no start to match against stops.
        continue
    start = route_points[0]
    start_point = Point(start.longitude, start.latitude)

    for stop_name, stop_dict in STOPS.items():
        if near_enough(start_point, Point(stop_dict['lon'], stop_dict['lat'])):
            df.at[index, "transit"] = f"{stop_dict['system']} to {stop_name} stop"
            # The first match in STOPS iteration order wins, not the closest stop.
            break


# The result is written next to the input as "<db.csv>_new"; the input file
# itself is not overwritten.
# NOTE(review): to_csv also writes the DataFrame index as a leading column by
# default - confirm that is wanted in the output.
df.to_csv(ROUTES_CSV_FILE + "_new")


0 comments on commit aa4bbdb

Please sign in to comment.