-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added neighborhood and transit info to routes
- Loading branch information
Margaret Li
committed
Jul 18, 2024
1 parent
2f8b6b7
commit aa4bbdb
Showing
7 changed files
with
7,103 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import pandas as pd | ||
import geojson | ||
import gpxpy | ||
import os | ||
from shapely import distance | ||
from shapely.geometry import shape, Point | ||
|
||
# Repository root: the parent of the directory that contains this script.
ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))

# One stop table per transit system; each key is used as that system's label.
TRANSPORT_FILES = {
    "Light Rail": f"{ROOT}/locs/seattle_city_raw_data/sound_transit_light_rail.csv",
    "Ferry": f"{ROOT}/locs/seattle_city_raw_data/ferry.csv",
    "Bus": f"{ROOT}/locs/seattle_city_raw_data/bus.csv",
}
LOCS_CSV_FILE = f"{ROOT}/locs/db.csv"

df = pd.read_csv(LOCS_CSV_FILE)
# Validate with an explicit raise: an `assert` is stripped under `python -O`,
# which would let a malformed csv through silently.
if "id" not in df.columns:
    raise ValueError("db.csv does not contain location ids")
|
||
def near_enough(p1, p2, threshold=0.005):
    """Return True when `p1` and `p2` are strictly closer than `threshold`.

    The separation is whatever `distance` reports for the two geometries.
    The points passed in this script are built from lon/lat values, so
    `threshold` is expressed in degrees: 0.005 ~= 0.3 miles or 6 minutes
    of walking.

    NOTE(review): a degree of longitude covers less ground than a degree of
    latitude away from the equator, so the cutoff is not the same walking
    distance in every direction - confirm this is acceptable.
    """
    separation = distance(p1, p2)
    return separation < threshold
|
||
# Every known stop, keyed by stop name -> {'lat', 'lon', 'system'}.
# The key is the name alone, so a later row with the same stop name (from
# any system) replaces the earlier entry.
STOPS = {}

for system_name, file_name in TRANSPORT_FILES.items():
    stops_table = pd.read_csv(file_name)
    for _, stop in stops_table.iterrows():
        record = {
            'lat': stop['stop_lat'],
            'lon': stop['stop_lon'],
            'system': system_name,
        }
        STOPS[stop['stop_name']] = record
|
||
# Tag each location with the first transit stop found within walking
# distance, then write the annotated table next to the original csv.

## add new columns if not existing in csv
for col_name in ["transit"]:
    if col_name not in df:
        df[col_name] = ""

# Build each stop's point once instead of once per location.
# shapely points are (x, y), i.e. (longitude, latitude).
stop_points = [
    (stop_name, stop_dict['system'], Point(stop_dict['lon'], stop_dict['lat']))
    for stop_name, stop_dict in STOPS.items()
]

for index, row in df.iterrows():
    loc_point = Point(row['long'], row['lat'])

    for stop_name, system_name, stop_point in stop_points:
        if near_enough(loc_point, stop_point):
            # Single quotes inside the f-string: reusing the outer double
            # quotes is a SyntaxError on Python versions before 3.12.
            df.at[index, "transit"] = f"{system_name} to {stop_name} stop"
            # The first match in STOPS order wins; it is not necessarily
            # the nearest stop.
            break

# NOTE(review): the DataFrame index is written as an extra unnamed first
# column; pass index=False if this file is meant to replace db.csv - confirm.
df.to_csv(LOCS_CSV_FILE + "_new")
|
||
|
101 changes: 101 additions & 0 deletions
101
locs/seattle_city_raw_data/Neighborhood_Map_Atlas_Neighborhoods.geojson
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
stop_id,stop_name,stop_lat,stop_lon | ||
1,Anacortes,48.506483,-122.678254 | ||
10,Friday Harbor,48.535066,-123.014750 | ||
11,Coupeville,48.159745,-122.672678 | ||
12,Kingston,47.796299,-122.496454 | ||
13,Lopez Island,48.570421,-122.883628 | ||
14,Mukilteo,47.948826,-122.304313 | ||
15,Orcas Island,48.597930,-122.944101 | ||
16,Point Defiance,47.305550,-122.514242 | ||
17,Port Townsend,48.112664,-122.760505 | ||
18,Shaw Island,48.584021,-122.929298 | ||
20,Southworth,47.511925,-122.499543 | ||
21,Tahlequah,47.333086,-122.507054 | ||
22,Vashon Island,47.508431,-122.464058 | ||
3,Bainbridge Island,47.623094,-122.510809 | ||
4,Bremerton,47.562533,-122.625146 | ||
5,Clinton,47.974564,-122.352037 | ||
7,Seattle,47.602597,-122.337635 | ||
8,Edmonds,47.812542,-122.382900 | ||
9,Fauntleroy,47.523178,-122.393113 |
159 changes: 159 additions & 0 deletions
159
locs/seattle_city_raw_data/sound_transit_light_rail.csv
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import pandas as pd | ||
import geojson | ||
import gpxpy | ||
import os | ||
from shapely import distance | ||
from shapely.geometry import shape, Point | ||
|
||
# Repository root: the parent of the directory that contains this script.
ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__), '..'))

NEIGHBORHOOD_FILE = f"{ROOT}/locs/seattle_city_raw_data/Neighborhood_Map_Atlas_Neighborhoods.geojson"
ROUTES_CSV_FILE = f"{ROOT}/routes/db.csv"

# One stop table per transit system; each key is used as that system's label.
TRANSPORT_FILES = {
    "Light Rail": f"{ROOT}/locs/seattle_city_raw_data/sound_transit_light_rail.csv",
    "Ferry": f"{ROOT}/locs/seattle_city_raw_data/ferry.csv",
    "Bus": f"{ROOT}/locs/seattle_city_raw_data/bus.csv",
}
LOCS_CSV_FILE = f"{ROOT}/locs/db.csv"

# Every known stop, keyed by stop name -> {'lat', 'lon', 'system'}.
# The key is the name alone, so a later row with the same stop name (from
# any system) replaces the earlier entry.
STOPS = {}
for system_name, file_name in TRANSPORT_FILES.items():
    # Use the path the loop already yields rather than re-indexing the dict.
    system_df = pd.read_csv(file_name)
    for index, row in system_df.iterrows():
        STOPS[row['stop_name']] = {
            'lat': row['stop_lat'],
            'lon': row['stop_lon'],
            'system': system_name,
        }

df = pd.read_csv(ROUTES_CSV_FILE)
# Validate with an explicit raise: an `assert` is stripped under `python -O`,
# which would let a malformed csv through silently.
if "id" not in df.columns:
    raise ValueError("db.csv does not contain route ids")
|
||
# Neighborhood geometries keyed by the pair (S_HOOD, L_HOOD) taken from each
# GeoJSON feature's properties; the value is the feature's shapely geometry.
NEIGHBORHOOD_POLYGONS = {}
with open(NEIGHBORHOOD_FILE) as f:
    gj = geojson.load(f)

for feature in gj['features']:
    props = feature["properties"]
    large_name = props["L_HOOD"]
    small_name = props["S_HOOD"]
    NEIGHBORHOOD_POLYGONS[(small_name, large_name)] = shape(feature["geometry"])
|
||
# determine if this stop is "close enough"
def near_enough(p1, p2, threshold=0.005):
    """Return True when `p1` and `p2` are strictly closer than `threshold`.

    The separation is whatever `distance` reports for the two geometries.
    The points passed in this script are built from lon/lat values, so
    `threshold` is expressed in degrees: 0.005 ~= 0.3 miles or 6 minutes
    of walking.

    NOTE(review): a degree of longitude covers less ground than a degree of
    latitude away from the equator, so the cutoff is not the same walking
    distance in every direction - confirm this is acceptable.
    """
    separation = distance(p1, p2)
    return separation < threshold
|
||
# For every route: record the neighborhoods its GPX track passes through,
# its start/end neighborhood, and the first transit stop found within
# walking distance of the route's starting point.

## add new columns if not existing in csv
for col_name in ["neighborhoods", "coarse_neighborhoods", "start_neighborhood", "end_neighborhood", "transit"]:
    if col_name not in df:
        df[col_name] = ""

for index, row in df.iterrows():
    route_id = row["id"]
    # Context manager so the GPX handle is closed even if parsing fails.
    with open(f"{ROOT}/routes/gpx/{route_id}.gpx", 'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)

    # Flatten tracks -> segments -> points, preserving route order.
    route_points = [
        point
        for track in gpx.tracks
        for segment in track.segments
        for point in segment.points
    ]

    route_neighborhoods = []
    coarse_route_neighborhoods = []
    for point in route_points:
        # shapely points are (x, y), i.e. (longitude, latitude)
        p = Point(point.longitude, point.latitude)
        for (n_name, n_coarse_name), n_shape in NEIGHBORHOOD_POLYGONS.items():
            if n_shape.contains(p):
                route_neighborhoods.append(n_name)
                coarse_route_neighborhoods.append(n_coarse_name)

    # No point fell inside any known polygon.
    if not route_neighborhoods:
        route_neighborhoods.append("non-Seattle")
    df.at[index, "start_neighborhood"] = route_neighborhoods[0]
    # Loops and out-and-backs ("OB") end where they start; point-to-point
    # routes end in the last neighborhood visited. Other types are left as is.
    if row['type'] in ["Loop", "OB"]:
        df.at[index, "end_neighborhood"] = route_neighborhoods[0]
    elif row['type'] in ["P2P"]:
        df.at[index, "end_neighborhood"] = route_neighborhoods[-1]
    # hacky, to allow saving list as a csv column
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # output is identical on every run (iterating a set of strings is not).
    df.at[index, "neighborhoods"] = ";".join(dict.fromkeys(route_neighborhoods))
    df.at[index, "coarse_neighborhoods"] = ";".join(dict.fromkeys(coarse_route_neighborhoods))

    # A GPX file without any track point has no start to match against.
    if not route_points:
        continue
    start = route_points[0]
    start_point = Point(start.longitude, start.latitude)

    for stop_name, stop_dict in STOPS.items():
        if near_enough(start_point, Point(stop_dict['lon'], stop_dict['lat'])):
            df.at[index, "transit"] = f"{stop_dict['system']} to {stop_name} stop"
            # The first match in STOPS order wins; it is not necessarily
            # the nearest stop.
            break

# NOTE(review): the DataFrame index is written as an extra unnamed first
# column; pass index=False if this file is meant to replace db.csv - confirm.
df.to_csv(ROUTES_CSV_FILE + "_new")
|
||
|