-
Notifications
You must be signed in to change notification settings - Fork 1
/
tiger_versus_json.py
executable file
·69 lines (57 loc) · 2.57 KB
/
tiger_versus_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#! /usr/bin/env python2
import json
import string
import sys
#This data is of absolutely awful quality, so longitude and latitude are sometimes reversed.
#The check below assumes north-western data (negative longitude, positive latitude);
#adjust it appropriately if your data lies in another quadrant of the Earth.
def fix_swapped_coordinates(coords):
    """Undo a longitude/latitude swap in place and return the same list.

    The first element is expected to be a (negative) longitude. When it is
    zero or positive instead, the first two elements are exchanged and the
    corrected value is reported on stderr. Any further elements are left
    where they are.

    coords must be a mutable sequence with at least two elements whenever a
    swap is needed. The list passed in is modified and is also the object
    that gets returned.
    """
    # Nothing to do unless the leading value is non-negative.
    if not coords[0] >= 0:
        return coords

    leading, trailing = coords[0], coords[1]
    coords[0] = trailing
    coords[1] = leading

    message = "Corrected fucked JSON coordinates to "+repr(coords)+'\n'
    sys.stderr.write(message)
    return coords
#And also occasionally a street in Texas will be located at the South Pole.
def data_is_fucked(coords):
    """Report whether coords falls outside the accepted coordinate box.

    coords is indexed as [longitude, latitude]. The point is rejected when
    the longitude is <= -180 or >= 0, or when the latitude is <= 0 or >= 90,
    so a value sitting exactly on a bound is rejected too.

    Returns True when the point should be discarded, False otherwise.
    """
    MIN_LON, MAX_LON = -180, 0
    MIN_LAT, MAX_LAT = 0, 90

    # Longitude is checked first; latitude is only read when longitude passes.
    longitude = coords[0]
    if longitude <= MIN_LON or longitude >= MAX_LON:
        return True

    latitude = coords[1]
    return latitude <= MIN_LAT or latitude >= MAX_LAT
# Script body: convert the line-delimited JSON address file named by
# sys.argv[1] into semicolon-separated rows on stdout. Problems are reported
# on stderr.
#
# The path is split on '/': the second-to-last component (the directory that
# holds the file) becomes the default state. The file name, up to its first
# '.', is split on '-': when the last piece is "city" or "county", the first
# piece (underscores turned into spaces, upper-cased) becomes the default
# city or county.
tokens = sys.argv[1].split('/')
default_state = tokens[-2].upper()
default_city = ""
# NOTE(review): default_county is assigned but not read anywhere in this script.
default_county = ""
file_tokens = tokens[-1].split('.')[0].split('-')
if file_tokens[-1] == "city":
    default_city = file_tokens[0].replace('_', ' ').upper()
elif file_tokens[-1] == "county":
    default_county = file_tokens[0].replace('_', ' ').upper()

# Stream the file inside a context manager so the handle is always closed and
# the whole file never has to be held in memory at once.
with open(sys.argv[1]) as address_file:
    for line in address_file:
        line = line.rstrip()
        try:
            address_object = json.loads(line)
        except ValueError:
            # json.loads signals malformed input with ValueError (or a
            # subclass); anything else, e.g. KeyboardInterrupt, must propagate.
            sys.stderr.write("Invalid line: " + line + '\n')
            continue

        # Some basic sanity checks since the data isn't sane: the record must
        # be an object with non-empty 'properties' and 'geometry' objects and
        # a non-empty 'coordinates' entry.
        if not isinstance(address_object, dict):
            continue
        properties = address_object.get('properties')
        geometry = address_object.get('geometry')
        if not isinstance(properties, dict) or not isinstance(geometry, dict):
            continue
        if not properties or not geometry.get('coordinates'):
            continue

        # A house number and a street name are mandatory; missing, null or
        # empty values cause the record to be skipped.
        if not properties.get('number') or not properties.get('street'):
            continue

        coords = fix_swapped_coordinates(geometry['coordinates'])
        if data_is_fucked(coords):
            sys.stderr.write("Skipping fucked-beyond-repair JSON coordinates of " + repr(coords) + '\n')
            continue

        row = [
            properties['number'],
            # The house number is deliberately emitted twice.
            properties['number'],
            # Fixed placeholder value; its meaning is not evident from this file.
            "NARF",
            properties['street'],
            # Optional fields: fall back to the path-derived defaults (or an
            # empty string) when the key is absent, null or empty.
            properties.get('city') or default_city,
            properties.get('region') or default_state,
            properties.get('postcode') or "",
            # NOTE(review): the opening parenthesis is never closed. Kept as-is
            # because the consumer of this output may rely on the exact format.
            '(' + repr(coords[0]) + ' ' + repr(coords[1]),
        ]

        row = [string.capwords(field) for field in row]
        row[5] = row[5].upper()
        # ';' is the field separator, so it must not appear inside a field.
        row = [field.replace(';', '#') for field in row]

        # Keep only printable ASCII (space through '~'). This drops both
        # non-ASCII characters and control characters in a single pass.
        joined = ';'.join(row)
        sys.stdout.write("".join(ch for ch in joined if ' ' <= ch <= '~') + '\n')