-
Notifications
You must be signed in to change notification settings - Fork 2
/
crime.py
59 lines (42 loc) · 1.06 KB
/
crime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import feedparser
import urllib
from lxml.html import parse
# Prototype: extract the list of crimes from a single crime-report page
# (one page per report date) on news.arlingtonva.us.
# Alternate sample page:
#   http://news.arlingtonva.us/crime-reports-april-27-2011-202323
url = "http://news.arlingtonva.us/crime-report:-may-16-2013"

# Module-level result lists; both are printed at the end of the script.
crimes, stolen = [], []
def _classify_reports(contents, min_length=20):
    """Split scraped span texts into crime reports and stolen-vehicle entries.

    The texts are scanned in order.  A span equal to ``"REPORTS"`` switches
    to the crime section and one equal to ``"STOLEN VEHICLES"`` switches to
    the stolen-vehicle section; scanning starts in the crime section.  Spans
    that are not strictly longer than ``min_length`` characters are skipped
    (presumably headings/labels -- confirm against the live page markup).

    Stolen-vehicle spans are joined two at a time with ``"; "``.
    NOTE(review): this assumes each stolen-vehicle entry is split across two
    consecutive spans; an unpaired trailing span is dropped.

    Args:
        contents: Iterable of already-stripped span texts, in page order.
        min_length: A span must be strictly longer than this to be kept.

    Returns:
        A ``(crimes, stolen)`` tuple of two lists of strings.
    """
    found_crimes = []
    found_stolen = []
    in_stolen_section = False
    pending = ""  # first half of a stolen-vehicle entry awaiting its pair
    for content in contents:
        if content in ("REPORTS", "STOLEN VEHICLES"):
            # Section headers only switch mode; they are never recorded.
            in_stolen_section = content == "STOLEN VEHICLES"
            continue
        if len(content) <= min_length:
            continue
        if not in_stolen_section:
            found_crimes.append(content)
        elif not pending:
            pending = content
        else:
            found_stolen.append(pending + "; " + content)
            # The prototype wrote `buff==""` here (a comparison, not an
            # assignment), so the buffer was never cleared between pairs.
            pending = ""
    return found_crimes, found_stolen


def get_crimes(url, classify=False):
    """Fetch a crime-report page and print the text of each report span.

    The page is parsed with ``lxml.html.parse`` and every element matching
    ``div.wrapContent p span span`` has its stripped text printed.

    Args:
        url: Address of the crime-report page to fetch.
        classify: When true, additionally sort the span texts into the
            module-level ``crimes`` and ``stolen`` lists.  Defaults to
            ``False``, which keeps the prototype's print-only behaviour.

    Returns:
        None.
    """
    doc = parse(url).getroot()
    reports = doc.cssselect('div.wrapContent p span span')
    contents = []
    for r in reports:
        content = r.text_content().strip()
        # A single-argument parenthesised print behaves identically under the
        # Python 2 print statement and the Python 3 print function.
        print(content)
        contents.append(content)
    if classify:
        found_crimes, found_stolen = _classify_reports(contents)
        crimes.extend(found_crimes)
        stolen.extend(found_stolen)
# Run the prototype against the sample page, then dump both result lists.
get_crimes(url)
# Single-argument parenthesised prints give the same output on Python 2 and 3.
print(crimes)
print(stolen)