-
Notifications
You must be signed in to change notification settings - Fork 78
/
Search.py
83 lines (66 loc) · 2.44 KB
/
Search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
import re
import string
import sys
import time
import urllib

import mechanize
import requests
from bs4 import BeautifulSoup  # used by Search.getLinks but was never imported

from ParserResults import ParserResults
class Search:
    """Scrape Google search results for a domain and extract artifacts.

    Raw HTML from one or more result pages is accumulated in
    ``self.totalresults`` by :meth:`do_search` (paged by :meth:`process`),
    then parsed on demand via ``ParserResults``.
    """

    def __init__(self, domain):
        self.domain = domain          # target domain, e.g. "example.com"
        self.counter = 0              # Google paging offset (start= parameter)
        self.results = ""             # raw HTML of the most recent request
        self.totalresults = ""        # concatenation of all fetched pages
        # BUGFIX: get_files() reads self.files, which was never initialized
        # anywhere (AttributeError). Default to an empty filter; callers may
        # assign a file-extension filter before calling get_files().
        self.files = ""
        self.server = "www.google.com"
        self.userAgent = "(Mozilla/5.0 (Windows; U; Windows NT 6.0;en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6"
        self.quantity = "100"         # results per page (num= parameter)

    def do_search(self):
        """Fetch one result page for '@"<domain>"' and append it to totalresults.

        Network errors are reported and swallowed (best-effort scraping).
        """
        # NOTE: plain string building cannot raise; the original wrapped it in
        # a needless try/except. The query searches for %40"<domain>" (an '@'
        # followed by the quoted domain) to surface e-mail addresses.
        url = ("http://" + self.server + "/search?num=" + self.quantity
               + "&start=" + str(self.counter)
               + "&hl=en&meta=&q=%40\"" + self.domain + "\"")
        try:
            r = requests.get(url)
            self.results = r.content
            self.totalresults += self.results
        except Exception as e:  # BUGFIX: Py2-only `except Exception, e` syntax
            print(e)

    def get_emails(self):
        """Return e-mail addresses parsed from all accumulated results."""
        rawres = ParserResults(self.totalresults, self.domain)
        return rawres.emails()

    def get_hostnames(self):
        """Return hostnames parsed from all accumulated results."""
        rawres = ParserResults(self.totalresults, self.domain)
        return rawres.hostnames()

    def get_files(self):
        """Return file URLs (filtered by self.files) from accumulated results."""
        rawres = ParserResults(self.totalresults, self.domain)
        return rawres.fileurls(self.files)

    def process(self):
        """Fetch result pages (start=0 and start=100), pausing between requests."""
        # BUGFIX: original condition was `counter <= 100 and counter <= 1000`;
        # the second clause was dead code (the first always dominates).
        # Behavior is unchanged: exactly two pages are fetched.
        while self.counter <= 100:
            self.do_search()
            time.sleep(1)  # throttle to avoid tripping Google's rate limiting
            self.counter += 100

    def getLinks(self, ip, depth):
        """Scrape outbound result links from one Google result page.

        :param ip:    query string (e.g. an IP address) to search for
        :param depth: paging offset, as a string, for the start= parameter
        :returns:     list of target URLs extracted from the result anchors
        """
        br = mechanize.Browser()
        br.set_handle_robots(False)
        # BUGFIX: original `br.addheaders[('User-agent','chrome')]` indexed the
        # header list with a tuple (TypeError); assignment was intended.
        br.addheaders = [('User-agent', 'chrome')]
        query = "http://www.google.com/search?num=100&q=" + ip + "&start=" + depth
        htmltext = br.open(query).read()
        soup = BeautifulSoup(htmltext)
        search = soup.findAll('div', attrs={'id': 'search'})
        searchtext = str(search[0])
        soup1 = BeautifulSoup(searchtext)
        list_items = soup1.findAll('li')
        # Grab the last `q=...&` parameter in each redirect-style href; this is
        # the real target URL inside Google's /url?q=... wrapper.
        regex = "q(?!.*q).*?&"
        pattern = re.compile(regex)
        results_array = []
        for li in list_items:
            soup2 = BeautifulSoup(str(li))
            links = soup2.findAll('a')
            source_link = links[0]
            source_url = re.findall(pattern, str(source_link))
            if len(source_url) > 0:
                results_array.append(str(source_url[0].replace("q=", "").replace("&", "")))
        return results_array