From b7482f364790a247f71cfb2d528ebd27ed81427e Mon Sep 17 00:00:00 2001
From: maldevel
Date: Tue, 19 Apr 2016 13:20:42 +0300
Subject: [PATCH] release 1.0

release 1.0
---
 EmailHarvester.py | 238 +++++++++++++++++++++++++++++++++++++++++++++-
 README.md         |  49 ++++++++--
 requirements.txt  |   3 +
 3 files changed, 278 insertions(+), 12 deletions(-)

diff --git a/EmailHarvester.py b/EmailHarvester.py
index f599198..0f682cd 100644
--- a/EmailHarvester.py
+++ b/EmailHarvester.py
@@ -28,34 +28,264 @@
 __copyright__ = "Copyright (c) 2016 @maldevel"
 __credits__ = ["maldevel"]
 __license__ = "GPLv3"
-__version__ = "3.0"
+__version__ = "1.0"
 __maintainer__ = "maldevel"
 
 ################################
 
 import argparse
 import sys
+import time
+import requests
+import re
+from termcolor import colored
 from argparse import RawTextHelpFormatter
+from sys import platform as _platform
 
 ################################
 
+if _platform == 'win32':
+    import colorama
+    colorama.init()
+
+
+class myparser:
+    def __init__(self, results, word):
+        self.results = results
+        self.word = word
+        self.temp = []
+
+    def genericClean(self):
+        self.results = re.sub('', '', self.results)
+        self.results = re.sub('', '', self.results)
+        self.results = re.sub('', '', self.results)
+        self.results = re.sub('</div>', '', self.results)
+        self.results = re.sub('<p>', '', self.results)
+        self.results = re.sub('</span>', '', self.results)
+        self.results = re.sub('</a>', '', self.results)
+        self.results = re.sub('<em>', '', self.results)
+        self.results = re.sub('<b>', '', self.results)
+        self.results = re.sub('</b>', '', self.results)
+        self.results = re.sub('</em>', '', self.results)
+        self.results = re.sub('%2f', ' ', self.results)
+        self.results = re.sub('%3a', ' ', self.results)
+        self.results = re.sub('<strong>', '', self.results)
+        self.results = re.sub('</strong>', '', self.results)
+        #self.results = re.sub('>', '', self.results)
+
+    def emails(self):
+        self.genericClean()
+        reg_emails = re.compile(
+            '[a-zA-Z0-9\.\-_]*' +
+            '@' +
+            '(?:[a-zA-Z0-9\.\-]*\.)?' +
+            self.word)
+        self.temp = reg_emails.findall(self.results)
+        emails = self.unique()
+        return emails
+
+    def unique(self):
+        self.new = []
+        for x in self.temp:
+            if x not in self.new:
+                self.new.append(x)
+        return self.new
+
+
+###################################################################
+
+class SearchEngine:
+    def __init__(self, urlPattern, word, limit, counterInit, counterStep):
+        self.results = ""
+        self.totalresults = ""
+        self.userAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1"
+        self.limit = int(limit)
+        self.counter = int(counterInit)
+        self.urlPattern = urlPattern
+        self.step = int(counterStep)
+        self.word = word
+
+    def do_search(self):
+        try:
+            urly = self.urlPattern.format(counter=str(self.counter), word=self.word)
+            headers = {
+                'User-Agent': self.userAgent,
+            }
+            r = requests.get(urly, headers=headers)
+        except Exception as e:
+            print(e)
+            return
+        self.results = r.content.decode(r.encoding)
+        self.totalresults += self.results
+
+    def process(self):
+        while (self.counter < self.limit):
+            self.do_search()
+            time.sleep(1)
+            print(green("\tSearching " + str(self.counter) + " results..."))
+            self.counter += self.step
+
+    def get_emails(self):
+        rawres = myparser(self.totalresults, self.word)
+        return rawres.emails()
+
+###################################################################
+
+def yellow(text):
+    return colored(text, 'yellow', attrs=['bold'])
+
+def green(text):
+    return colored(text, 'green', attrs=['bold'])
+
+def blue(text):
+    return colored(text, 'blue', attrs=['bold'])
+
+def red(text):
+    return colored(text, 'red', attrs=['bold'])
+
+def unique(data):
+    unique = []
+    for x in data:
+        if x not in unique:
+            unique.append(x)
+    return unique
+
+###################################################################
+
+def limit_type(x):
+    x = int(x)
+    if x <= 0:
+        raise argparse.ArgumentTypeError("Minimum results limit is 1.")
+    return x
+
+def engine_type(x):
+    if x not in ("google", "bing", "yahoo", "ask", "all"):
+        raise argparse.ArgumentTypeError("Invalid search engine, try with: google, bing, yahoo, ask, all.")
+    return x
+
+
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="""
-
+
  _____ _ _ _ _ _
 | ___| (_)| | | | | | | |
 | |__ _ __ ___ __ _ _ | | | |_| | __ _ _ __ __ __ ___ ___ | |_ ___ _ __
 | __|| '_ ` _ \ / _` || || | | _ | / _` || '__|\ \ / // _ \/ __|| __|/ _ \| '__|
 | |___| | | | | || (_| || || | | | | || (_| || | \ V /| __/\__ \| |_| __/| |
 \____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_|
-
-""",
+
+  A tool to retrieve Domain email addresses from Search Engines | @maldevel
+  {}: {}
+""".format(red('Version'), yellow(__version__)),
                                      formatter_class=RawTextHelpFormatter)
+    parser.add_argument("-d", '--domain', metavar='DOMAIN', dest='domain', type=str, help="Domain to search.")
+    parser.add_argument("-s", '--save', metavar='FILE', dest='filename', type=str, help="Save the results into a TXT and XML file.")
+    parser.add_argument("-e", '--engine', metavar='ENGINE', dest='engine', default="all", type=engine_type, help="Select search engine (google, bing, yahoo, ask, all).")
+    parser.add_argument("-l", '--limit', metavar='LIMIT', dest='limit', type=limit_type, default=100, help="Limit the number of results.")
+
 
     if len(sys.argv) is 1:
         parser.print_help()
         sys.exit()
 
     args = parser.parse_args()
+
+    domain = ""
+    if(args.domain):
+        domain = args.domain
+    else:
+        print('[{}] {}'.format(red('ERROR'), "Please specify a domain name to search."))
+        sys.exit(2)
+
+    filename = ""
+    if(args.filename):
+        filename = args.filename
+
+    limit = args.limit
+    engine = args.engine
+
+
+    if engine == "google":
+        print(green("[-] Searching in Google..\n"))
+        search = SearchEngine("http://www.google.com/search?num=100&start={counter}&hl=en&q=%40\"{word}\"", domain, limit, 0, 100)
+        search.process()
+        all_emails = search.get_emails()
+
+    elif engine == "bing":
+        print(green("[-] Searching in Bing..\n"))
+        search = SearchEngine("http://www.bing.com/search?q=%40{word}&count=50&first={counter}", domain, limit, 0, 50)
+        search.process()
+        all_emails = search.get_emails()
+
+    elif engine == "ask":
+        print(green("[-] Searching in ASK..\n"))
+        search = SearchEngine("http://www.ask.com/web?q=%40{word}", domain, limit, 0, 100)
+        search.process()
+        all_emails = search.get_emails()
+
+    elif engine == "yahoo":
+        print(green("[-] Searching in Yahoo..\n"))
+        search = SearchEngine("http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", domain, limit, 1, 100)
+        search.process()
+        all_emails = search.get_emails()
+
+    elif engine == "all":
+        print(green("[-] Searching everywhere..\n"))
+        all_emails = []
+        print(green("[-] Searching in Google..\n"))
+        search = SearchEngine("http://www.google.com/search?num=100&start={counter}&hl=en&q=%40\"{word}\"", domain, limit, 0, 100)
+        search.process()
+        all_emails.extend(search.get_emails())
+        print(green("\n[-] Searching in Bing..\n"))
+        search = SearchEngine("http://www.bing.com/search?q=%40{word}&count=50&first={counter}", domain, limit, 0, 50)
+        search.process()
+        all_emails.extend(search.get_emails())
+        print(green("\n[-] Searching in ASK..\n"))
+        search = SearchEngine("http://www.ask.com/web?q=%40{word}", domain, limit, 0, 100)
+        search.process()
+        all_emails.extend(search.get_emails())
+        print(green("\n[-] Searching in Yahoo..\n"))
+        search = SearchEngine("http://search.yahoo.com/search?p=%40{word}&n=100&ei=UTF-8&va_vt=any&vo_vt=any&ve_vt=any&vp_vt=any&vd=all&vst=0&vf=all&vm=p&fl=0&fr=yfp-t-152&xargs=0&pstart=1&b={counter}", domain, limit, 1, 100)
+        search.process()
+        all_emails.extend(search.get_emails())
+        all_emails = unique(all_emails)
+
+    print(green("\n\n[+] Emails found:"))
+    print(green("------------------"))
+
+    if all_emails == []:
+        print(red("No emails found"))
+        sys.exit(3)
+    else:
+        for emails in all_emails:
+            print(emails)
+
+    if filename != "":
+        try:
+            print(green("[+] Saving files..."))
+            file = open(filename, 'w')
+            for email in all_emails:
+                try:
+                    file.write(email + "\n")
+                except:
+                    print(red("Exception " + email))
+                    pass
+            file.close()
+        except Exception as e:
+            print(red("Error saving TXT file: " + str(e)))
+
+        try:
+            filename = filename.split(".")[0] + ".xml"
+            file = open(filename, 'w')
+            file.write('<?xml version="1.0" encoding="UTF-8"?><EmailHarvester>')
+            for x in all_emails:
+                file.write('<email>' + x + '</email>')
+            file.write('</EmailHarvester>')
+            file.flush()
+            file.close()
+            print(green("Files saved!"))
+        except Exception as er:
+            print(red("Error saving XML file: " + str(er)))
+
+    sys.exit()
\ No newline at end of file
diff --git a/README.md b/README.md
index 3f80747..9254429 100644
--- a/README.md
+++ b/README.md
@@ -2,35 +2,68 @@ EmailHarvester
 ====
 * A tool to retrieve Domain email addresses from Search Engines
 
+This project was inspired by:
+* theHarvester (https://github.com/laramies/theHarvester) from laramies.
+* search_email_collector (https://github.com/rapid7/metasploit-framework/blob/master/modules/auxiliary/gather/search_email_collector.rb) from Carlos Perez.
+
 Requirements
 =====
 * Python 3.x
+* termcolor
+* colorama
+* requests
 
 Features
 =====
-
+* Retrieve Domain email addresses from Search Engines
+* Google
+* Bing
+* Yahoo
+* ASK
 
 Download/Installation
 ====
 * git clone https://github.com/maldevel/EmailHarvester
+* pip install -r requirements.txt --user
 
-Setup
+Usage
 =====
+```
+usage: EmailHarvester.py [-h] [-d DOMAIN] [-s FILE] [-e ENGINE] [-l LIMIT]
+
+ _____ _ _ _ _ _
+| ___| (_)| | | | | | | |
+| |__ _ __ ___ __ _ _ | | | |_| | __ _ _ __ __ __ ___ ___ | |_ ___ _ __
+| __|| '_ ` _ \ / _` || || | | _ | / _` || '__|\ \ / // _ \/ __|| __|/ _ \| '__|
+| |___| | | | | || (_| || || | | | | || (_| || | \ V /| __/\__ \| |_| __/| |
+\____/|_| |_| |_| \__,_||_||_| \_| |_/ \__,_||_| \_/ \___||___/ \__|\___||_|
-
-Contents
-=====
-
+
+  A tool to retrieve Domain email addresses from Search Engines | @maldevel
+  Version: 1.0
+
-Usage
-=====
-```
+optional arguments:
+  -h, --help            show this help message and exit
+  -d DOMAIN, --domain DOMAIN
+                        Domain to search.
+  -s FILE, --save FILE  Save the results into a TXT and XML file.
+  -e ENGINE, --engine ENGINE
+                        Select search engine (google, bing, yahoo, ask, all).
+  -l LIMIT, --limit LIMIT
+                        Limit the number of results.
 ```
 
 Examples
 =====
+* Search in Google
+./EmailHarvester.py -d example.com -e google
+
+* Search in all engines
+./EmailHarvester.py -d example.com -e all
+
+* Limit results
+./EmailHarvester.py -d example.com -e all -l 200
diff --git a/requirements.txt b/requirements.txt
index e69de29..63a54a1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+termcolor
+colorama
+requests
\ No newline at end of file
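
For reference, a minimal standalone sketch of the extraction idea behind `myparser.emails()` in the patch above: collect every address whose domain ends with the target word, then de-duplicate. The sample input string is invented for illustration, and `re.escape()` is an extra precaution added here; the patch itself concatenates the domain into the pattern unescaped.

```python
import re

def extract_emails(text, word):
    # Same shape as the pattern built in myparser.emails(): local part, "@",
    # optional subdomain(s), then the target domain. re.escape() is not in the patch.
    pattern = re.compile(r'[a-zA-Z0-9.\-_]*@(?:[a-zA-Z0-9.\-]*\.)?' + re.escape(word))
    return sorted(set(pattern.findall(text)))

if __name__ == '__main__':
    sample = 'see <a href="mailto:info@example.com">info@example.com</a> and sales@mail.example.com'
    print(extract_emails(sample, 'example.com'))
    # ['info@example.com', 'sales@mail.example.com']
```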
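The XML report written at the end of `__main__` is a flat `<EmailHarvester>` element with one `<email>` child per address, so it can be read back with the standard library. A short sketch under that assumption; `results.xml` is a placeholder name (running with `-s results.txt` makes the patch derive `results.xml` from it), and note the patch writes addresses without XML escaping, so unusual input could yield a file that does not parse.

```python
import xml.etree.ElementTree as ET

# Read the report produced by the -s option; "results.xml" is an assumed example path.
tree = ET.parse("results.xml")
emails = [e.text for e in tree.getroot().findall("email")]
print(emails)
```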