Skip to content

Commit

Permalink
email regex
Browse files Browse the repository at this point in the history
email regex improvement
  • Loading branch information
maldevel committed Apr 20, 2016
1 parent 26c4d04 commit 3abd22b
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions EmailHarvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

__author__ = "maldevel"
__copyright__ = "Copyright (c) 2016 @maldevel"
__credits__ = ["maldevel"]
__credits__ = ["maldevel", "Christian Martorella"]
__license__ = "GPLv3"
__version__ = "1.1"
__maintainer__ = "maldevel"
Expand Down Expand Up @@ -73,14 +73,18 @@ def genericClean(self):
self.results = re.sub('%3a', ' ', self.results)
self.results = re.sub('<strong>', '', self.results)
self.results = re.sub('</strong>', '', self.results)
#self.results = re.sub('>', '', self.results)
self.results = re.sub('<wbr>', '', self.results)
self.results = re.sub('</wbr>','', self.results)

for e in ('>', ':', '=', '<', '/', '\\', ';', '&', '%3A', '%3D', '%3C'):
self.results = str.replace(self.results, e, ' ')

def emails(self):
self.genericClean()
reg_emails = re.compile(
'[a-zA-Z0-9\.\-_]*' +
'[a-zA-Z0-9.\-_+#~!$&\',;=:]+' +
'@' +
'(?:[a-zA-Z0-9\.\-]*\.)?' +
'[a-zA-Z0-9.-]*' +
self.word)
self.temp = reg_emails.findall(self.results)
emails = self.unique()
Expand Down Expand Up @@ -199,7 +203,7 @@ def engine_type(x):
formatter_class=RawTextHelpFormatter)

parser.add_argument("-d", '--domain', metavar='DOMAIN', dest='domain', type=str, help="Domain to search.")
parser.add_argument("-s", '--save', metavar='FILE', dest='filename', type=str, help="Save the results into a TXT and XML file.")
parser.add_argument("-s", '--save', metavar='FILE', dest='filename', type=str, help="Save the results into a TXT and XML file (both).")
parser.add_argument("-e", '--engine', metavar='ENGINE', dest='engine', default="all", type=engine_type, help="Select search engine(google, bing, yahoo, ask, all).")
parser.add_argument("-l", '--limit', metavar='LIMIT', dest='limit', type=limit_type, default=100, help="Limit the number of results.")
parser.add_argument('-u', '--user-agent', metavar='USER-AGENT', dest='uagent', type=str, help="Set the User-Agent request header.")
Expand Down Expand Up @@ -321,4 +325,3 @@ def engine_type(x):
except Exception as er:
print(red("Error saving XML file: " + er))

sys.exit()

0 comments on commit 3abd22b

Please sign in to comment.