"""Fetch and parse website reports from nibbler.insites.com.

The public entry point is :func:`generate_report`; :func:`clean_url` and
:func:`create_full_url` build the report URL for a given website.
"""
# Provenance: website_report/stats.py, added by commit 3005b91 ("last push").

import time
import warnings

# URL schemes stripped from the front of a website address.
_SCHEMES = ('https://', 'http://')

# A response shorter than this many characters is treated as "report not
# ready yet" (threshold taken from the original implementation).
_MIN_READY_LENGTH = 15000

# Errors that indexing into the converted HTML structure can raise when the
# page layout is not the expected one.
_LOOKUP_ERRORS = (KeyError, IndexError, TypeError)


def _strip_prefix(text, prefix):
    """Return *text* without a leading *prefix* (matched case-insensitively)."""
    if text[:len(prefix)].lower() == prefix:
        return text[len(prefix):]
    return text


def clean_url(url: str) -> str:
    """Reduce a website address to its bare host name.

    Surrounding whitespace, a leading ``http://`` / ``https://`` scheme, a
    leading ``www.`` label and everything from the first ``/``, ``?`` or
    ``#`` onwards are removed. Only *leading* occurrences are stripped, so a
    host such as ``awww.example.com`` is left intact.

    Args:
        url: Website address, with or without scheme and path.

    Returns:
        The host part of *url* (original letter case preserved).
    """
    host = url.strip()
    for scheme in _SCHEMES:
        stripped = _strip_prefix(host, scheme)
        if stripped != host:
            host = stripped
            break
    # Cut off path, query string and fragment, whichever comes first.
    for separator in '/?#':
        host = host.partition(separator)[0]
    return _strip_prefix(host, 'www.')


def create_full_url(url: str) -> str:
    """Return the nibbler.insites.com report URL for the website *url*."""
    return f'https://nibbler.insites.com/en_US/reports/{clean_url(url)}'


def _parse_report(jned):
    """Extract report entries from an ``html_to_json``-converted page.

    Args:
        jned: Nested dict/list structure produced by ``html_to_json.convert``.

    Returns:
        A list of dicts. Entries from the first page section carry ``name``,
        ``rating`` and ``value``; entries from the second section carry
        ``name`` and ``value`` only.

    Raises:
        KeyError, IndexError, TypeError: If the page does not have the
            expected layout.
    """
    sections = jned['html'][0]['body'][0]['div'][0]['div'][1]['div']
    entries = []

    for item in sections[0]['div'][0]['div'][0]['div']:
        heading = item['h3'][0]
        span = heading['span'][0]
        entries.append({
            'name': heading['_value'],
            # Second CSS class of the span; presumably encodes the rating
            # bucket -- TODO confirm against the live markup.
            'rating': span['_attributes']['class'][1],
            'value': span['_value'],
        })

    for item in sections[2]['div'][0]['div']:
        try:
            heading = item['h2'][0]
            entry = {
                'name': heading['_value'],
                'value': heading['span'][0]['_value'],
            }
        except _LOOKUP_ERRORS:
            # Best effort: blocks in this section without the expected
            # heading/span are skipped rather than failing the whole report.
            continue
        entries.append(entry)

    return entries


def generate_report(website_url,
                    step=5,
                    max_time=60,
                    debug=False,
                    info=True,
                    request_timeout=30):
    """Poll the report page for *website_url* and return its parsed entries.

    The page is fetched repeatedly, sleeping *step* seconds between
    attempts, until the response is at least ``_MIN_READY_LENGTH``
    characters long or the attempt budget (``max_time // step`` retries) is
    used up.

    Args:
        website_url: Address of the website to report on.
        step: Seconds to sleep between attempts. A value <= 0 means a single
            attempt without waiting.
        max_time: Approximate total number of seconds to keep retrying.
        debug: Print progress messages while waiting.
        info: Print a notice when the time budget is exhausted.
        request_timeout: Per-request timeout in seconds passed to
            ``requests.get`` so a stalled connection cannot block forever.

    Returns:
        A list of entry dicts on success, ``False`` if the report was not
        ready in time, or ``None`` (after emitting a warning) if the page
        could not be parsed.

    Raises:
        requests.exceptions.RequestException: If fetching the page fails.
    """
    # Imported here because only this function needs the third-party
    # libraries; the URL helpers stay importable without them.
    import html_to_json
    import requests

    full_url = create_full_url(website_url)
    # Guard against step == 0, which would otherwise divide by zero.
    max_loop = max_time // step if step > 0 else 0

    while True:
        resp = requests.get(full_url, timeout=request_timeout).text
        if len(resp) >= _MIN_READY_LENGTH:
            break

        if debug:
            print(f'[DEBUG] : max time left {max_loop*step}s')
            print(f'[DEBUG] : sleeping for {step}s ')

        max_loop -= 1
        if max_loop < 0:
            if info:
                print('[INFO] : max time reached')
                print(f'[INFO] : check the report yourself {full_url}')
            return False
        time.sleep(step)

    jned = html_to_json.convert(resp)
    try:
        return _parse_report(jned)
    except _LOOKUP_ERRORS as e:
        warnings.warn(
            f'could not parse report at {full_url}: {e!r}',
            stacklevel=2,
        )
        return None