From fea96155bd254306a75ea0bac58fcf8d8d5f52be Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Tue, 10 Jan 2023 16:54:53 +0200 Subject: [PATCH 01/17] Update and rename ChaosMen.yml to Algolia_ChaosMen.yml --- scrapers/Algolia_ChaosMen.yml | 30 ++++++++++++++++++++++++ scrapers/ChaosMen.yml | 44 ----------------------------------- 2 files changed, 30 insertions(+), 44 deletions(-) create mode 100644 scrapers/Algolia_ChaosMen.yml delete mode 100644 scrapers/ChaosMen.yml diff --git a/scrapers/Algolia_ChaosMen.yml b/scrapers/Algolia_ChaosMen.yml new file mode 100644 index 000000000..03b29ebf5 --- /dev/null +++ b/scrapers/Algolia_ChaosMen.yml @@ -0,0 +1,30 @@ +name: "ChaosMen" +sceneByURL: + - action: script + url: + - chaosmen.com/en/video + script: + - python + - Algolia.py + - chaosmen +sceneByFragment: + action: script + script: + - python + - Algolia.py + - chaosmen +sceneByName: + action: script + script: + - python + - Algolia.py + - chaosmen + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - chaosmen + - validName +# Last Updated January 10, 2023 diff --git a/scrapers/ChaosMen.yml b/scrapers/ChaosMen.yml deleted file mode 100644 index d617f9c5e..000000000 --- a/scrapers/ChaosMen.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: "ChaosMen" -sceneByURL: - - action: scrapeXPath - url: - - chaosmen.com/showgal.php?g=content/CM/video - scraper: sceneScraper - -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class='content']/div[@class='cblock'][1]/div[@class='chead clearfix']/h3 - Date: - selector: //div[@class='summary_container']/p[contains(span,"Release date")] - postProcess: - - replace: - - regex: Release date.\s(.*) - with: $1 - - parseDate: January 2, 2006 - Details: - selector: //div[@class='summary_container']/p[not(@class='hbl')]/text() - concat: "\n\n" - Performers: - Name: //div[contains(@class,'video_star_stats')]/h4 - URL: - selector: //div[contains(@class,'video_star_stats')]/a/@href - postProcess: &pp - - replace: - - regex: ^ - with: https://www.chaosmen.com/ - Image: - selector: //div[@class='videoimg']/img/@src - postProcess: *pp - Tags: - Name: - selector: //div[@class='cblock'][2]/p/a - postProcess: - - replace: - - regex: ([\w\s]+)\s-\s([\w\s]+) - with: $2 - Studio: - Name: - fixed: ChaosMen -# Last Updated June 20, 2022 From 1fe07231d71e99a553b47bc6d8fc905eb9fab5af Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 18 Mar 2022 20:12:56 +0200 Subject: [PATCH 02/17] Teamskeet API supports now Sayuncle Network --- scrapers/Teamskeet/Teamskeet.yml | 3 ++- scrapers/Teamskeet/TeamskeetAPI.py | 11 +++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/scrapers/Teamskeet/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml index dbca726b6..7fb53fbf8 100644 --- a/scrapers/Teamskeet/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -4,9 +4,10 @@ name: "TeamskeetAPI" sceneByURL: - url: - teamskeet.com/movies/ + - sayuncle.com/movies/ action: script script: - python - TeamskeetAPI.py #- logJSON # Save the JSON from the API to a file -# Last Updated February 28, 2022 +# Last Updated March 13, 2022 diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index eab533466..0a155c5f5 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -56,8 +56,8 @@ def save_json(api_json, url): log.error('You need to set the URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) -if "teamskeet.com/movies/" not in scene_url: - log.error('The URL is not from a Teamskeet URL (e.g. teamskeet.com/movies/*****)') +if "teamskeet.com/movies/" not in scene_url and "sayuncle.com/movies/" not in scene_url: + log.error('The URL is not from a Teamskeet or Sayuncle URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) scene_id = re.sub('.+/', '', scene_url) @@ -71,9 +71,12 @@ def save_json(api_json, url): use_local = 1 with open(json_file, encoding="utf-8") as json_file: scene_api_json = json.load(json_file) -else: +else: + if "sayuncle.com/movies/" in scene_url: + api_url = f"https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/{scene_id}" log.debug("Asking the API...") - api_url = f"https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/{scene_id}" + if "teamskeet.com/movies/" in scene_url: + api_url = f"https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/{scene_id}" headers = { 'User-Agent': USER_AGENT, 'Origin': 'https://www.teamskeet.com', From 77e86ede5662ad5541122b8af6911ef50e286572 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 18 Mar 2022 20:13:36 +0200 Subject: [PATCH 03/17] Delete Sayuncle.yml From 96f949f306ecd9feb375a488d4e7fb890640718a Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 18 Mar 2022 20:19:45 +0200 Subject: [PATCH 04/17] Update SCRAPERS-LIST.md --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 3e3bb07c3..4e8ce1c61 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1265,7 +1265,7 @@ samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mar sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -sayuncle.com|Sayuncle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +sayuncle.com|Teamskeet.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-Gay scarybigdicks.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- schoolgirlshd.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored schoolpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- From 52cf03a3f796b6666c07d319c8efde258b2c44b4 Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Fri, 18 Mar 2022 23:23:41 +0200 Subject: [PATCH 05/17] Update TeamskeetAPI.py use separate directories for SayUncle/Teamskeet for `logJSON` functionality --- scrapers/Teamskeet/TeamskeetAPI.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 0a155c5f5..151bc4e83 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -64,6 +64,13 @@ def save_json(api_json, url): if not scene_id: log.error("Error with the ID ({})\nAre you sure that the end of your URL is correct ?".format(scene_id)) sys.exit(1) + +api_url = f"https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/{scene_id}" + +if "sayuncle.com/movies/" in scene_url: + api_url = f"https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/{scene_id}" + DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","SayUncle") + use_local = 0 json_file = os.path.join(DIR_JSON, scene_id+".json") if os.path.isfile(json_file): @@ -72,11 +79,7 @@ def save_json(api_json, url): with open(json_file, encoding="utf-8") as json_file: scene_api_json = json.load(json_file) else: - if "sayuncle.com/movies/" in scene_url: - api_url = f"https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/{scene_id}" log.debug("Asking the API...") - if "teamskeet.com/movies/" in scene_url: - api_url = f"https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/{scene_id}" headers = { 'User-Agent': USER_AGENT, 'Origin': 'https://www.teamskeet.com', From da93c0ec3e7705e368937ad9a016ee286b10f4c9 Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Fri, 18 Mar 2022 23:27:28 +0200 Subject: [PATCH 06/17] Update Teamskeet.yml order urls --- scrapers/Teamskeet/Teamskeet.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Teamskeet/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml index 7fb53fbf8..893bf5523 100644 --- a/scrapers/Teamskeet/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -3,8 +3,8 @@ name: "TeamskeetAPI" sceneByURL: - url: - - teamskeet.com/movies/ - sayuncle.com/movies/ + - teamskeet.com/movies/ action: script script: - python From 794668f3313a31e4a8a7c1753b1cb13a32763a1b Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Fri, 18 Mar 2022 23:35:34 +0200 Subject: [PATCH 07/17] Update TeamskeetAPI.py use different headers/origin per request --- scrapers/Teamskeet/TeamskeetAPI.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 151bc4e83..34740ee00 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -47,7 +47,7 @@ def save_json(api_json, url): # Not necessary but why not ? USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0' - +ORIGIN = "https://www.teamskeet.com" fragment = json.loads(sys.stdin.read()) if fragment["url"]: @@ -70,6 +70,7 @@ def save_json(api_json, url): if "sayuncle.com/movies/" in scene_url: api_url = f"https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/{scene_id}" DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","SayUncle") + ORIGIN = "https://www.sayuncle.com" use_local = 0 json_file = os.path.join(DIR_JSON, scene_id+".json") @@ -78,12 +79,12 @@ def save_json(api_json, url): use_local = 1 with open(json_file, encoding="utf-8") as json_file: scene_api_json = json.load(json_file) -else: +else: log.debug("Asking the API...") headers = { 'User-Agent': USER_AGENT, - 'Origin': 'https://www.teamskeet.com', - 'Referer': 'https://www.teamskeet.com/' + 'Origin': ORIGIN, + 'Referer': ORIGIN } scraper = cloudscraper.create_scraper() # Send to the API From cecc1a171a0659e849ebf4508e308b0ba9e7fc29 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 29 Dec 2023 16:30:33 +0200 Subject: [PATCH 08/17] Update TeamskeetAPI.py --- scrapers/Teamskeet/TeamskeetAPI.py | 36 +++++++++++++++++------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 34740ee00..1607c2ee2 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -42,12 +42,11 @@ def save_json(api_json, url): USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) -DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") +DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Sayuncle") # Not necessary but why not ? USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0' -ORIGIN = "https://www.teamskeet.com" fragment = json.loads(sys.stdin.read()) if fragment["url"]: @@ -56,22 +55,25 @@ def save_json(api_json, url): log.error('You need to set the URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) -if "teamskeet.com/movies/" not in scene_url and "sayuncle.com/movies/" not in scene_url: - log.error('The URL is not from a Teamskeet or Sayuncle URL (e.g. teamskeet.com/movies/*****)') +if "sayuncle.com/movies/" not in scene_url and "teamskeet.com/movies/" not in scene_url: + log.error('The URL is not from a Teamskeet or SayUncle URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) +# Check the URL and set the API URL +if 'sayuncle.com' in scene_url: + ORIGIN = 'https://www.sayuncle.com' + REFERER = 'https://www.sayuncle.com/' + API_BASE = 'https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/' +if 'teamskeet.com' in scene_url: + ORIGIN = 'https://www.teamskeet.com' + REFERER = 'https://www.teamskeet.com/' + API_BASE = 'https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/' + + scene_id = re.sub('.+/', '', scene_url) if not scene_id: log.error("Error with the ID ({})\nAre you sure that the end of your URL is correct ?".format(scene_id)) sys.exit(1) - -api_url = f"https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/{scene_id}" - -if "sayuncle.com/movies/" in scene_url: - api_url = f"https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/{scene_id}" - DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","SayUncle") - ORIGIN = "https://www.sayuncle.com" - use_local = 0 json_file = os.path.join(DIR_JSON, scene_id+".json") if os.path.isfile(json_file): @@ -81,10 +83,11 @@ def save_json(api_json, url): scene_api_json = json.load(json_file) else: log.debug("Asking the API...") + api_url = f"{API_BASE}{scene_id}" headers = { 'User-Agent': USER_AGENT, 'Origin': ORIGIN, - 'Referer': ORIGIN + 'Referer': REFERER } scraper = cloudscraper.create_scraper() # Send to the API @@ -94,8 +97,8 @@ def save_json(api_json, url): except: log.error("An error has occurred with the page request") log.error(f"Request status: `{r.status_code}`") - log.error("Check your TeamskeetAPI.log for more details") - with open("TeamskeetAPI.log", 'w', encoding='utf-8') as f: + log.error("Check your sayuncleAPI.log for more details") + with open("teamskeetAPI.log", 'w', encoding='utf-8') as f: f.write(f"Scene ID: {scene_id}\n") f.write(f"Request:\n{r.text}") sys.exit(1) @@ -128,6 +131,9 @@ def save_json(api_json, url): scrape['performers'] = [{"name": x.get('modelName')} for x in scene_api_json.get('models')] scrape['tags'] = [{"name": x} for x in scene_api_json.get('tags')] +# If the scene is from sayuncle.com, we need to add the gay tag to the tags list +if 'sayuncle.com' in scene_url: + scrape['tags'].append({"name": "Gay"}) scrape['image'] = scene_api_json.get('img') if use_local == 0: From 47a533c3772e4ac1b7e780d2e7e287cc5a225fdb Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 29 Dec 2023 16:32:23 +0200 Subject: [PATCH 09/17] Update TeamskeetAPI.py --- scrapers/Teamskeet/TeamskeetAPI.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 1607c2ee2..e9d7582fb 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -42,7 +42,7 @@ def save_json(api_json, url): USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) -DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Sayuncle") +DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") # Not necessary but why not ? @@ -97,8 +97,8 @@ def save_json(api_json, url): except: log.error("An error has occurred with the page request") log.error(f"Request status: `{r.status_code}`") - log.error("Check your sayuncleAPI.log for more details") - with open("teamskeetAPI.log", 'w', encoding='utf-8') as f: + log.error("Check your TeamskeetAPI.log for more details") + with open("TeamskeetAPI.log", 'w', encoding='utf-8') as f: f.write(f"Scene ID: {scene_id}\n") f.write(f"Request:\n{r.text}") sys.exit(1) From 9c5ae5adc6c6f53c7a19b9e95019ad7b561eddc6 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 29 Dec 2023 16:35:37 +0200 Subject: [PATCH 10/17] delete Algolia_ChaosMen.yml --- scrapers/Algolia_ChaosMen.yml | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 scrapers/Algolia_ChaosMen.yml diff --git a/scrapers/Algolia_ChaosMen.yml b/scrapers/Algolia_ChaosMen.yml deleted file mode 100644 index 03b29ebf5..000000000 --- a/scrapers/Algolia_ChaosMen.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: "ChaosMen" -sceneByURL: - - action: script - url: - - chaosmen.com/en/video - script: - - python - - Algolia.py - - chaosmen -sceneByFragment: - action: script - script: - - python - - Algolia.py - - chaosmen -sceneByName: - action: script - script: - - python - - Algolia.py - - chaosmen - - searchName -sceneByQueryFragment: - action: script - script: - - python - - Algolia.py - - chaosmen - - validName -# Last Updated January 10, 2023 From fc0dc8b8cf7ee01d7d4a79fcb6224f3b9b013662 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Fri, 29 Dec 2023 16:38:11 +0200 Subject: [PATCH 11/17] Update Teamskeet.yml --- scrapers/Teamskeet/Teamskeet.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Teamskeet/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml index 893bf5523..4f19675d3 100644 --- a/scrapers/Teamskeet/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -10,4 +10,4 @@ sceneByURL: - python - TeamskeetAPI.py #- logJSON # Save the JSON from the API to a file -# Last Updated March 13, 2022 +# Last Updated December 29, 2023 From e3a318dc3f64b6bc5da6660d92765dc8e68540ad Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:13:43 +0200 Subject: [PATCH 12/17] Update TeamskeetAPI.py --- scrapers/Teamskeet/TeamskeetAPI.py | 39 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index e9d7582fb..b4d99acc8 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -5,19 +5,7 @@ import sys from datetime import datetime -# to import from a parent directory we need to add that directory to the system path -csd = os.path.dirname( - os.path.realpath(__file__)) # get current script directory -parent = os.path.dirname(csd) # parent directory (should be the scrapers one) -sys.path.append( - parent -) # add parent dir to sys path so that we can import py_common from there - -try: - import py_common.log as log -except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) - sys.exit() +import py_common.log as log try: import cloudscraper @@ -42,12 +30,13 @@ def save_json(api_json, url): USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) -DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") +DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Sayuncle") # Not necessary but why not ? USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0' + fragment = json.loads(sys.stdin.read()) if fragment["url"]: scene_url = fragment["url"] @@ -55,19 +44,19 @@ def save_json(api_json, url): log.error('You need to set the URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) -if "sayuncle.com/movies/" not in scene_url and "teamskeet.com/movies/" not in scene_url: - log.error('The URL is not from a Teamskeet or SayUncle URL (e.g. teamskeet.com/movies/*****)') - sys.exit(1) # Check the URL and set the API URL if 'sayuncle.com' in scene_url: ORIGIN = 'https://www.sayuncle.com' REFERER = 'https://www.sayuncle.com/' API_BASE = 'https://store2.psmcdn.net/sau-elastic-00gy5fg5ra-videoscontent/_doc/' -if 'teamskeet.com' in scene_url: +elif 'teamskeet.com' in scene_url: ORIGIN = 'https://www.teamskeet.com' REFERER = 'https://www.teamskeet.com/' API_BASE = 'https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/' +else: + log.error('The URL is not from a Teamskeet or SayUncle URL (e.g. teamskeet.com/movies/*****)') + sys.exit(1) scene_id = re.sub('.+/', '', scene_url) @@ -125,16 +114,26 @@ def save_json(api_json, url): dt = re.sub(r'T.+', '', dt) date = datetime.strptime(dt, '%Y-%m-%d') scrape['date'] = str(date.date()) -scrape['details'] = scene_api_json.get('description') + +#fix for TeamKseet including HTML tags in Description +CLEANR = re.compile('<.*?>') +cleandescription = re.sub(CLEANR,'',scene_api_json.get('description')) +scrape['details'] = cleandescription scrape['studio'] = {} scrape['studio']['name'] = scene_api_json['site'].get('name') scrape['performers'] = [{"name": x.get('modelName')} for x in scene_api_json.get('models')] scrape['tags'] = [{"name": x} for x in scene_api_json.get('tags')] +scrape['image'] = scene_api_json.get('img') +# Highres is not working with sayuncle.com at the moment +if 'sayuncle.com' not in scene_url: + high_res = scene_api_json.get('img').replace('shared/med', 'members/full') + log.debug(f"Image before: {scrape['image']}") + log.debug(f"Image after: {high_res}") + scrape['image'] = high_res # If the scene is from sayuncle.com, we need to add the gay tag to the tags list if 'sayuncle.com' in scene_url: scrape['tags'].append({"name": "Gay"}) -scrape['image'] = scene_api_json.get('img') if use_local == 0: save_json(scene_api_json, scene_url) From 36667f17815abe34186339afa5af58a69630bccc Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:19:34 +0200 Subject: [PATCH 13/17] Update Teamskeet.yml --- scrapers/Teamskeet/Teamskeet.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Teamskeet/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml index 7dfd628ef..7927b2ad8 100644 --- a/scrapers/Teamskeet/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -9,4 +9,4 @@ sceneByURL: script: - python - TeamskeetAPI.py -# Last Updated March 04, 2024 \ No newline at end of file +# Last Updated March 07, 2024 From be1642ccbb9e6818efb13d99164983711c01f2bb Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:19:54 +0200 Subject: [PATCH 14/17] Update TeamskeetAPI.py --- scrapers/Teamskeet/TeamskeetAPI.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 1c4f11495..d943e8fc4 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -30,7 +30,7 @@ def save_json(api_json, url): USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) -DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Sayuncle") +DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") # Not necessary but why not ? @@ -137,4 +137,4 @@ def save_json(api_json, url): if use_local == 0: save_json(scene_api_json, scene_url) -print(json.dumps(scrape)) \ No newline at end of file +print(json.dumps(scrape)) From 1ba0c008cced05b8a1c620c04123270793f639a6 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:17:16 +0300 Subject: [PATCH 15/17] Update TeamskeetAPI.py Add Support For MYLF --- scrapers/Teamskeet/TeamskeetAPI.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index d943e8fc4..d705dc00f 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -30,7 +30,7 @@ def save_json(api_json, url): USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) -DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") +DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Sayuncle") # Not necessary but why not ? @@ -54,8 +54,12 @@ def save_json(api_json, url): ORIGIN = 'https://www.teamskeet.com' REFERER = 'https://www.teamskeet.com/' API_BASE = 'https://store2.psmcdn.net/ts-elastic-d5cat0jl5o-videoscontent/_doc/' +elif 'mylf.com' in scene_url: + ORIGIN = 'https://www.mylf.com' + REFERER = 'https://www.mylf.com/' + API_BASE = 'https://store2.psmcdn.net/mylf-elastic-hka5k7vyuw-videoscontent/_doc/' else: - log.error('The URL is not from a Teamskeet or SayUncle URL (e.g. teamskeet.com/movies/*****)') + log.error('The URL is not from a Teamskeet, MYLF or SayUncle URL (e.g. teamskeet.com/movies/*****)') sys.exit(1) From a6b6b3d76876fe960cc41e06c7e518d4104d3bc2 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:17:48 +0300 Subject: [PATCH 16/17] Add Support For mylf --- scrapers/Teamskeet/Teamskeet.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapers/Teamskeet/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml index 7927b2ad8..9994a7e62 100644 --- a/scrapers/Teamskeet/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -5,6 +5,7 @@ sceneByURL: - url: - sayuncle.com/movies/ - teamskeet.com/movies/ + - mylf.com/movies/ action: script script: - python From 3f02b975bfc2768a61f0bd9f9c8a1f308a9b2a67 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:30:13 +0300 Subject: [PATCH 17/17] Update TeamskeetAPI.py --- scrapers/Teamskeet/TeamskeetAPI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index d705dc00f..4a3f69c68 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -30,7 +30,7 @@ def save_json(api_json, url): USERFOLDER_PATH = str(pathlib.Path(__file__).parent.parent.absolute()) -DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Sayuncle") +DIR_JSON = os.path.join(USERFOLDER_PATH, "scraperJSON","Teamskeet") # Not necessary but why not ?