From 11202e8773567b7c660f7dd85b8173ae8475c336 Mon Sep 17 00:00:00 2001
From: shark-lasers58 <165065833+shark-lasers58@users.noreply.github.com>
Date: Thu, 24 Oct 2024 17:21:46 -0400
Subject: [PATCH] Ersties.com URL Scraper (#2073)

---
 scrapers/Ersties/Ersties.py  | 148 +++++++++++++++++++++++++++++++++++
 scrapers/Ersties/Ersties.yml |  26 ++++++
 scrapers/py_common/util.py   |   1 +
 3 files changed, 175 insertions(+)
 create mode 100644 scrapers/Ersties/Ersties.py
 create mode 100644 scrapers/Ersties/Ersties.yml

diff --git a/scrapers/Ersties/Ersties.py b/scrapers/Ersties/Ersties.py
new file mode 100644
index 000000000..47eceef02
--- /dev/null
+++ b/scrapers/Ersties/Ersties.py
@@ -0,0 +1,148 @@
+import sys
+import requests
+import re
+import json
+from py_common.util import guess_nationality
+
+# Authentication tokens and cookies are needed for this scraper. Use the network console in your browser's developer tools to copy these values from the request headers of an API call.
+# Auth values for the request headers; they are empty here and must be filled in before the scraper is used.
+authorization = ''
+cookie = ''
+x_visit_uid = ''
+
+# Headers passed to every requests.get call in this script
+scrape_headers = {
+    'authorization': authorization,
+    'cookie': cookie,
+    'x-visit-uid': x_visit_uid,
+}
+
+def readJSONInput():
+    """Return the JSON document that Stash writes to this script's stdin, decoded."""
+    raw = sys.stdin.read()
+    return json.loads(raw)
+
+def debugPrint(t):
+    print(t, file=sys.stderr)  # stderr keeps stdout free for the JSON result; print() also accepts non-str messages
+
+def get_scene(inputurl):
+    """Scrape scene metadata from the Ersties API for a scene page URL.
+
+    inputurl must contain a '#play-<id>-comments' fragment; <id> is the video
+    ID requested from api.ersties.com. Returns a dict with title, code,
+    details, studio, tags, performers and, when a main thumbnail exists,
+    image. Exits with status 1 (message on stderr) when the URL carries no
+    scene ID or the API does not answer with HTTP 200.
+    """
+    # Tolerate a missing URL (None) instead of raising TypeError in re.search.
+    match = re.search(r'#play-(\d+)-comments', inputurl or '')
+    if not match:
+        debugPrint('No scene ID found in URL. Please make sure you are using the URL ending with "#play-nnnn-comments".')
+        sys.exit(1)
+
+    scrape_url = 'https://api.ersties.com/videos/' + match.group(1)
+    # A timeout keeps the scraper from hanging forever on a stalled connection.
+    scrape = requests.get(scrape_url, headers=scrape_headers, timeout=30)
+    if scrape.status_code != 200:
+        debugPrint('Response: '+str(scrape.status_code)+'. Please check your auth header.')
+        sys.exit(1)
+    scrape_data = scrape.json()
+
+    ret = {
+        'title': scrape_data['title_en'],
+        'code': str(scrape_data['id']),
+        'details': scrape_data['model']['description_en'],
+        'studio': {'name': 'Ersties'},
+        'tags': [{'name': x['name_en']} for x in scrape_data['tags']],
+        'performers': [{'name': x['name_en']} for x in scrape_data['participated_models']],
+    }
+
+    # Only the thumbnail flagged as main is used as the cover; without one, no image key is set.
+    thumb_base = 'https://thumb.ersties.com/width=900,height=500,fit=cover,quality=85,sharpen=1,format=jpeg/content/images_mysql/images_videothumbnails/backup/'
+    for thumbnail in scrape_data['thumbnails']:
+        if thumbnail['is_main']:
+            ret['image'] = thumb_base + thumbnail['file_name']
+            break
+    return ret
+
+def get_group(inputurl):
+    """Scrape group (movie) metadata from the Ersties API for a profile URL.
+
+    inputurl must contain 'profile/<id>'; <id> is the model ID requested from
+    api.ersties.com. Returns a dict with name, synopsis, studio and
+    front_image. Exits with status 1 (message on stderr) when the URL carries
+    no ID or the API does not answer with HTTP 200.
+    """
+    # Tolerate a missing URL (None) instead of raising TypeError in re.search.
+    match = re.search(r'profile/(\d+)', inputurl or '')
+    if not match:
+        debugPrint('No scene/group ID found in URL. Please make sure you are using the URL ending with "profile/nnnn".')
+        sys.exit(1)
+
+    # Group data is read from the models endpoint, keyed by the profile ID.
+    scrape_url = 'https://api.ersties.com/models/' + match.group(1)
+    # A timeout keeps the scraper from hanging forever on a stalled connection.
+    scrape = requests.get(scrape_url, headers=scrape_headers, timeout=30)
+    if scrape.status_code != 200:
+        debugPrint('Response: '+str(scrape.status_code)+'. Please check your auth header.')
+        sys.exit(1)
+    scrape_data = scrape.json()
+
+    # The cover URL embeds fixed resize parameters and requests JPEG output.
+    cover_base = 'https://thumb.ersties.com/width=510,height=660,fit=cover,quality=85,sharpen=1,format=jpeg/content/images_mysql/Model_Cover_Image/backup/'
+    ret = {
+        'name': scrape_data['name_en'],
+        'synopsis': scrape_data['description_en'],
+        'studio': {'name': 'Ersties'},
+        'front_image': cover_base + scrape_data['thumbnail'],
+    }
+    return ret
+
+def get_performer(inputurl):
+    """Scrape performer metadata from the Ersties API for a profile URL.
+
+    inputurl must contain 'profile/<id>'; <id> is the model ID requested from
+    api.ersties.com. Returns a dict with name, details, image and, when the
+    API reports a location, country. Exits with status 1 (message on stderr)
+    when the URL carries no ID or the API does not answer with HTTP 200.
+    """
+    # Tolerate a missing URL (None) instead of raising TypeError in re.search.
+    match = re.search(r'profile/(\d+)', inputurl or '')
+    if not match:
+        debugPrint('No performer ID found in URL. Please make sure you are using the URL ending with "profile/nnnn".')
+        sys.exit(1)
+
+    # Performer data is read from the models endpoint, keyed by the profile ID.
+    scrape_url = 'https://api.ersties.com/models/' + match.group(1)
+    # A timeout keeps the scraper from hanging forever on a stalled connection.
+    scrape = requests.get(scrape_url, headers=scrape_headers, timeout=30)
+    if scrape.status_code != 200:
+        # Report the HTTP failure itself; the ID was already extracted successfully above.
+        debugPrint('Response: '+str(scrape.status_code)+'. Please check your auth header.')
+        sys.exit(1)
+    scrape_data = scrape.json()
+
+    ret = {'name': scrape_data['name_en']}
+    # Leave country out entirely when the API reports no location.
+    if scrape_data['location_en'] is not None:
+        ret['country'] = guess_nationality(scrape_data['location_en'])
+    ret['details'] = scrape_data['description_en']
+    # The image URL embeds fixed resize parameters and requests AVIF output.
+    image_base = 'https://thumb.ersties.com/width=510,height=660,fit=cover,quality=85,sharpen=1,format=avif/content/images_mysql/Model_Cover_Image/backup/'
+    ret['image'] = image_base + scrape_data['thumbnail']
+    return ret
+
+if __name__ == '__main__':
+    # The scrape mode arrives as the first CLI argument; the fragment with the URL arrives as JSON on stdin.
+    handlers = {
+        'sceneByURL': get_scene,
+        'groupByURL': get_group,
+        'performerByURL': get_performer,
+    }
+    mode = sys.argv[1] if len(sys.argv) > 1 else ''
+    if mode not in handlers:
+        debugPrint('Unknown or missing scrape mode: ' + repr(mode))
+        sys.exit(1)
+    fragment = readJSONInput()
+    # The scraped result is written to stdout as a single JSON document.
+    print(json.dumps(handlers[mode](fragment.get('url'))))
\ No newline at end of file
diff --git a/scrapers/Ersties/Ersties.yml b/scrapers/Ersties/Ersties.yml
new file mode 100644
index 000000000..3e4a7f7f3
--- /dev/null
+++ b/scrapers/Ersties/Ersties.yml
@@ -0,0 +1,26 @@
+name: Ersties
+sceneByURL:  # Ersties.py expects the URL to contain "#play-<id>-comments"
+  - action: script
+    url:
+      - ersties.com/
+    script:
+      - python
+      - Ersties.py
+      - sceneByURL
+movieByURL:  # runs Ersties.py in its groupByURL mode; the URL must contain "profile/<id>"
+  - action: script
+    url:
+      - ersties.com/
+    script:
+      - python
+      - Ersties.py
+      - groupByURL
+performerByURL:  # Ersties.py expects the URL to contain "profile/<id>"
+  - action: script
+    url:
+      - ersties.com/
+    script:
+      - python
+      - Ersties.py
+      - performerByURL
+# Last Updated October 21, 2024
\ No newline at end of file
diff --git a/scrapers/py_common/util.py b/scrapers/py_common/util.py
index 3cecc3800..e8ea60c82 100644
--- a/scrapers/py_common/util.py
+++ b/scrapers/py_common/util.py
@@ -672,5 +672,6 @@ def guess_nationality(country: str) -> str:
     "zambian": "Zambia",
     "zimbabwean": "Zimbabwe",
     "åland island": "Åland Islands",
+    "united states": "USA",
     **{s.lower(): "USA" for s in US_states},
 }