forked from stashapp/CommunityScrapers
Commit: Merge branch 'stashapp:master' into master
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing 367 changed files with 16,313 additions and 5,355 deletions.

@@ -0,0 +1,51 @@
name: Deploy repository to Github Pages

on:
  push:
    branches: [ master, stable ]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

jobs:
  build:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout main
        uses: actions/checkout@v2
        with:
          path: master
          ref: master
          fetch-depth: '0'
      - run: |
          cd master
          ./build_site.sh ../_site/stable
      - name: Checkout Stable
        uses: actions/checkout@v2
        with:
          path: dev
          # replace with develop tag/branch when necessary
          ref: master
          fetch-depth: '0'
      - run: |
          cd dev
          ../master/build_site.sh ../_site/develop
      - uses: actions/upload-pages-artifact@v2

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-22.04
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v2

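
For orientation: the build job above runs build_site.sh twice, writing one scraper index under _site/stable and one under _site/develop, and the deploy job publishes that _site directory to GitHub Pages. A minimal sketch for spot-checking the published indexes, assuming the default project-pages URL layout and that the fork keeps the CommunityScrapers repository name; OWNER is a placeholder, not taken from this commit:

# hedged sketch -- OWNER is a placeholder for the account owning this fork
base="https://OWNER.github.io/CommunityScrapers"
curl -fsSL "$base/stable/index.yml" | head -n 20
curl -fsSL "$base/develop/index.yml" | head -n 20
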
@@ -5,3 +5,5 @@ yarn-error.log
 # Scraper-generated files
 /scrapers/*.ini
 **/__pycache__/
+
+/_site

@@ -0,0 +1,6 @@
{
  "yaml.schemas": {
    "validator/scraper.schema.json": "*.yml"
  },
  "python.analysis.typeCheckingMode": "basic"
}

@@ -0,0 +1,89 @@
#!/bin/bash

# builds a repository of scrapers
# outputs to _site with the following structure:
# index.yml
# <scraper_id>.zip
# Each zip file contains the scraper.yml file and any other files in the same directory

outdir="$1"
if [ -z "$outdir" ]; then
    outdir="_site"
fi

rm -rf "$outdir"
mkdir -p "$outdir"

buildScraper()
{
    f=$1
    dir=$(dirname "$f")

    # get the scraper id from the filename
    scraper_id=$(basename "$f" .yml)
    versionFile=$f
    if [ "$scraper_id" == "package" ]; then
        scraper_id=$(basename "$dir")
    fi

    if [ "$dir" != "./scrapers" ]; then
        versionFile="$dir"
    fi

    echo "Processing $scraper_id"

    # create a directory for the version
    version=$(git log -n 1 --pretty=format:%h -- "$versionFile")
    updated=$(TZ=UTC0 git log -n 1 --date="format-local:%F %T" --pretty=format:%ad -- "$versionFile")

    # create the zip file
    # copy other files
    zipfile=$(realpath "$outdir/$scraper_id.zip")

    name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/')
    ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//')
    dep=$(grep "^# requires:" "$f" | cut -c 12- | sed -e 's/\r//')

    # always ignore package file
    ignore="-x $ignore package"

    pushd "$dir" > /dev/null
    if [ "$dir" != "./scrapers" ]; then
        zip -r "$zipfile" . ${ignore} > /dev/null
    else
        zip "$zipfile" "$scraper_id.yml" > /dev/null
    fi
    popd > /dev/null

    # write to spec index
    echo "- id: $scraper_id
  name: $name
  version: $version
  date: $updated
  path: $scraper_id.zip
  sha256: $(sha256sum "$zipfile" | cut -d' ' -f1)" >> "$outdir"/index.yml

    # handle dependencies
    if [ ! -z "$dep" ]; then
        echo "  requires:" >> "$outdir"/index.yml
        for d in ${dep//,/ }; do
            echo "    - $d" >> "$outdir"/index.yml
        done
    fi

    echo "" >> "$outdir"/index.yml
}

# find all yml files in ./scrapers - these are packaged individually
for f in ./scrapers/*.yml; do
    buildScraper "$f"
done

find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do
    buildScraper "$f"
done

# handle dependency packages
find ./scrapers/ -mindepth 2 -name package -print0 | while read -d $'\0' f; do
    buildScraper "$f"
done
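
Since the script above documents its output layout only in the header comments, here is a hedged sketch of running it locally and of the shape of one emitted index entry, based on the echo calls in buildScraper; the id, hash, date and checksum values are illustrative placeholders, and the requires block appears only when the scraper .yml carries a "# requires:" comment:

# hedged sketch of local usage and of the resulting _site/index.yml shape
./build_site.sh _site
# - id: ExampleScraper                # placeholder id (basename of the .yml or its directory)
#   name: Example Scraper
#   version: abc1234                  # short hash of the last commit touching the scraper
#   date: 2023-08-24 12:00:00         # UTC date of that commit
#   path: ExampleScraper.zip
#   sha256: <sha256 of the zip>
#   requires:                         # only if e.g. "# requires: Algolia" is present
#     - Algolia
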
@@ -0,0 +1,35 @@
name: "1 Pass For All Sites"
sceneByURL:
  - action: scrapeXPath
    url:
      - 1passforallsites.com/episode/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          selector: //a[contains(@href,'?site=')]
      Title:
        selector: //title
        postProcess:
          - replace:
              - regex: (^.+) - 1 .+$
                with: $1
      Details: //div[@class="sp-info-txt"]/p/text()
      Performers:
        Name:
          selector: //p[@class="sp-info-name"]/a/text()
      Tags:
        Name:
          selector: //p[@class="niches-list"]/a/text()
      Date:
        selector: //li[contains(text(),"Added:")]
        postProcess:
          - replace:
              - regex: "Added\\: (.+)"
                with: $1
          - parseDate: 2 Jan 2006
      Image: //video/@poster

# Last Updated July 12, 2023

scrapers/Algolia_21Naturals.yml → scrapers/21Naturals/21Naturals.yml
9 changes: 5 additions & 4 deletions
@@ -1,30 +1,31 @@
+# requires: Algolia
 name: "21Naturals"
 sceneByURL:
   - action: script
     url:
       - 21naturals.com/en/video
     script:
       - python
-      - Algolia.py
+      - ../Algolia/Algolia.py
       - 21naturals
 sceneByFragment:
   action: script
   script:
     - python
-    - Algolia.py
+    - ../Algolia/Algolia.py
     - 21naturals
 sceneByName:
   action: script
   script:
     - python
-    - Algolia.py
+    - ../Algolia/Algolia.py
     - 21naturals
     - searchName
 sceneByQueryFragment:
   action: script
   script:
     - python
-    - Algolia.py
+    - ../Algolia/Algolia.py
     - 21naturals
     - validName
 # Last Updated March 23, 2022

scrapers/Algolia_21Sextreme.yml → scrapers/21Sextreme/21Sextreme.yml
9 changes: 5 additions & 4 deletions
@@ -1,30 +1,31 @@
+# requires: Algolia
 name: "21Sextreme"
 sceneByURL:
   - action: script
     url:
       - 21sextreme.com/en/video
     script:
       - python
-      - Algolia.py
+      - ../Algolia/Algolia.py
       - 21sextreme
 sceneByFragment:
   action: script
   script:
     - python
-    - Algolia.py
+    - ../Algolia/Algolia.py
     - 21sextreme
 sceneByName:
   action: script
   script:
     - python
-    - Algolia.py
+    - ../Algolia/Algolia.py
     - 21sextreme
     - searchName
 sceneByQueryFragment:
   action: script
   script:
     - python
-    - Algolia.py
+    - ../Algolia/Algolia.py
     - 21sextreme
     - validName
 # Last Updated March 23, 2022

@@ -0,0 +1,37 @@
name: A POV Story

sceneByURL:
  - action: scrapeXPath
    url:
      - apovstory.com/trailers/
    scraper: sceneScraper
xPathScrapers:
  sceneScraper:
    scene:
      Studio:
        Name:
          fixed: "A POV Story"
      Title:
        selector: //div[@class = 'trailerArea centerwrap']/h3
      Details:
        selector: //div[@class = 'trailerContent']//*//text()
        concat: "\n\n"
        postProcess:
          - replace:
              - regex: ^Description:\s*
                with:
      Tags:
        Name: //li/span[contains(text(),'CATEGORIES')]/parent::li//a//text()
      Performers:
        Name: //li/span[contains(text(),'FEATURING')]/parent::li//a//text()
      Image:
        selector: //div[@class="player-thumb"]/img/@src0_3x
        postProcess:
          - replace:
              - regex: ^
                with: "https://apovstory.com"
      Date:
        selector: //span[contains(text(),'RELEASE DATE')]/parent::li/text()
        postProcess:
          - parseDate: "January 2, 2006"
# Last Updated August 24, 2023