diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..888910b --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,40 @@ +name: Deploy index to Github Pages + +on: + push: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + id-token: write + +jobs: + build: + runs-on: ubuntu-22.04 + steps: + - name: Checkout master + uses: actions/checkout@v2 + with: + path: master + ref: master + fetch-depth: '0' + - run: | + cd master + ./build_site.sh ../_site/ + - uses: actions/upload-pages-artifact@v2 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-22.04 + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..04faab0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +# Scraper generated files +*.json + +# Index build artifact +/_site \ No newline at end of file diff --git a/build_site.sh b/build_site.sh new file mode 100755 index 0000000..4987225 --- /dev/null +++ b/build_site.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# builds a repository of scrapers +# outputs to _site with the following structure: +# index.yml +# .zip +# Each zip file contains the scraper.yml file and any other files in the same directory + +outdir="$1" +if [ -z "$outdir" ]; then + outdir="_site" +fi + +rm -rf "$outdir" +mkdir -p "$outdir" + +buildScraper() +{ + f=$1 + dir=$(dirname "$f") + + # get the scraper id from the filename + scraper_id=$(basename "$f" .yml) + versionFile=$f + if [ "$scraper_id" == "package" ]; then + scraper_id=$(basename "$dir") + fi + + if [ "$dir" != "./scrapers" ]; then + versionFile="$dir" + fi + + echo "Processing $scraper_id" + + # create a directory for the 
version + version=$(git log -n 1 --pretty=format:%h -- "$versionFile") + updated=$(TZ=UTC0 git log -n 1 --date="format-local:%F %T" --pretty=format:%ad -- "$versionFile") + + # create the zip file + # copy other files + zipfile=$(realpath "$outdir/$scraper_id.zip") + + name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') + ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//') + dep=$(grep "^# requires:" "$f" | cut -c 12- | sed -e 's/\r//') + + # always ignore package file + ignore="-x $ignore package" + + pushd "$dir" > /dev/null + if [ "$dir" != "./scrapers" ]; then + zip -r "$zipfile" . ${ignore} > /dev/null + else + zip "$zipfile" "$scraper_id.yml" > /dev/null + fi + popd > /dev/null + + # write to spec index + echo "- id: $scraper_id + name: $name + version: $version + date: $updated + path: $scraper_id.zip + sha256: $(sha256sum "$zipfile" | cut -d' ' -f1)" >> "$outdir"/index.yml + + # handle dependencies + if [ ! -z "$dep" ]; then + echo " requires:" >> "$outdir"/index.yml + for d in ${dep//,/ }; do + echo " - $d" >> "$outdir"/index.yml + done + fi + + echo "" >> "$outdir"/index.yml +} + +# find all yml files in ./scrapers - these are packages individually +for f in ./scrapers/*.yml; do + buildScraper "$f" +done + +find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do + buildScraper "$f" +done + +# handle dependency packages +find ./scrapers/ -mindepth 2 -name package -print0 | while read -d $'\0' f; do + buildScraper "$f" +done diff --git a/scrapers/AyloAPI/README.md b/scrapers/AyloAPI/README.md new file mode 100644 index 0000000..9a7d6e0 --- /dev/null +++ b/scrapers/AyloAPI/README.md @@ -0,0 +1,34 @@ +# The Aylo API scraper + +This is arguably the biggest scraper in the repo and covers a _lot_ of networks and studios. 
It is +composed of one main file that contains the functions necessary to scrape scenes, movies, galleries +and performers from the Aylo API along with a few supporting files with functions that handle things +like constructing URL slugs and caching instance tokens. + +Design goals: + +- Split scrapers that can handle the individual complexities of subnetworks without overcomplicating the main scraper +- Easy to modify and understand: documentation, examples + +These functions are designed to be open for extension, but closed to modification — but what does this mean? +The networks and studios in the Aylo API differ in how they construct their URLs and even how +their parent/child studio relationships are expressed; these functions could easily take on a lot of +complexity if they were to handle every special case. Instead these scraping functions return their +results in a standard format that works for most studios while also optionally taking a postprocessing +function that callers can supply to handle their special requirements. + +The standard URL formats that can vary: +scenes: `https://www.<brand-domain>.com/scene/<id>/<title-slug>` +movies: `https://www.<brand-domain>.com/movie/<id>/<title-slug>` +performers: `https://www.<brand-domain>.com/model/<id>/<name-slug>` + +`<brand-domain>` is based on the parent studio: `bangbros` for Bang Bros, `gaywire` for Gay Wire, +`bigstr` for BigStr (which has since consolidated under the Czech Hunter name, so those URLs are wrong!) + +Uses the `parse_args` helper from [py_common](../py_common/util.py) +Developed to be ergonomic for testing and integrating into other Python scripts + +The simplest case is exemplified by the Babes network: they use the standard URL formats and their +parent studio domain `www.babes.com` is correct for all substudios. Their scraper does not need +to make any changes to the results returned by the API, so their scraper is fully defined in [Babes.yml](../Babes.yml). +The only thing it needs to do is specify which domains it should search, which can be done inline. 
diff --git a/scrapers/AyloAPI/aylo_tokens.json b/scrapers/AyloAPI/aylo_tokens.json new file mode 100644 index 0000000..d996e60 --- /dev/null +++ b/scrapers/AyloAPI/aylo_tokens.json @@ -0,0 +1,386 @@ +{ + "babes": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjczMzgxLCJicmFuZCI6InNwaWNldmlkcyIsImhvc3RuYW1lIjoid3d3LmJhYmVzLmNvbSJ9.dIez_M5MX5OquT2IgealHlxlzt6LP_jhd-QIy-8Qm28", + "date": "2024-01-13" + }, + "bangbros": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY5ODIxLCJicmFuZCI6ImJhbmdicm9zIiwiaG9zdG5hbWUiOiJiYW5nYnJvcy5jb20ifQ.g_Kzzx0Q0Ad33Gz2yPF8UawT9-1FnbXNS6jyYEdlvYI", + "date": "2024-01-14" + }, + "biempire": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6NTQ1NzEsImJyYW5kIjoiYmllbXBpcmUiLCJob3N0bmFtZSI6Ind3dy5iaWVtcGlyZS5jb20ifQ.zlcLI39CrBAr4rIx-_ZXq91A9Y4pEqf3ZMWuYEKl_qk", + "date": "2024-01-14" + }, + "bigdicksatschool": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MDcxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LmJpZ2RpY2tzYXRzY2hvb2wuY29tIn0.7zQbVdCUXDbN4AYwFgXFZbJjjg5vMTTIxEzaY5bAgLM", + "date": "2024-01-15" + }, + "bigstr": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjYwMDYxLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmJpZ3N0ci5jb20ifQ.qLm6UpcpW2QQZXEUQnZvGYhuPi6tiHxoh88ev3A99hM", + "date": "2024-01-13" + }, + "brazzers": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjQ5NjUxLCJicmFuZCI6ImJyYXp6ZXJzIiwiaG9zdG5hbWUiOiJ3d3cuYnJhenplcnMuY29tIn0.xLKpnNYTUUJPJoGHmTkoWkrsGvXHz0ZUy7QGgiFjmlI", + "date": "2024-01-14" + }, + "brazzersnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MTU0NzEyLCJicmFuZCI6ImJyYXp6ZXJzIiwiaG9zdG5hbWUiOiJ3d3cuYnJhenplcnNuZXR3b3JrLmNvbSJ9.4LZa1vD5KBFNTk-yaTSyhnG2S_ELlSj1Pj9cl2Z8ybg", + "date": "2024-01-14" + }, + "bromo": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjUzMzQxLCJicmFuZCI6ImJyb21vIiwiaG9zdG5hbWUiOiJ3d3cuYnJvbW8uY29tIn0.n68hFr6ZunSD1ZxtiiCtTlhv06fR7loQYkXn8x2jjJs", + "date": "2024-01-14" + }, + "captainstabbin": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.xY9zDGi2QmRfO_eJGzXObKJ1PtkHFMyv-_zw3d59ZNk", + "date": "2024-01-01" + }, + "czechhunter": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjYxMjIxLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmN6ZWNoaHVudGVyLmNvbSJ9.mS6ep4G7oKOIuWNalgiMlkX0TuEwyWgzAJSqrsmfhK0", + "date": "2024-01-13" + }, + "dancingbear": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjY1NTMxLCJicmFuZCI6ImRhbmNpbmdiZWFyIiwiaG9zdG5hbWUiOiJkYW5jaW5nYmVhci5jb20ifQ.DXqM5yKmhPOSaJVDEKrQ_y5h8LIxwadDq3TPx2am1sk", + "date": "2024-01-13" + }, + "debtdandy": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjYwMTExLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmRlYnRkYW5keS5jb20ifQ.NJq_uz70EA81lmLIy1_7LegFP0wGawMKfsK-U8clhd4", + "date": "2024-01-14" + }, + "deviante": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0MDAxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuZGV2aWFudGUuY29tIn0.0GckeTblYYlppXL8meT2jokkzmdpZD6U6CY57uMsxJs", + "date": "2024-01-14" + }, + "devianthardcore": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0ODQ0ODAwLCJpZCI6MjY0OTMxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5kZXZpYW50aGFyZGNvcmUuY29tIn0.SQrcDL8k1NeQloD0yNJsFRSGU4vyXdVXhf6lv9HbrCY", + "date": "2024-01-08" + }, + "digitalplayground": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjM3NDkxLCJicmFuZCI6ImRpZ2l0YWxwbGF5Z3JvdW5kIiwiaG9zdG5hbWUiOiJ3d3cuZGlnaXRhbHBsYXlncm91bmQuY29tIn0.b7xSKSXWpejUwy8aWpZSQmpLk6tBzi-QUph6pvCidi8", + "date": "2024-01-14" + }, + "digitalplaygroundnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjM3NzIxLCJicmFuZCI6ImRpZ2l0YWxwbGF5Z3JvdW5kIiwiaG9zdG5hbWUiOiJ3d3cuZGlnaXRhbHBsYXlncm91bmRuZXR3b3JrLmNvbSJ9.VQKZVjcTY3AFnfpAJPdw63wvZnLP15-0JNQLyCRUaiU", + "date": "2024-01-14" + }, + "dilfed": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjQ2ODkxLCJicmFuZCI6Im1pbGZlZCIsImhvc3RuYW1lIjoid3d3LmRpbGZlZC5jb20ifQ.bqWh8kNyF2gEOgyluAoyFhNZA4G5EMpIbRbBy2mArJM", + "date": "2024-01-14" + }, + "dirtyscout": { + 
"token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjYwMDgxLCJicmFuZCI6ImJpZ3N0ciIsImhvc3RuYW1lIjoid3d3LmRpcnR5c2NvdXQuY29tIn0.pxILgJS7OD2t0RR9br0XHAssx0U9SLtjpS7tEc0_noQ", + "date": "2024-01-13" + }, + "doghousedigital": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY1MDQxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuZG9naG91c2VkaWdpdGFsLmNvbSJ9.x13Dr1OjOALHKWMZUDVhVQ1buAtDWKJuQtUwBSFyuCw", + "date": "2024-01-14" + }, + "dontbreakme": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.6LW-Wflaww_c39e_97XE3HP6FQAlfTnZfGz4btM4DL8", + "date": "2024-01-01" + }, + "erito": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6Mjc0MTIxLCJicmFuZCI6InNwaWNldmlkcyIsImhvc3RuYW1lIjoid3d3LmVyaXRvLmNvbSJ9.aVrgB4jmysBGYdnbACYK2NOtYTtOygJna_oTTdwJHyQ", + "date": "2024-01-14" + }, + "eroticspice": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0NTkxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuZXJvdGljc3BpY2UuY29tIn0.J6ZxQe7ditH8oJchyYdgXPTTeDukN7FzPCbCoGSHoHA", + "date": "2024-01-14" + }, + "fakehostel": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY1MDExLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtlaG9zdGVsLmNvbSJ9.fSrWn-ZLAHI4-ek4sf5sBJng6Or3Uz6q-EUH8stEOEs", + "date": "2024-01-14" + }, + "fakehub": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjU4NDkxLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtlaHViLmNvbSJ9.JJzjjxUFmH0K0AY_Zs_CQZOO1TC96BFW2ByvsgMdr1A", + "date": "2024-01-14" + }, + "fakehuboriginals": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjU4NDkxLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtlaHViLmNvbSJ9.pQb0Pd6jdRXca_wDxNu1zCWg7CEV8MIF6EMoJ2uPnPk", + "date": "2023-12-26" + }, + "faketaxi": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjY1MDcxLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5mYWtldGF4aS5jb20ifQ.EuM3CUMjeLZgBrNTG_ODUBlsYlO8bV3YP-9wmK7fQzQ", + "date": "2023-12-26" + }, + "familyhookups": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0ODQ0ODAwLCJpZCI6MjY0NzAxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5mYW1pbHlob29rdXBzLmNvbSJ9.cPhbFkOu7CgbZiZrlQInPKE5m_N7WoV61gtQswCtnSA", + "date": "2024-01-08" + }, + "familysinners": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0NjYxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuZmFtaWx5c2lubmVycy5jb20ifQ.rOYENp4F5moi2jUl9VpuLS1cufjlaz5BQc84kUSJyOE", + "date": "2024-01-14" + }, + "forgivemefather": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0MjUxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuZm9yZ2l2ZW1lZmF0aGVyLmNvbSJ9.1t4iyyrhOyDFY-WqvHoMuvtG2g0rwm--P2XV2pzWXgw", + "date": "2024-01-14" + }, + "gaywire": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjY3MzQxLCJicmFuZCI6ImdheXdpcmUiLCJob3N0bmFtZSI6ImdheXdpcmUuY29tIn0.WRhcvZbwPCezZHMVN5-xoP9a7yD7z6J9BYCtKoo6G64", + "date": "2024-01-14" + }, + "girlgrind": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0ODQ0ODAwLCJpZCI6Nzc4NzEsImJyYW5kIjoibWV0cm9oZCIsImhvc3RuYW1lIjoid3d3LmdpcmxncmluZC5jb20ifQ.lfqwG5emPDKuUoqDGj543y-lH6Sx5KbbXlpugZ2RHcU", + "date": "2024-01-08" + }, + "godsofmen": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MTQxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LmdvZHNvZm1lbi5jb20ifQ.kxYerSYKBslM8koEXSz_iZeA8jMsbMKW3yS9QMHtVqI", + "date": "2024-01-15" + }, + "happytugs": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.xY9zDGi2QmRfO_eJGzXObKJ1PtkHFMyv-_zw3d59ZNk", + "date": "2024-01-01" + }, + "hentaipros": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjE1NDAxLCJicmFuZCI6ImhlbnRhaXByb3MiLCJob3N0bmFtZSI6Ind3dy5oZW50YWlwcm9zLmNvbSJ9.4qAHHdTs0MIPvy9iIwUY42HyV0BbkAm_k36Gnk1GstM", + "date": "2024-01-14" + }, + "hentaiprosnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjE2NDgxLCJicmFuZCI6ImhlbnRhaXByb3MiLCJob3N0bmFtZSI6Ind3dy5oZW50YWlwcm9zbmV0d29yay5jb20ifQ.ir4ZKiDzvI9Vz1Cm3obHWki8NPWeqa3XE2s8A3tLYak", + "date": "2023-12-26" + }, + "househumpers": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzOTgwODAwLCJpZCI6MjY4NTExLCJicmFuZCI6InByb3BlcnR5c2V4IiwiaG9zdG5hbWUiOiJ3d3cuaG91c2VodW1wZXJzLmNvbSJ9.AhGPyoFD_0JftGCy9bTB1gNYIFTFBCNqZ1bLzimK51Q", + "date": "2023-12-29" + }, + "iconmale": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjU4NzkxLCJicmFuZCI6Imljb25tYWxlIiwiaG9zdG5hbWUiOiJ3d3cuaWNvbm1hbGUuY29tIn0.uDUGScCDYHXl700HHwZFO3GuQmdoR79rL-TKRJbLGHs", + "date": "2024-01-14" + }, + "iknowthatgirl": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6Mjg5NjEsImJyYW5kIjoibW9mb3MiLCJob3N0bmFtZSI6Ind3dy5pa25vd3RoYXRnaXJsLmNvbSJ9.5KvbPQi4FCxDXTRphXT6l0Wke1RcTcFWo6F4Yn41z9k", + "date": "2024-01-01" + }, + "jizzorgy": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE1OTYxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3Lmppenpvcmd5LmNvbSJ9.aCIl5dCJefQFMTp-l9HRtzWMFfPou8mrLv2k15tYXUA", + "date": "2024-01-15" + }, + "kinkyspa": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0ODQ0ODAwLCJpZCI6MjY0NTYxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5raW5reXNwYS5jb20ifQ.GlQ2W7RUB4ccLzOyQo259VABRiEhVxpvp4YesB_G5F4", + "date": "2024-01-08" + }, + "lesbea": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MTIxMzAyLCJicmFuZCI6InNleHlodWIiLCJob3N0bmFtZSI6Ind3dy5sZXNiZWEuY29tIn0.u-sfoE3jeFAVjDMYPemX5YBRKEraJS3VZi1nVGWEr24", + "date": "2023-12-26" + }, + "lilhumpers": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.xY9zDGi2QmRfO_eJGzXObKJ1PtkHFMyv-_zw3d59ZNk", + "date": "2024-01-01" + }, + "lookathernow": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6NzYzNTEsImJyYW5kIjoibG9va2F0aGVybm93IiwiaG9zdG5hbWUiOiJ3d3cubG9va2F0aGVybm93LmNvbSJ9.11akNT7NJ_OeyyL0B1oVMX_pVYNyqIDRaOPYKVv-i60", + "date": "2024-01-14" + }, + "loveherass": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0NTMxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cubG92ZWhlcmFzcy5jb20ifQ.UHWgRyAkxAd9fYCZz2X3d3bxWo8vJ5hFVYEi-YFUgcA", + "date": "2024-01-14" + }, + "men": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjY2ODUxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3Lm1lbi5jb20ifQ.ImVPqvkOD37Su1qNC8aEV981wIVV8LmqvkCFNGcocPw", + "date": "2024-01-15" + }, + "menofuk": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MTYxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3Lm1lbm9mdWsuY29tIn0.ZXJtw-4F6ICskcyCoE-p_yd1eCghiRk1-gd6a_i95Bo", + "date": "2024-01-15" + }, + "metrohd": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MDE3NjAwLCJpZCI6MjY0MjQxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5tZXRyb2hkLmNvbSJ9.oBwtbhLfdCY6fGgKYelvBUUFQj6OtrDSiQ6xkkB5L9k", + "date": "2024-01-10" + }, + "milehighmedia": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjU4NjkxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cubWlsZWhpZ2htZWRpYS5jb20ifQ.tMxehQzxQa0fheJRxTht6BL90A-6e5LM7Nmgg8AOhrc", + "date": "2024-01-14" + }, + "milfed": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MTM5NjEyLCJicmFuZCI6Im1pbGZlZCIsImhvc3RuYW1lIjoid3d3Lm1pbGZlZC5jb20ifQ.AeLuMvIp224v_bt5S719-25nhLCDkPSZAP4wGyI9jGI", + "date": "2024-01-14" + }, + "milfhunter": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.xY9zDGi2QmRfO_eJGzXObKJ1PtkHFMyv-_zw3d59ZNk", + "date": "2024-01-01" + }, + "mofos": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.6LW-Wflaww_c39e_97XE3HP6FQAlfTnZfGz4btM4DL8", + "date": "2024-01-01" + }, + "mofosnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjYzMTMxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3NuZXR3b3JrLmNvbSJ9.vivcuRBK3_21m7n1hno0h4naiQ-sOuJydpZ90YPjHME", + "date": "2023-12-26" + }, + "momsbangteens": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.xY9zDGi2QmRfO_eJGzXObKJ1PtkHFMyv-_zw3d59ZNk", + "date": "2024-01-01" + }, + "momslickteens": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.cPLwiHdYoKqDZNyp0G68KYx_pxiTJJiciXa0c5s2miw", + "date": "2024-01-02" + }, + "moneytalks": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.cPLwiHdYoKqDZNyp0G68KYx_pxiTJJiciXa0c5s2miw", + "date": "2024-01-02" + }, + "mygf": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MTA0MDAwLCJpZCI6MjY0MDYxLCJicmFuZCI6Im15Z2YiLCJob3N0bmFtZSI6Im15Z2YuY29tIn0.tKnK7NP2YtvOzVwHWA1T7P_cnvKEH98Hhgcthi0Zr9M", + "date": "2024-01-12" + }, + "noirmale": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjU4NjcxLCJicmFuZCI6Im5vaXJtYWxlIiwiaG9zdG5hbWUiOiJ3d3cubm9pcm1hbGUuY29tIn0.-1gvpFmxUcNvqY95qDSHINEm-LV8yULmbbGhEJP97sg", + "date": "2024-01-14" + }, + "papi": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjU5MTExLCJicmFuZCI6InBhcGkiLCJob3N0bmFtZSI6Ind3dy5wYXBpLmNvbSJ9.84mPDwyQedxENB204Q_lD6nKokG6gFLEAGYgJXexT1c", + "date": "2023-12-26" + }, + "prettydirtyteens": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0NDQxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cucHJldHR5ZGlydHl0ZWVucy5jb20ifQ.jBrrXG8YKgr_j0xVkZkbjhW3I7ANbsigRYjappaZSew", + "date": "2024-01-14" + }, + "propertysex": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzOTgwODAwLCJpZCI6MjU4NjAxLCJicmFuZCI6InByb3BlcnR5c2V4IiwiaG9zdG5hbWUiOiJ3d3cucHJvcGVydHlzZXguY29tIn0.9gvdHqGErARk33k3zuWa6u_waqQz6IAa4_Mf4DY1XjA", + "date": "2023-12-29" + }, + "publicagent": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MTIxMjYyLCJicmFuZCI6ImZha2VodWIiLCJob3N0bmFtZSI6Ind3dy5wdWJsaWNhZ2VudC5jb20ifQ.C-I-nvIAagBdk9Ye2kxDEP4l_-_CHqqtGb3vbOWqNj8", + "date": "2024-01-14" + }, + "publicpickups": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MjQwMDAwLCJpZCI6MjQ3ODAxLCJicmFuZCI6Im1vZm9zIiwiaG9zdG5hbWUiOiJ3d3cubW9mb3MuY29tIn0.6LW-Wflaww_c39e_97XE3HP6FQAlfTnZfGz4btM4DL8", + "date": "2024-01-01" + }, + "realitydudes": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0NzU4NDAwLCJpZCI6MjY5MTAxLCJicmFuZCI6InJlYWxpdHlkdWRlcyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlkdWRlcy5jb20ifQ.aY3emEKvzg-qqtacbEr9B3i3cJYhxgSrGhg5S9Cx-B4", + "date": "2024-01-07" + }, + "realitydudesnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjUzMzIxLCJicmFuZCI6InJlYWxpdHlkdWRlcyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlkdWRlc25ldHdvcmsuY29tIn0.n1aqnn_ULA33_-4yIXTZ4bV5NS0VVImL06EOLnf6aJU", + "date": "2023-12-26" + }, + "realityjunkies": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY0NzkxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cucmVhbGl0eWp1bmtpZXMuY29tIn0.PXDDcG2dbaa9uEtzVacJ_Q_C17WjdC8ckohl8GXNoT4", + "date": "2024-01-14" + }, + "realitykings": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.ZgcyDMd8zTGnFqUkz9nYLSq18RyxZXPUgbqavAqxV1Q", + "date": "2024-01-14" + }, + "recklessinmiami": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.cPLwiHdYoKqDZNyp0G68KYx_pxiTJJiciXa0c5s2miw", + "date": "2024-01-02" + }, + "rk": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjQxMTUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJrLmNvbSJ9.Kea4NgAghqZA4m4wvNtkrttvJT9X0BonVDGmyrPN_lA", + "date": "2024-01-02" + }, + "roundandbrown": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.cPLwiHdYoKqDZNyp0G68KYx_pxiTJJiciXa0c5s2miw", + "date": "2024-01-02" + }, + "seancody": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjczMDExLCJicmFuZCI6InNlYW5jb2R5IiwiaG9zdG5hbWUiOiJ3d3cuc2VhbmNvZHkuY29tIn0.JvBKQJKXDYd-kE9lRnsmDLA6gqhIQUuv2kfaKBGeI0A", + "date": "2023-12-26" + }, + "sexworking": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MjY1MjQxLCJicmFuZCI6ImRldmlhbnRlIiwiaG9zdG5hbWUiOiJ3d3cuc2V4d29ya2luZy5jb20ifQ.ey6HFniTwjXqcgkoAVhbxujOoTz7ZmxAusG7L9lrr8Y", + "date": "2024-01-14" + }, + "sexyhub": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzODA4MDAwLCJpZCI6MjU4ODUxLCJicmFuZCI6InNleHlodWIiLCJob3N0bmFtZSI6Ind3dy5zZXh5aHViLmNvbSJ9.exEPkAKkOVWKfZDMgVW1gOSQiu6G6UzQyHk7FueBWew", + "date": "2023-12-27" + }, + "shewillcheat": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0ODQ0ODAwLCJpZCI6MjY0NzIxLCJicmFuZCI6Im1ldHJvaGQiLCJob3N0bmFtZSI6Ind3dy5zaGV3aWxsY2hlYXQuY29tIn0.MKiBOqdJlgQ0GQRJj5Z5WbvtMy5jqiRHrA2adTJrx4M", + "date": "2024-01-08" + }, + "sneakysex": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU2MDUxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnJlYWxpdHlraW5ncy5jb20ifQ.cPLwiHdYoKqDZNyp0G68KYx_pxiTJJiciXa0c5s2miw", + "date": "2024-01-02" + }, + "squirted": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjY0MTUxLCJicmFuZCI6InNxdWlydGVkIiwiaG9zdG5hbWUiOiJ3d3cuc3F1aXJ0ZWQuY29tIn0.hj4VSLqrLSvkh_6KbDR2i5jfZu524ZsD3VxgTBbRukk", + "date": "2023-12-26" + }, + "str8togay": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE1ODYxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LnN0cjh0b2dheS5jb20ifQ.Tkse0n4sqcqBma004UCEGDWo1P5-AwbY2K0azbyqX5I", + "date": "2024-01-15" + }, + "sweetheartvideo": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjU4NzMxLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuc3dlZXRoZWFydHZpZGVvLmNvbSJ9.cyeKpqqImmdaL1qAb9z8Rovqf3qNTMS4xtqN4cmhvHI", + "date": "2024-01-13" + }, + "sweetsinner": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjU4NzExLCJicmFuZCI6Im1pbGVoaWdoIiwiaG9zdG5hbWUiOiJ3d3cuc3dlZXRzaW5uZXIuY29tIn0.tL-FSKULGnkt263GtgLVcPf8sWv14tnfx5hVQZHG3nk", + "date": "2024-01-13" + }, + "taboomale": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1MzYzMjAwLCJpZCI6MTU0MDkyLCJicmFuZCI6InRhYm9vbWFsZSIsImhvc3RuYW1lIjoid3d3LnRhYm9vbWFsZS5jb20ifQ.kcqqqXg6jB_FvpUs_Z7YMzvcN2hIHp55rwKv0bdcfE4", + "date": "2024-01-14" + }, + "teenslovehugecocks": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjY4MjIxLCJicmFuZCI6InJlYWxpdHlraW5ncyIsImhvc3RuYW1lIjoid3d3LnRlZW5zbG92ZWh1Z2Vjb2Nrcy5jb20ifQ.nyYSon9gciSSGFP55yt3hFA8QpUDz9TBzYnZi6h-gpg", + "date": "2024-01-02" + }, + "thegayoffice": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MTIxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LnRoZWdheW9mZmljZS5jb20ifQ.463FpF2LPmsrMvZ0C4zP8wDZr126dOQbORVRDQ6XJYo", + "date": "2024-01-15" + }, + "toptobottom": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjE2MDUxLCJicmFuZCI6Im1lbiIsImhvc3RuYW1lIjoid3d3LnRvcHRvYm90dG9tLmNvbSJ9.eN0MvzpaefOPF0mJTxnaW3xG9x4LFgD1ekaQI7FQ2bI", + "date": "2024-01-15" + }, + "trannysurprise": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU5MTQxLCJicmFuZCI6InRyYW5zaGFyZGVyIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNoYXJkZXIuY29tIn0.Y_MTgI0NzGjxWW2UuJ7vHgioC1gTzQPNS2dgb0NtKqw", + "date": "2024-01-02" + }, + "transangels": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6Mjc0NTcxLCJicmFuZCI6InRyYW5zYW5nZWxzIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNhbmdlbHMuY29tIn0.gv8vV-NhlsWKRLjUWQ2F_zGTD-a58SGRlQb5ZJsIs7k", + "date": "2024-01-02" + }, + "transangelsnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6Mjc0MjMxLCJicmFuZCI6InRyYW5zYW5nZWxzIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNhbmdlbHNuZXR3b3JrLmNvbSJ9.NbryOUc9J6RgdVssvTkZAeNEF8ltio_bAv1OpsU1flU", + "date": "2023-12-26" + }, + "transharder": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjU5MTQxLCJicmFuZCI6InRyYW5zaGFyZGVyIiwiaG9zdG5hbWUiOiJ3d3cudHJhbnNoYXJkZXIuY29tIn0.Y_MTgI0NzGjxWW2UuJ7vHgioC1gTzQPNS2dgb0NtKqw", + "date": "2024-01-02" + }, + "transsensual": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjU4NzYxLCJicmFuZCI6InRyYW5zc2Vuc3VhbCIsImhvc3RuYW1lIjoid3d3LnRyYW5zc2Vuc3VhbC5jb20ifQ.zx-g9DUshMyYIu9StlQrxmCgaf3LtLU_r9CmPabOz6g", + "date": "2023-12-26" + }, + "trueamateurs": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6MjY2MzMxLCJicmFuZCI6InNwaWNldmlkcyIsImhvc3RuYW1lIjoid3d3LnRydWVhbWF0ZXVycy5jb20ifQ.gyUanDysED-EkR3ftelvl-TAJQA6jZXvxuYuS95F2ro", + "date": "2024-01-02" + }, + "tube8vip": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA0MzI2NDAwLCJpZCI6NDE2ODEsImJyYW5kIjoidHViZTh2aXAiLCJob3N0bmFtZSI6Ind3dy50dWJlOHZpcC5jb20ifQ.5ObIKtDCFmxlAw536x_UphqRk1cAfg3GcuXCBwburIs", + "date": "2024-01-02" + }, + "twinkpop": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1NDQ5NjAwLCJpZCI6MjcwNzMxLCJicmFuZCI6InR3aW5rcG9wIiwiaG9zdG5hbWUiOiJ3d3cudHdpbmtwb3AuY29tIn0.i3RaaiTlSclMEYowhmIOX6aW6EA23vhOI0kGPd1ywJM", + "date": "2024-01-15" + }, + "twistys": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzODA4MDAwLCJpZCI6MjU4NjUxLCJicmFuZCI6InR3aXN0eXMiLCJob3N0bmFtZSI6Ind3dy50d2lzdHlzLmNvbSJ9.VQjUfdiPzeza3gTAgE6o9HN9RiYpt90OSZ9AYb_0fE4", + "date": "2023-12-27" + }, + "twistysnetwork": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6MjYzMzExLCJicmFuZCI6InR3aXN0eXMiLCJob3N0bmFtZSI6Ind3dy50d2lzdHlzbmV0d29yay5jb20ifQ.-fwhJQ2mTvK9l9Q651yt_AMqrKNNGiLpnRJduzhvhCo", + "date": "2023-12-26" + }, + "virtualporn": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjcxNDcxLCJicmFuZCI6InZpcnR1YWxwb3JuIiwiaG9zdG5hbWUiOiJ2aXJ0dWFscG9ybi5jb20ifQ.bJl2sc4sZYx4XDrO_fifZZXPyyskt1bSSBiYQ1YrVmo", + "date": "2024-01-13" + }, + "voyr": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzA1Mjc2ODAwLCJpZCI6MjY4ODUxLCJicmFuZCI6InZveXIiLCJob3N0bmFtZSI6Ind3dy52b3lyLmNvbSJ9.wsn-jH9B9CKzXQp318BxNH0IxBO4Kp69LKGku1KUcJA", + "date": "2024-01-13" + }, + "welivetogether": { + "token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzNzIxNjAwLCJpZCI6NDM1MzEsImJyYW5kIjoicmVhbGl0eWtpbmdzIiwiaG9zdG5hbWUiOiJ3d3cud2VsaXZldG9nZXRoZXIuY29tIn0.Tp-h8tnwwG15z8RTK0NW7NjbbrspJGRSTcbkvFRMLcI", + "date": "2023-12-26" + }, + "whynotbi": { + "token": 
"eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJpc3MiOiJtaW5kZ2VlayIsImF1ZCI6Im1pbmRnZWVrIiwic3ViIjoiaW5zdGFuY2UtYXBpIiwiZXhwIjoxNzAzODA4MDAwLCJpZCI6MjU2ODQxLCJicmFuZCI6IndoeW5vdGJpIiwiaG9zdG5hbWUiOiJ3d3cud2h5bm90YmkuY29tIn0.I9kq1JF94pGysFhmp1MGKFEOx2loFz_RzCJC3wCAT2Q", + "date": "2023-12-27" + } +} \ No newline at end of file diff --git a/scrapers/AyloAPI/config.py b/scrapers/AyloAPI/config.py new file mode 100644 index 0000000..a2d9341 --- /dev/null +++ b/scrapers/AyloAPI/config.py @@ -0,0 +1,11 @@ +# User variables +## Minimum Ratio to consider the scene to scrape (Ratio between Title and API Title) +SET_RATIO = 0.75 + +## Check the SSL Certificate: set to False if you have problems with the SSL Errors +CHECK_SSL_CERT = True + +## User Agent to use for the requests +USER_AGENT = ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0" +) diff --git a/scrapers/AyloAPI/domains.py b/scrapers/AyloAPI/domains.py new file mode 100644 index 0000000..690f3e1 --- /dev/null +++ b/scrapers/AyloAPI/domains.py @@ -0,0 +1,67 @@ +import atexit +import datetime +import json +from pathlib import Path +from typing import Callable +from urllib.parse import urlparse + +""" +Keeps a cache of instance tokens for the Aylo API. + +Domains are assumed to omit the TLD, e.g. "brazzers" instead of "brazzers.com" +""" + + +__TOKENS_FILE = Path(__file__).parent / "aylo_tokens.json" +try: + __TOKENS = json.load(__TOKENS_FILE.open(encoding="utf-8")) +except (FileNotFoundError, json.JSONDecodeError): + __TOKENS = {} + + +@atexit.register +def __save_domains(): + sorted_domains = dict(sorted(__TOKENS.items(), key=lambda x: x[0])) + json.dump(sorted_domains, __TOKENS_FILE.open("w", encoding="utf-8"), indent=2) + + +def site_name(url: str) -> str: + """ + Returns the site name of the given URL, e.g. 
"brazzers" for "https://www.brazzers.com" + """ + return urlparse(url).netloc.split(".")[-2] + + +def get_token_for(domain: str, fallback: Callable[[str], str | None]) -> str | None: + """ + Returns a token for the given domain. If the stored token is not valid, the provided + fallback function will be used to generate a new token. + + If the fallback function returns None, it will return None. + """ + today = datetime.datetime.today().strftime("%Y-%m-%d") + + # If the domain is in the list and if the token is still valid we just return it + if (entry := __TOKENS.get(domain)) and entry["date"] == today and entry["token"]: + return entry["token"] + + # Generate the token using the provided fallback function + url = f"https://www.{domain}.com" + token = fallback(url) + if not token: + return None + # And persist it + __TOKENS[domain] = { + "token": token, + "date": today, + } + + return token + + +def all_domains() -> list[str]: + """ + Returns a list of all known domains for the Aylo API + """ + + return list(__TOKENS.keys()) diff --git a/scrapers/AyloAPI/package b/scrapers/AyloAPI/package new file mode 100644 index 0000000..b148107 --- /dev/null +++ b/scrapers/AyloAPI/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: AyloAPI diff --git a/scrapers/AyloAPI/scrape.py b/scrapers/AyloAPI/scrape.py new file mode 100644 index 0000000..e2ff567 --- /dev/null +++ b/scrapers/AyloAPI/scrape.py @@ -0,0 +1,886 @@ +import json +import re +import sys +import difflib +from datetime import datetime +from typing import Any, Callable +from urllib.parse import urlparse + +try: + import requests +except ModuleNotFoundError: + print( + "You need to install the requests module." 
+ "(https://docs.python-requests.org/en/latest/user/install/)\n" + "If you have pip (normally installed with python)," + "run this command in a terminal (cmd): python -m pip install requests", + file=sys.stderr, + ) + sys.exit() + + +try: + import py_common.log as log + from py_common.util import dig, scraper_args + from py_common.types import ( + ScrapedScene, + ScrapedMovie, + ScrapedPerformer, + ScrapedStudio, + ScrapedTag, + ) + import AyloAPI.domains as domains + import AyloAPI.config as config + from AyloAPI.slugger import slugify +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! " + "(CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + + +def default_postprocess(obj: Any, _) -> Any: + return obj + + +# network stuff +def __raw_request(url, headers) -> requests.Response: + log.trace(f"Sending GET request to {url}") + response = requests.get( + url, headers=headers, timeout=10, verify=config.CHECK_SSL_CERT + ) + + if response.status_code == 429: + log.error( + "[REQUEST] 429 Too Many Requests: " + "you have sent too many requests in a given amount of time." 
+ ) + sys.exit(1) + + # Even a 404 will contain an instance token + return response + + +def __api_request(url: str, headers: dict) -> dict | None: + result = __raw_request(url, headers) + api_response = result.json() + if isinstance(api_response, list): + api_search_errors = "\n- ".join( + json.dumps(res, indent=None) for res in api_response + ) + log.error(f"Errors from API:\n{api_search_errors}") + return None + + with open("api_response.json", "w", encoding="utf-8") as f: + json.dump(api_response, f, indent=2) + + return api_response["result"] + + +def _create_headers_for(domain: str) -> dict[str, str]: + # If we haven't stored a token we must provide a function to get one + def get_instance_token(url: str) -> str | None: + r = __raw_request(url, {"User-Agent": config.USER_AGENT}) + if r and (token := r.cookies.get("instance_token")): + return token + log.error( + f"Failed to get instance_token from '{url}': " + "are you sure this site is in the Aylo network?" + ) + + api_token = domains.get_token_for(domain, fallback=get_instance_token) + if api_token is None: + log.error(f"Unable to get an API token for '{domain}'") + sys.exit(1) + + api_headers = { + "Instance": api_token, + "User-Agent": config.USER_AGENT, + "Origin": f"https://{domain}", + "Referer": f"https://{domain}", + } + return api_headers + + +def _construct_url(api_result: dict) -> str: + """ + Tries to construct a valid public URL for an API result + + This will often result in scene links that point to the parent network site, + so we might want to add wrapper scrapers that can add the correct URL as well + + For example, a scene from We Live Together will have an URL for realitykings.com + but that scene is also on welivetogether.com and that might be considered more canonical + """ + + brand = api_result["brand"] + type_ = api_result["type"] + id_ = api_result["id"] + slug = slugify(api_result["title"]) + return f"https://www.{brand}.com/{type_}/{id_}/{slug}" + + +def 
def _construct_performer_url(api_result: dict, site: str) -> str:
    """Builds a public performer URL on *site* from an API actor result."""
    performer_id = api_result["id"]
    return f"https://www.{site}.com/model/{performer_id}/{slugify(api_result['name'])}"


## Helper functions for the objects returned from Aylo's API
def get_studio(api_object: dict) -> ScrapedStudio | None:
    """
    Derives the studio (and parent studio, when distinct) for an API object
    from its first collection and its brand metadata.
    """
    collection = dig(api_object, "collections", 0, "name")
    brand = dig(api_object, "brandMeta", ("displayName", "name", "shortName"))

    if not collection:
        if brand:
            return {"name": brand}
        log.error(f"No studio for {api_object['type']} with id {api_object['id']}")
        return None

    # Only attach a parent when it differs from the collection itself
    if brand.lower() != collection.lower():
        return {
            "name": collection,
            "parent": {"name": brand},
        }
    return {"name": collection}


def get_tags(api_object: dict) -> list[ScrapedTag]:
    """Extracts trimmed tag names from an API object, skipping unnamed entries."""
    scraped: list[ScrapedTag] = []
    for raw_tag in api_object.get("tags", []):
        if "name" in raw_tag:
            scraped.append({"name": raw_tag["name"].strip()})
    return scraped
"Iowa": "USA", + "Kansas": "USA", + "Kentucky": "USA", + "Louisiana": "USA", + "Maine": "USA", + "Maryland": "USA", + "Massachusetts": "USA", + "Michigan": "USA", + "Minnesota": "USA", + "Mississippi": "USA", + "Missouri": "USA", + "Montana": "USA", + "Nebraska": "USA", + "Nevada": "USA", + "New Hampshire": "USA", + "New Jersey": "USA", + "New Mexico": "USA", + "New York": "USA", + "North Carolina": "USA", + "North Dakota": "USA", + "Ohio": "USA", + "Oklahoma": "USA", + "Oregon": "USA", + "Pennsylvania": "USA", + "Rhode Island": "USA", + "South Carolina": "USA", + "South Dakota": "USA", + "Tennessee": "USA", + "Texas": "USA", + "Utah": "USA", + "Vermont": "USA", + "Virginia": "USA", + "Washington": "USA", + "West Virginia": "USA", + "Wisconsin": "USA", + "Wyoming": "USA", +} + + +## Helper functions to convert from Aylo's API to Stash's scaper return types +def to_scraped_performer( + performer_from_api: dict, site: str | None = None +) -> ScrapedPerformer: + if (type_ := dig(performer_from_api, "brand")) and type_ != "actorsandtags": + wrong_type = performer_from_api.get("type", "mystery") + wrong_id = performer_from_api.get("id", "unknown") + log.error(f"Attempted to scrape a '{wrong_type}' (ID: {wrong_id}) as a scene.") + raise ValueError("Invalid performer from API") + # This is all we get when scraped as part of a scene or movie + performer: ScrapedPerformer = { + "name": performer_from_api["name"], + "gender": performer_from_api["gender"], + } + + if aliases := ", ".join( + alias + for alias in performer_from_api.get("aliases", []) + if alias.lower() != performer["name"].lower() + ): + performer["aliases"] = aliases + + if details := performer_from_api.get("bio"): + performer["details"] = details + + # All remaining fields are only available when scraped directly + if height := performer_from_api.get("height"): + # Convert to cm + performer["height"] = str(round(height * 2.54)) + + if weight := performer_from_api.get("weight"): + # Convert to kg + 
performer["weight"] = str(round(weight / 2.205)) + + if birthdate := performer_from_api.get("birthday"): + performer["birthdate"] = datetime.strptime( + birthdate, "%Y-%m-%dT%H:%M:%S%z" + ).strftime("%Y-%m-%d") + + if birthplace := performer_from_api.get("birthPlace"): + performer["country"] = birthplace + + if measurements := performer_from_api.get("measurements"): + performer["measurements"] = measurements + + images = dig(performer_from_api, "images", "master_profile") or {} + # Performers can have multiple images, try to get the biggest versions + if images := [ + img + for alt in images.values() + if (img := dig(alt, ("xx", "xl", "lg", "md", "sm"), "url")) + ]: + performer["images"] = images + + if tags := get_tags(performer_from_api): + performer["tags"] = tags + + if site: + performer["url"] = _construct_performer_url(performer_from_api, site) + + return performer + + +def to_scraped_movie(movie_from_api: dict) -> ScrapedMovie: + if not movie_from_api["type"] == "movie": + wrong_type = movie_from_api["type"] + wrong_id = movie_from_api["id"] + log.error(f"Attempted to scrape a '{wrong_type}' (ID: {wrong_id}) as a movie.") + raise ValueError("Invalid movie from API") + + movie: ScrapedMovie = { + "name": movie_from_api["title"], + "synopsis": dig(movie_from_api, "description"), + "front_image": dig(movie_from_api, "images", "cover", "0", "xx", "url"), + "url": _construct_url(movie_from_api), + } + + if date := dig(movie_from_api, "dateReleased"): + movie["date"] = datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z").strftime( + "%Y-%m-%d" + ) + + if studio := get_studio(movie_from_api): + movie["studio"] = studio + + return movie + + +def to_scraped_scene(scene_from_api: dict) -> ScrapedScene: + if not scene_from_api["type"] == "scene": + wrong_type = scene_from_api["type"] + wrong_id = scene_from_api["id"] + log.error(f"Attempted to scrape a '{wrong_type}' (ID: {wrong_id}) as a scene.") + raise ValueError("Invalid scene from API") + + scene: ScrapedScene = { + 
"title": scene_from_api["title"], + "code": str(scene_from_api["id"]), + "details": dig(scene_from_api, "description"), + "date": datetime.strptime( + scene_from_api["dateReleased"], "%Y-%m-%dT%H:%M:%S%z" + ).strftime("%Y-%m-%d"), + "url": _construct_url(scene_from_api), + "performers": [ + to_scraped_performer(p, dig(scene_from_api, "brand")) + for p in scene_from_api["actors"] + ], + "tags": get_tags(scene_from_api), + } + + if image := dig( + scene_from_api, + "images", + ("poster", "poster_fallback"), + "0", + ("xx", "xl", "lg", "md", "sm", "xs"), + "url", + ): + scene["image"] = image + + if dig(scene_from_api, "parent", "type") == "movie": + scene["movies"] = [to_scraped_movie(scene_from_api["parent"])] + + if studio := get_studio(scene_from_api): + scene["studio"] = studio + + if markers := scene_from_api.get("timeTags"): + log.debug( + f"This scene has {len(markers)} markers" + " but scraping markers hasn't been implemented yet" + ) + + return scene + + +## Primary functions used to scrape from Aylo's API +def scene_from_url( + url, postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess +) -> ScrapedScene | None: + """ + Scrapes a scene from a URL, running an optional postprocess function on the result + """ + + if not (match := re.search(r"/(\d+)/", url)): + log.error( + "Can't get the ID of the Scene. " + "Are you sure that URL is from a site in the Aylo Network?" 
+ ) + return None + scene_id = match.group(1) + + log.debug(f"Scene ID: {scene_id}") + + # Extract the domain from the URL + domain = domains.site_name(url) + + api_URL = f"https://site-api.project1service.com/v2/releases/{scene_id}" + api_headers = _create_headers_for(domain) + api_scene_json = __api_request(api_URL, api_headers) + + if not api_scene_json: + return None + + if dig(api_scene_json, "type") == "scene": + return postprocess(to_scraped_scene(api_scene_json), api_scene_json) + + # If you scrape a trailer we can still get the correct scene data + if dig(api_scene_json, "parent", "type") == "scene": + log.debug("Result is a movie or trailer, getting scene data from parent") + return postprocess( + to_scraped_scene(api_scene_json["parent"]), api_scene_json["parent"] + ) + + +def performer_from_url( + url, + postprocess: Callable[ + [ScrapedPerformer, dict], ScrapedPerformer + ] = default_postprocess, +) -> ScrapedPerformer | None: + """ + Scrapes a performer from a URL, running an optional postprocess function on the result + """ + + if not (match := re.search(r"/(\d+)/", url)): + log.error( + "Can't get the ID of the performer. " + "Are you sure that URL is from a site in the Aylo Network?" 
+ ) + return None + performer_id = match.group(1) + + log.debug(f"Performer ID: {performer_id}") + + # Extract the domain from the URL + domain = urlparse(url).netloc.split(".")[-2] + + api_URL = f"https://site-api.project1service.com/v1/actors/{performer_id}" + api_headers = _create_headers_for(domain) + api_performer_json = __api_request(api_URL, api_headers) + if not api_performer_json: + return None + + return postprocess(to_scraped_performer(api_performer_json), api_performer_json) + + +def movie_from_url( + url, postprocess: Callable[[ScrapedMovie, dict], ScrapedMovie] = default_postprocess +) -> ScrapedMovie | None: + """ + Scrapes a movie from a URL, running an optional postprocess function on the result + """ + + if not (match := re.search(r"/(\d+)/", url)): + log.error( + "Can't get the ID of the movie. " + "Are you sure that URL is from a site in the Aylo Network?" + ) + return None + movie_id = match.group(1) + + log.debug(f"Movie ID: {movie_id}") + + # Extract the domain from the URL + domain = urlparse(url).netloc.split(".")[-2] + + api_URL = f"https://site-api.project1service.com/v2/releases/{movie_id}" + api_headers = _create_headers_for(domain) + api_movie_json = __api_request(api_URL, api_headers) + if not api_movie_json: + return None + + if dig(api_movie_json, "type") == "movie": + return postprocess(to_scraped_movie(api_movie_json), api_movie_json) + + # If you scrape a scene or trailer, we can still get the correct movie data + if dig(api_movie_json, "parent", "type") == "movie": + log.debug("Result is a scene or trailer, getting movie data from parent") + return postprocess( + to_scraped_movie(api_movie_json["parent"]), api_movie_json["parent"] + ) + + +# Since the "Scrape with..." 
# Since the "Scrape with..." function in Stash expects a single result, we provide
# this function to return the first result that exceeds the threshold so
# that users don't need to use scene_search directly and THEN take the first result
def find_scene(
    query: str,
    search_domains: list[str] | None = None,
    min_ratio: float = 0.9,
    postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess,
) -> ScrapedScene | None:
    """
    Searches the Aylo API for scenes matching the given query and returns the
    first match that exceeds `min_ratio` similarity: a float between 0 and 1.

    Differs from `scene_search` in that it only returns the first match,
    returning early as soon as it finds a match that exceeds the threshold.

    If search_domains is provided it will only search those domains,
    otherwise it will search all (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on the result before returning
    """
    if not query:
        log.error("No query provided")
        return None

    if not search_domains:
        # FIX: corrected "Searcing" typo in the log message
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Matching '{query}' against {len(search_domains)} sites")

    # Similarity between the query and a candidate title, rounded to 3 decimals
    def matcher(candidate_title: str):
        return round(
            difflib.SequenceMatcher(
                None, query.lower(), candidate_title.lower()
            ).ratio(),
            3,
        )

    for domain in search_domains:
        log.debug(f"Searching '{domain}'")

        api_headers = _create_headers_for(domain)
        search_url = f"https://site-api.project1service.com/v2/releases?search={query}&type=scene"
        api_response = __api_request(search_url, api_headers)

        if api_response is None:
            log.error(f"Failed to search '{domain}'")
            continue
        if not api_response:
            log.debug(f"No results from '{domain}'")
            continue

        best_match = max(api_response, key=lambda x: matcher(x["title"]))
        ratio = matcher(best_match["title"])
        if ratio >= min_ratio:
            log.info(
                f"Found scene '{best_match['title']}' with {ratio:.2%} similarity "
                f"to '{query}' (exceeds {min_ratio:.2%} threshold) "
                f"on '{domain}'"
            )
            return postprocess(to_scraped_scene(best_match), best_match)
        else:
            log.info(
                f"Giving up on '{domain}': best result '{best_match['title']}' "
                f"with {ratio:.2%} similarity"
            )

    log.error(f"No scenes found for '{query}'")
    return None
def scene_search(
    query: str,
    search_domains: list[str] | None = None,
    postprocess: Callable[[ScrapedScene, dict], ScrapedScene] = default_postprocess,
) -> list[ScrapedScene]:
    """
    Searches the Aylo API for the given query and returns a list of ScrapedScene

    If search_domains is provided it will only search those domains,
    otherwise it will search all known domains (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on each result before returning
    """
    if not query:
        log.error("No query provided")
        return []

    if not search_domains:
        # FIX: corrected "Searcing" typo in the log message
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Searching for '{query}' on {len(search_domains)} sites")

    # The source of the results will be based on the token used (Brazzers, Reality Kings, etc.)
    search_url = f"https://site-api.project1service.com/v2/releases?search={query}&type=scene&limit=10"
    search_results = []

    for domain in search_domains:
        log.debug(f"Searching {domain}")

        api_headers = _create_headers_for(domain)
        api_response = __api_request(search_url, api_headers)
        if api_response is None:
            log.error(f"Failed to search {domain}")
            continue
        if not api_response:
            log.debug(f"No results from {domain}")
            continue

        search_results.extend(
            postprocess(to_scraped_scene(result), result) for result in api_response
        )

        # Try to avoid more than 10ish results or this will take forever
        if len(search_results) >= 10:
            log.warning("Found more than 10 results, stopping search")
            break

    log.info(f"Search finished, found {len(search_results)} candidates")

    # Best matches first: sort by similarity between query and candidate title
    def matcher(candidate: ScrapedScene):
        return round(
            difflib.SequenceMatcher(
                None,
                query.lower(),
                candidate["title"].lower(),  # type: ignore (title is always set)
            ).ratio(),
            3,
        )

    return sorted(search_results, key=matcher, reverse=True)


def performer_search(
    query: str,
    search_domains: list[str] | None = None,
    postprocess: Callable[
        [ScrapedPerformer, dict], ScrapedPerformer
    ] = default_postprocess,
) -> list[ScrapedPerformer]:
    """
    Searches the Aylo API for the given query and returns a list of ScrapedPerformer

    If search_domains is provided it will only search those domains,
    otherwise it will search all known domains (this could be very slow!)

    Domains should not include the "www." or ".com" parts of the domain: 'brazzers', 'realitykings', etc.

    If postprocess is provided it will be called on each result before returning
    """
    if not query:
        log.error("No query provided")
        return []

    if not search_domains:
        # FIX: corrected "Searcing" typo in the log message
        log.warning("Searching all known domains, this could be very slow!")
        search_domains = domains.all_domains()

    log.debug(f"Searching for '{query}' on {len(search_domains)} sites")

    # The source of the results will be based on the token used (Brazzers, Reality Kings, etc.)
    search_url = (
        f"https://site-api.project1service.com/v1/actors?search={query}&limit=10"
    )
    search_results = []

    for domain in search_domains:
        log.debug(f"Searching {domain}")

        api_headers = _create_headers_for(domain)
        api_response = __api_request(search_url, api_headers)
        if api_response is None:
            log.error(f"Failed to search {domain}")
            continue
        if not api_response:
            log.debug(f"No results from {domain}")
            continue

        search_results.extend(
            postprocess(to_scraped_performer(result, domain), result)
            for result in api_response
        )

        # Try to avoid more than 10ish results or this will take forever
        if len(search_results) >= 10:
            log.warning("Found more than 10 results, stopping search")
            break

    log.debug(f"Search finished, found {len(search_results)} candidates")

    # Best matches first: sort by similarity between query and candidate name
    def matcher(candidate: ScrapedPerformer):
        return round(
            difflib.SequenceMatcher(
                None,
                query.lower(),
                candidate["name"].lower(),  # type: ignore (name is always set)
            ).ratio(),
            3,
        )

    return sorted(search_results, key=matcher, reverse=True)
provided it will only search those domains, + otherwise it will search all known domains (this could be very slow!) + + If min_ratio is provided _AND_ the fragment contains a title but no URL, + the search will only return a scene if a match with at least that ratio is found + + If postprocess is provided it will be called on the result before returning + """ + log.debug(f"Fragment scraping scene {fragment['id']}") + if url := fragment.get("url"): + log.debug(f"Using scene URL: '{url}'") + if scene := scene_from_url(url, postprocess=postprocess): + return scene + log.debug("Failed to scrape scene from URL") + if title := fragment.get("title"): + log.debug(f"Searching for '{title}'") + if scene := find_scene( + title, search_domains, min_ratio, postprocess=postprocess + ): + return scene + log.debug("Failed to find scene by title") + + log.warning("Cannot scrape from this fragment: need to have title or url set") + + +def performer_from_fragment( + fragment: dict, + search_domains: list[str] | None = None, + min_ratio=0.9, + postprocess: Callable[ + [ScrapedPerformer, dict], ScrapedPerformer + ] = default_postprocess, +) -> ScrapedPerformer | None: + """ + Scrapes a performer from a fragment, which must contain one of the following: + - url: the URL of the performer page (anywhere in the Aylo network) + - name: the name of the performer + + If domains is provided it will only search those domains, + otherwise it will search all known domains (this could be very slow!) 
+ + If min_ratio is provided _AND_ the fragment contains a title but no URL, + the search will only return a scene if a match with at least that ratio is found + + If postprocess is provided it will be called on the result before returning + """ + log.debug("Fragment scraping performer...") + if url := fragment.get("url"): + log.debug(f"Using performer URL: '{url}'") + return performer_from_url(url, postprocess=postprocess) + elif name := fragment.get("name"): + log.debug(f"Searching for '{name}'") + return find_performer(name, search_domains, min_ratio, postprocess=postprocess) + + log.warning("Cannot scrape from this fragment: need to have url or name set") + + +def main_scraper(): + """ + Takes arguments from stdin or from the command line and dumps output as JSON to stdout + """ + op, args = scraper_args() + result = None + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url) + case "scene-by-name", {"name": name, "extra": _domains} if name: + result = scene_search(name, search_domains=_domains) + case "scene-by-fragment" | "scene-by-query-fragment", args: + _domains = args.get("extra", None) + result = scene_from_fragment(args, search_domains=_domains) + case "performer-by-url", {"url": url}: + result = performer_from_url(url) + case "performer-by-fragment", args: + _domains = args.get("extra", None) + result = performer_from_fragment(args, search_domains=_domains) + case "performer-by-name", {"name": name, "extra": _domains} if name: + result = performer_search(name, search_domains=_domains) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) + + +if __name__ == "__main__": + main_scraper() diff --git a/scrapers/AyloAPI/slugger.py b/scrapers/AyloAPI/slugger.py new file mode 100644 index 0000000..967282f --- /dev/null +++ b/scrapers/AyloAPI/slugger.py @@ -0,0 +1,82 @@ +import re + +""" 
# Python port of lodash's kebabCase, used to generate the URL slugs for
# scenes, performers and movies scraped from the Aylo API.
# Reference: https://github.com/lodash/lodash/blob/main/src/kebabCase.ts

# Unicode character-class building blocks. The sequences are doubly
# escaped so that `re` (not Python's string parser) interprets \uXXXX.
rsAstralRange = "\\ud800-\\udfff"
rsComboMarksRange = "\\u0300-\\u036f"
reComboHalfMarksRange = "\\ufe20-\\ufe2f"
rsComboSymbolsRange = "\\u20d0-\\u20ff"
rsComboMarksExtendedRange = "\\u1ab0-\\u1aff"
rsComboMarksSupplementRange = "\\u1dc0-\\u1dff"
rsComboRange = "".join(
    [
        rsComboMarksRange,
        reComboHalfMarksRange,
        rsComboSymbolsRange,
        rsComboMarksExtendedRange,
        rsComboMarksSupplementRange,
    ]
)
rsDingbatRange = "\\u2700-\\u27bf"
rsLowerRange = "a-z\\xdf-\\xf6\\xf8-\\xff"
rsMathOpRange = "\\xac\\xb1\\xd7\\xf7"
rsNonCharRange = "\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf"
rsPunctuationRange = "\\u2000-\\u206f"
rsSpaceRange = (
    " \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e"
    "\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007"
    "\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000"
)
rsUpperRange = "A-Z\\xc0-\\xd6\\xd8-\\xde"
rsVarRange = "\\ufe0e\\ufe0f"
rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange

# Single-construct patterns assembled from the ranges above
rsApos = "['\u2019]"
rsBreak = f"[{rsBreakRange}]"
rsCombo = f"[{rsComboRange}]"
rsDigit = "\\d"
rsDingbat = f"[{rsDingbatRange}]"
rsLower = f"[{rsLowerRange}]"
rsMisc = f"[^{rsAstralRange}{rsBreakRange + rsDigit + rsDingbatRange + rsLowerRange + rsUpperRange}]"
rsFitz = "\\ud83c[\\udffb-\\udfff]"
rsModifier = f"(?:{rsCombo}|{rsFitz})"
rsNonAstral = f"[^{rsAstralRange}]"
rsRegional = "(?:\\ud83c[\\udde6-\\uddff]){2}"
rsSurrPair = "[\\ud800-\\udbff][\\udc00-\\udfff]"
rsUpper = f"[{rsUpperRange}]"
rsZWJ = "\\u200d"

# Composite patterns: word shapes, optional contractions ('s, 'll, ...),
# ordinals (1st/2ND) and emoji sequences
rsMiscLower = f"(?:{rsLower}|{rsMisc})"
rsMiscUpper = f"(?:{rsUpper}|{rsMisc})"
rsOptContrLower = f"(?:{rsApos}(?:d|ll|m|re|s|t|ve))?"
rsOptContrUpper = f"(?:{rsApos}(?:D|LL|M|RE|S|T|VE))?"
reOptMod = f"{rsModifier}?"
rsOptVar = f"[{rsVarRange}]?"
rsOptJoin = f"(?:{rsZWJ}(?:{'|'.join([rsNonAstral, rsRegional, rsSurrPair])}){rsOptVar + reOptMod})*"
rsOrdLower = "\\d*(?:1st|2nd|3rd|(?![123])\\dth)(?=\\b|[A-Z_])"
rsOrdUpper = "\\d*(?:1ST|2ND|3RD|(?![123])\\dTH)(?=\\b|[a-z_])"
rsSeq = rsOptVar + reOptMod + rsOptJoin
rsEmoji = rf"(?:{'|'.join([rsDingbat, rsRegional, rsSurrPair])}){rsSeq}"

# Splits a string into its "words" the way lodash does: capitalized
# words, all-caps runs, ordinals, digit runs and emoji sequences
reUnicodeWords = re.compile(
    "|".join(
        [
            f"{rsUpper}?{rsLower}+{rsOptContrLower}(?={'|'.join([rsBreak, rsUpper, '$'])})",
            f"{rsMiscUpper}+{rsOptContrUpper}(?={'|'.join([rsBreak, rsUpper + rsMiscLower, '$'])})",
            f"{rsUpper}?{rsMiscLower}+{rsOptContrLower}",
            f"{rsUpper}+{rsOptContrUpper}",
            rsOrdUpper,
            rsOrdLower,
            f"{rsDigit}+",
            rsEmoji,
        ]
    )
)

# ASCII fallback: runs of anything that isn't control/punctuation/space
reAsciiWords = re.compile(r"[^\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]+")


def slugify(string):
    """
    Kebab-case *string* the way lodash's kebabCase does.

    Apostrophes (ASCII and typographic) are stripped first so that
    contractions collapse ("Don't Break Me" -> "dont-break-me"); the
    remaining words are hyphen-joined and lowercased. Falls back to a
    plain ASCII word split when the Unicode matcher finds nothing.
    """
    cleaned = re.sub("['\u2019]", "", string)
    matcher = reUnicodeWords if reUnicodeWords.search(cleaned) else reAsciiWords
    return "-".join(matcher.findall(cleaned)).lower()
+https://www.shewillcheat.com/scene/9670841/hot-blonde-lilly-bell-gets-fucked-by-her-marriage-counselor - She Will Cheat / Metro + +# Babes + +- 7 studios listed on StashDB, 6 scrapable from API + Missing: Babes Live (which only has one scene) +- Scenes from Black is Better have been moved into Babes Unleashed +- No special handling required + +https://www.babes.com/scene/4474211/forbidden-fruit - Babes +https://www.babes.com/scene/3108261/fill-her-up - Babes Unleashed / Babes +https://www.babes.com/scene/3098571/like-stepmother-like-stepdaughter - Step Mom Lessons / Babes + +# Bang Bros + +- 70 studios listed on StashDB, 54 scrapable from API + Missing: BangBros Worldwide, Big Ass Adventure, BrandiBelle.com, Busty Adventures, CFNM Show, College Rules, Dancing Bear, Fart Hammer, Filthy Family, Mia Khalifa, Mom's Anal Adventure, My Dirty Vault, MyGF, Public Invasion, Sex Busters, XXX Pawn +- All scene URLs use 'video' instead of 'scene' between domain and scene ID but performer URLs are fine +- Should translate old-style URLs to new-style URLs by checking for redirects: + - https://bangbros.com/video116453/eva-lovia + - https://bangbros.com/video/9027421/eva-lovia +- Name adjustments: + AvaSpice -> Ava Spice + MomIsHorny -> Mom Is Horny + +https://bangbros.com/video/9719651/big-poolside-booty - Ass Parade / BangBros +https://bangbros.com/video/9087751/cock-a-smile - AvaSpice / BangBros +https://bangbros.com/video/8968901/sexy-latinas-good-dicking - Back Room Facials / BangBros +https://bangbros.com/video/9092331/a-taste-of-vanilla - Backroom MILF / BangBros +https://bangbros.com/video/9115601/erika-vution-rocked-my-cock - Ball Honeys / BangBros +https://bangbros.com/video/9420961/zodiac-fuck - Bang Bus / BangBros +https://bangbros.com/video/8985221/sexy-selena - Bang Casting / BangBros +https://bangbros.com/video/9214171/fuck-me-in-the-ass-please - Bang POV / BangBros +https://bangbros.com/video/9063991/sex-tape-5 - Bang Tryouts / BangBros 
+https://bangbros.com/video/9580671/stepdad-gets-caught-in-action - BangBros 18 / BangBros +https://bangbros.com/video/9027421/eva-lovia - BangBros Angels / BangBros +https://bangbros.com/video/9428961/new-shower-new-pussy - Bangbros Clips / BangBros +https://bangbros.com/video/9010721/big-ass-big-tits-gianna-works-out-that-juicy-pussy - BangBros Remastered / BangBros +https://bangbros.com/video/8863501/summer-getting-nasty - BangBros Vault / BangBros +https://bangbros.com/video/9155231/asian-nympho-loves-to-get-fuck - Big Mouthfuls / BangBros +https://bangbros.com/video/9297791/nikka-realy-needs-the-cream- - Big Tit Cream Pie / BangBros +https://bangbros.com/video/9429171/puerto-rican-pussy-loves-oil - Big Tits, Round Asses / BangBros +https://bangbros.com/video/8906201/arietta-sucks-her-stepbrothers-dick - BlowJob Fridays / BangBros +https://bangbros.com/video/9120031/jenaveve-gets-down-and-dirtty - Blowjob Ninjas / BangBros +https://bangbros.com/video/9149391/black-gold - Boob Squad / BangBros +https://bangbros.com/video/9497411/stretching-out-august - Brown Bunnies / BangBros +https://bangbros.com/video/9032981/rachel-starr-and-the-hoagie-hero - Can He Score / BangBros +https://bangbros.com/video/9124101/she-does-cook-anal - Casting / BangBros +https://bangbros.com/video/9041131/doesnt-get-better-then-jynx-maze - Chongas / BangBros +https://bangbros.com/video/8968571/colombian-newbie-gets-slammed - Colombia Fuck Fest / BangBros +https://bangbros.com/video/9097911/pin-up-doll - Dirty World Tour / BangBros +https://bangbros.com/video/9034361/pornstars-raid-the-dorm-full-of-college-boys - Dorm Invasion / BangBros +https://bangbros.com/video/8992381/slutty-amateur-gets-a-facial - Facial Fest / BangBros +https://bangbros.com/video/9027201/fuckteam-beach-time - Fuck Team Five / BangBros +https://bangbros.com/video/8971801/thick-latina-deep-throats-in-the-gloryhole - Glory Hole Loads / BangBros +https://bangbros.com/video/9032011/puerto-rican-flavor-weve-evans - 
Latina Rampage / BangBros +https://bangbros.com/video/9083961/one-with-nature - Living With Anna / BangBros +https://bangbros.com/video/9129461/anita-blues-anal-salute - MILF Lessons / BangBros +https://bangbros.com/video/9044481/milf-takes-a-pounding - Milf Soup / BangBros +https://bangbros.com/video/9059841/feet-that-will-make-you-squirt - Magical Feet / BangBros +https://bangbros.com/video/9401771/step-mom-gives-blu-balls - MomIsHorny / BangBros +https://bangbros.com/video/8852571/tiny-pussy-gets-fucked - Monsters of Cock / BangBros +https://bangbros.com/video/9139961/alexis-malone - Mr CamelToe / BangBros +https://bangbros.com/video/8986901/cristi-ann-gets-an-anal-pounding - Mr. Anal / BangBros +https://bangbros.com/video/8838251/anal-gymnastics - My Dirty Maid / BangBros +https://bangbros.com/video/9088421/suzanny-the-dog-rescuer - My Life In Brazil / BangBros +https://bangbros.com/video/9100111/pussy-on-wheels - Newbie Black / BangBros +https://bangbros.com/video/8996361/keisha-grey-fucked-by-black-cock - Pawg / BangBros +https://bangbros.com/video/9035831/amateur-lesbians-fuck - Party of 3 / BangBros +https://bangbros.com/video/9119001/olivia-olovely-and-from-one-million-to-billion - Penny Show / BangBros +https://bangbros.com/video/9041761/sexy-brunette-massaged-and-fucked-hardcore - Porn Star Spa / BangBros +https://bangbros.com/video/9097431/freaky-slumber-munch - Power Munch / BangBros +https://bangbros.com/video/8935091/selva-gets-public-anal - Public Bang / BangBros +https://bangbros.com/video/9026281/amateur-blonde-slut-gang-banged - Slutty White Girls / BangBros +https://bangbros.com/video/9007731/my-stepmom-teaches-me-how-to-fuck-my-bf - Stepmom Videos / BangBros +https://bangbros.com/video/9133631/tour-d-fuck - Street Ranger / BangBros +https://bangbros.com/video/9015181/hot-latina-pornstar-jerks-off-a-dick - Tugjobs / BangBros +https://virtualporn.com/video/9359281/too-horny-to-fail - Virtual Porn / BangBros 
+https://bangbros.com/video/9123731/latin-fever - Working Latinas / BangBros + +# Gay Wire + +- 18 studios listed on StashDB, 13 scrapable from API + Missing: Bigdaddy, Gay Patrol, Gay Pawn, Grab Ass, Project City Bus + Suspicious: Gay Selector? Doesn't seem to belong to Gay Wire + New: Sausage Party, Urban Invasion +- BangBros needs to be replaced with Gay Wire in all studio objects +- bangbros.com needs to be replaced with gaywire.com in all URLs +- Should translate old-style URLs to new-style URLs by checking for redirects: + https://gaywire.com/h1/video487/cock-sucking-galore + https://gaywire.com/scene/9322311/cock-sucking-galore +- Name adjustments: + Its Gonna Hurt -> It's Gonna Hurt + Poundhisass -> Pound His Ass + +https://gaywire.com/scene/9315611/double-ginger - Bait Bus / BangBros +https://gaywire.com/scene/9321281/virgin-anal-sex-with-a-big-dick - Bareback Attack / BangBros +https://gaywire.com/scene/9320721/georgio-fucks-like-an-animal - Bareback Casting / BangBros +https://gaywire.com/scene/9322311/cock-sucking-galore - ExBF / BangBros +https://gaywire.com/scene/9326111/slosh-balls - Haze Him / BangBros +https://gaywire.com/scene/9325601/shockingly-painful - Its Gonna Hurt / BangBros +https://gaywire.com/scene/9329621/public-anal-sex-in-europe - Out In Public / BangBros +https://gaywire.com/scene/8942881/aaron-trainer-trains-his-cock - Poundhisass / BangBros +https://gaywire.com/scene/9335691/oil-massaged-anal-sex - Rub Him / BangBros +https://gaywire.com/scene/9332131/crazy-in-the-club - Sausage Party / BangBros +https://gaywire.com/scene/9332821/hunting-in-the-heart-of-darkness - Thug Hunter / BangBros +https://gaywire.com/scene/9336331/cock-surprise - UngloryHole / BangBros +https://gaywire.com/scene/9333241/two-hot-jocks - Urban Invasion / BangBros + +# Mile High Media (Straight) + +- Appear to segregate their bi / gay / trans content across domains, but according to the API they're all MHM + Have split them across 3 scrapers but could undergo 
consolidation or further splitting at a later date +- 16 studio listed on StashDB, 12 scrapable from API + Missing: Cherry Pop, Couples Seeking Teens (folded into Reality Junkies?), Gilfed, Mile High Xtreme +- milehigh.com needs to be replaced with milehighmedia.com in all URLs +- milehigh.com can be replaced by studios own domain +- Name adjustments: + dlf -> Dilfed + DogHouseDigital -> Doghouse Digital + LesbianOlderYounger -> Lesbian Older Younger + RealityJunkies -> Reality Junkies + SweetSinner -> Sweet Sinner + SweetHeartVideo -> Sweetheart Video + +https://www.dilfed.com/scene/9292051/cheating-bbq - dlf / Mile High Media +https://www.doghousedigital.com/scene/9393951/amazing-tits-14-scene-2-titty-action - DogHouseDigital / Mile High Media +https://www.familysinners.com/scene/9381621/in-laws-2-episode-2-keep-it-in-the-family - Family Sinners / Mile High Media +https://www.milfed.com/scene/4365378/older-women-crave-chicks-02-scene-1 - LesbianOlderYounger / Mile High Media +https://www.milfed.com/scene/9497251/fun-at-the-physio - Milfed / Mile High Media +https://www.realityjunkies.com/scene/9446181/free-use-families-2-scene-3-cant-resist - RealityJunkies / Mile High Media +https://www.sweetsinner.com/scene/9585781/the-voyeur-6-scene-1-sneaky - SweetSinner / Mile High Media +https://www.sweetheartvideo.com/scene/4651721/lesbian-stepmother-7-scene-4 - SweetHeartVideo / Mile High Media + +# Mile High Media (Trans and Bi) + +- milehigh.com needs to be replaced with milehighmedia.com in all URLs +- milehigh.com can be replaced by studio domain +- Name adjustments: + BIEmpire -> Bi Empire + +https://www.biempire.com/scene/4421745/dirty-pictures - BIEmpire / Mile High Media +https://www.transsensual.com/scene/4653681/sheer-panties-cumshot-gets-her-guy - Transsensual / Mile High Media + +# Mile High Media (Gay) + +- milehigh.com needs to be replaced with milehighmedia.com in all URLs +- milehigh.com can be replaced by studio domain + 
+https://www.iconmale.com/scene/4653701/hot-daddies-3-scene-1 - Icon Male / Mile High Media +https://www.taboomale.com/scene/4373622/forgive-me-scene-1 - Icon Male / Mile High Media +https://www.noirmale.com/scene/9635521/christmas-proposal - Noir Male / Mile High Media + +# BigStr + +- Now known as Czech Hunter, seems to be actively restructuring: during development of this scraper several scenes vanished from public site +- 3 studios listed on StashDB, all 3 scrapable from API +- All studios have their own domain + +https://www.debtdandy.com/scene/4300191/debt-dandy-160 - Debt Dandy / BigStr +https://www.dirtyscout.com/scene/4508331/dirty-scout-283 - Dirty Scout / BigStr +https://www.czechhunter.com/scene/9575711/czech-hunter-718 - Czech Hunter / BigStr + +# Brazzers + +- 49 studios listed on StashDB, 28 scrapable from API + Missing: Brazzers en Español, Butts & Blacks, Charles Dera, Desiree Dulce, Emily Willis, Euro Babes, Jizz On My Jugs, Jordi, Kendra Lust, Lacy Lennon, Lil D, Luna Star, Pornstars Punishment, Rachel Starr, Racks & Blacks, SexPro Adventures, Sofia Rose, Sophie Dee, Teens Like It Black, VRT, Xander Corvus +- All scene URLs use 'video' instead of 'scene' between domain and scene ID +- All performer URLs use 'pornstar' instead of 'model' between domain and scene ID +- If scene has tag "Brazzers Live" then studio should be "Brazzers Live" +- Name adjustments: + JugFuckers -> Jug Fuckers + Shes Gonna Squirt -> She's Gonna Squirt + +https://www.brazzers.com/video/3788751/from-russia-with-lust - Asses in Public / Brazzers +https://www.brazzers.com/video/4423671/chores-suck-and-so-do-i - Baby Got Boobs / Brazzers +https://www.brazzers.com/video/4445271/big-ass-anal-for-a-heavy-load - Big Butts Like It Big / Brazzers +https://www.brazzers.com/video/3831901/principal-photography - Big Tits at School / Brazzers +https://www.brazzers.com/video/4658471/virtual-fuckfest - Big Tits at Work / Brazzers 
+https://www.brazzers.com/video/3847871/kortney-s-slutty-circuit-training - Big Tits In Sports / Brazzers +https://www.brazzers.com/video/3851201/dinners-on-me - Big Tits In Uniform / Brazzers +https://www.brazzers.com/video/4404989/britney-s-jeans - Big Wet Butts / Brazzers +https://www.brazzers.com/video/9311401/medical-ass-istance-required - BrazzersExxtra / Brazzers +https://www.brazzers.com/video/3873921/i-gotta-have-my-moms-boyfriend - Brazzers Vault / Brazzers +https://www.brazzers.com/video/3793201/big-natural-round-boobs - Busty & Real / Brazzers +https://www.brazzers.com/video/3882961/pretty-face-with-big-tits - Bustyz / Brazzers +https://www.brazzers.com/video/3899601/you-want-us-to-clean-your-dick - CFNM / Brazzers +https://www.brazzers.com/video/4415556/siouxsie-q-s-anal-kitchen-cleaning - Day With A Pornstar / Brazzers +https://www.brazzers.com/video/4393739/im-sensitive - Dirty Masseur / Brazzers +https://www.brazzers.com/video/3912371/psycho-anal-ysis - Doctor Adventures / Brazzers +https://www.brazzers.com/video/4505171/anal-workout-with-bestie - Hot And Mean / Brazzers +https://www.brazzers.com/video/3924151/everything-is-bigger-in-texas - Hot Chicks Big Asses / Brazzers +https://www.brazzers.com/video/3936411/big-natural-juggs - JugFuckers / Brazzers +https://www.brazzers.com/video/4395708/i-m-over-it - Milfs Like It Big / Brazzers +https://www.brazzers.com/video/3940591/ninas-chapel-of-lust-part-1 - Mommy Got Boobs / Brazzers +https://www.brazzers.com/video/3963591/you-need-mums-approval - Moms in control / Brazzers +https://www.brazzers.com/video/4445561/the-garden-of-demi-s-delights - Pornstars Like it Big / Brazzers +https://www.brazzers.com/video/4410374/sex-with-the-therapist - Real Wife Stories / Brazzers +https://www.brazzers.com/video/4015911/wheres-my-valentine - Shes Gonna Squirt / Brazzers +https://www.brazzers.com/video/4327711/mc2-ass - Teens Like It Big / Brazzers +https://www.brazzers.com/video/3789801/the-exxxceptions-episode-1 - 
ZZ Series / Brazzers + +# Bromo + +- Single studio but has some substudio elements? Will flatten to just "Bromo" with no parent + +https://www.bromo.com/scene/4412747/tattoo-fuck - Bromo US / Bromo +https://www.bromo.com/scene/3227341/breaking-him-scene-1 - Bromo BlackMaleMe / Bromo + +# Deviante + +- 5 studios listed on StashDB, all scrapable from API +- All scenes use 'video' instead of 'scene' between domain and scene ID +- All studios have their own domain +- Name adjustments: + es -> Erotic Spice + fmf -> Forgive Me Father + lha -> Love Her Ass + pdt -> Pretty Dirty Teens + sw -> Sex Working + +https://www.eroticspice.com/video/4422220/busty-japanese-milf-fucks-shy-guy - es / Deviante +https://www.forgivemefather.com/video/4493281/father-s-righteous-ritual - fmf / Deviante +https://www.loveherass.com/video/4424461/playful-roomie-loves-anal - lha / Deviante +https://www.prettydirtyteens.com/video/7939981/creampie-for-sneaky-college-teen - pdt / Deviante +https://www.deviante.com/video/4647241/cash-for-happy-ending-with-masseuse - sw / Deviante +https://www.sexworking.com/video/4474711/brazilian-escort-summoned-to-please - sw / Deviante + +# Digital Playground + +- 6 studios listed on StashDB, all scrapable from API +- Name adjustments: + dpw -> DP World + Dpstar Episodes -> Episodes + Dpstar Sex Challenges -> Sex Challenges + +https://www.digitalplayground.com/scene/4410802/sleepless-nights-scene-1 - Digital Playground +https://www.digitalplayground.com/scene/8353721/tourist-trap-episode-4 - dpw / Digital Playground +https://www.digitalplaygroundnetwork.com/scene/8353721/tourist-trap-episode-4 - dpw / Digital Playground +https://www.digitalplayground.com/scene/4132651/body-heat-scene-1 - Blockbuster / Digital Playground +https://www.digitalplayground.com/scene/4176641/ass-effect-a-xxx-parody - DP Parody / Digital Playground +https://www.digitalplayground.com/scene/4416872/auditions-part-1 - Dpstar Episodes / Digital Playground 
+https://www.digitalplayground.com/scene/4187051/bad-babysitter-episode-1 - Episodes / Digital Playground +https://www.digitalplayground.com/scene/4416848/luna-star-in-dp-star-sex-challenge - Dpstar Sex Challenges / Digital Playground +https://www.digitalplayground.com/scene/4489851/bouncer-bitch - Flixxx / Digital Playground +https://www.digitalplayground.com/scene/4186511/ghost-of-blowjobs-past - Rawcut - Digital Playground + +# Erito + +- No special handling required + +https://www.erito.com/scene/4653021/nonstop-sticky-creampie - Erito + +# FakeHub + +- 11 studios listed on StashDB, all scrapable from API +- All performer profiles have 'modelprofile' instead of 'model' between domain and scene ID +- Some studios have their own domain + +https://www.fakehub.com/scene/4411255/punk-rocker-loves-rough-sex - Fake Agent / FakeHub +https://www.fakehub.com/scene/4083631/cum-splattered-face-for-petite-teen - Fake Agent UK / FakeHub +https://www.fakehub.com/scene/4087511/unregistered-driver-creampied-by-cop - Fake Cop / FakeHub +https://www.fakehub.com/scene/4474671/students-accidental-tik-tok-bag-challenge - Fake Driving School / FakeHub +https://www.fakehub.com/scene/4084801/holiday-maker-strikes-a-sexual-deal - Fake Hospital / FakeHub +https://www.fakehostel.com/scene/9352861/sudden-threesome-for-new-lovers - Fake Hostel / FakeHub +https://www.faketaxi.com/scene/9642961/cute-brazilian-gives-xmas-tip - Fake Taxi / FakeHub +https://www.fakehub.com/scene/9642841/free-use-mail-order-e-girl - Fakehub Originals / FakeHub +https://www.fakehub.com/scene/4079471/sexy-lesbian-christmas-casting - Female Agent / FakeHub +https://www.fakehub.com/scene/4485911/curvy-driver-gets-a-hard-dicking - Female Fake Taxi / FakeHub +https://www.publicagent.com/scene/4498351/what-is-the-spanish-for-blowjob - Public Agent / FakeHub + +# Hentai Pros + +- No special handling required + +https://www.hentaipros.com/scene/8359071/girlfriend-stealing-app - Hentai Pros 
+https://www.hentaiprosnetwork.com/scene/2686971/boin-babes-at-the-resort-2 - Hentai Pros
+
+# Look At Her Now
+
+- Reality Kings?
+
+https://www.lookathernow.com/scene/4413126/taste-tester - Look At Her Now
+
+# Men.com
+
+- 10 studios listed on StashDB, all scrapable from API
+- Consolidated most studios URLs into men.com: old URLs should be supported, but only men.com should be output
+  Exception: TwinkPop still has a separate domain
+- All scene URLs use 'sceneid' instead of 'scene' between domain and scene ID
+  Exception: TwinkPop uses 'scene' or 'video': we'll stick with 'scene' for consistency
+- All performer URLs use 'modelprofile' instead of 'model' between domain and scene ID
+  Exception: TwinkPop uses 'pornstar'
+- Name adjustments:
+  tp -> TwinkPop
+  Men -> Men.com
+
+https://www.bigdicksatschool.com/sceneid/4405350/more-spice-than-sugar - Big Dicks At School / Men
+https://www.men.com/sceneid/4481271/the-rookie-lifeguard - Drill My Hole / Men
+https://www.godsofmen.com/scene/4392307/tied-to-you - Gods of Men / Men
+https://www.jizzorgy.com/scene/3707561/the-calendar-shoot - Jizz Orgy / Men
+https://www.menofuk.com/scene/3711691/men-in-crack - Men of UK / Men
+https://www.men.com/sceneid/9736131/ex-con-hard-on - Men
+https://www.str8togay.com/scene/4357781/rodeo-romeo - Str8 to Gay / Men
+https://www.thegayoffice.com/scene/3712041/unexpected-revenge - The Gay Office / Men
+https://www.toptobottom.com/scene/3716161/the-cleaner - Top to Bottom / Men
+https://www.twinkpop.com/video/9634991/sweet-twink-sweat - tp / Men
+
+Why Not Bi:
+https://www.whynotbi.com/scene/4643151/glory-ous-wet-threesome - Why Not Bi / WhyNotBi
+
+Mofos:
+https://www.iknowthatgirl.com/scene/9670091/influencer-coco-is-a-baddie - I Know That Girl / Mofos
+https://www.mofos.com/scene/2978471/webcamming-babysitter-learns-to-fuck - Busted Babysitters / Mofos
+https://www.dontbreakme.com/scene/4410806/dont-break-aria - Don't Break Me / Mofos
+https://www.mofos.com/scene/2984901/country-riding - Drone Hunter / Mofos +https://www.mofos.com/scene/2986081/breaking-entering-lesbian-couple - Girls Gone Pink / Mofos +https://www.mofos.com/scene/2982821/gym-class-fuckers - In Gang We Bang / Mofos +https://www.mofos.com/scene/2997891/public-flashing-in-the-street - Latina Sex Tapes / Mofos +https://www.mofos.com/scene/4620941/gf-loses-game-and-now-she-must-give-up-her-ass - Lets Try Anal / Mofos +https://www.mofos.com/scene/3014421/working-out-aint-working-out - Milfs Like It Black / Mofos +https://www.mofos.com/scene/2982291/big-booty-nurse-heals-sick-bf - Mofos B Sides / Mofos +https://www.mofosnetwork.com/scene/3009491/sweet-naomi - MOFOS Lab / Mofos +https://www.mofos.com/scene/4358051/smokeshow - Pervs On Patrol / Mofos +https://www.mofos.com/scene/3027181/riley-reid-doesnt-wear-panties - Pornstar Vote / Mofos +https://www.mofos.com/scene/3027261/petite-teens-rv-fuck - Project RV / Mofos +https://www.mofos.com/scene/4350950/cant-dickline-cash - Pubic Pickups / Mofos +https://www.mofos.com/scene/3037521/boats-n-hoes - Real Slut Party / Mofos +https://www.mofos.com/scene/9483651/new-dick-to-forget-your-ex - Share My BF / Mofos +https://www.mofos.com/scene/3046411/blondes-do-have-all-the-fun - She's A Freak / Mofos +https://www.mofos.com/scene/3218041/stranded-in-my-feelings - Stranded Teens / Mofos +https://www.mofos.com/scene/3051541/hot-brunette-scouts-a-stranger - The Sex Scout / Mofos + +Property Sex: +https://www.propertysex.com/scene/4437501/id-be-a-great-roommate - Property Sex +https://househumpers.com/scene/9635471/we-work-better-together - House Humpers / Property Sex + +Reality Dudes: +https://www.realitydudes.com/scene/2839091/cole - Str8 Chaser / Reality Dudes +https://www.realitydudes.com/scene/4475811/fitness-training - Dick Dorm / Reality Dudes +https://www.realitydudesnetwork.com/scene/9509871/malik-delgaty-drills-enzo-mullers-hole - Reality Dudes 
+https://www.papi.com/scene/2834511/santas-snake - Papi / Reality Dudes + +Reality Kings: +https://www.realitykings.com/scene/27/double-bubbles - 40 Inch Plus / Reality Kings +https://www.realitykings.com/scene/4293251/selling-that-ass - 8th Street Latinas / Reality Kings +https://www.realitykings.com/scene/10255/cant-be-serious - Bad Tow Truck / Reality Kings +https://www.realitykings.com/scene/4411866/give-her-a-break - Big Naturals / Reality Kings +https://www.realitykings.com/scene/1074/big-bust-bentley - Big Tits Boss / Reality Kings +https://www.realitykings.com/scene/4442371/semen-sirens-of-the-high-seas - Captain Stabbin / Reality Kings +https://www.realitykings.com/scene/1501/can-you-get-hard - CFNM Secret / Reality Kings +https://www.realitykings.com/scene/2959551/naughty-nyomi - Crazy Asian GFs / Reality Kings +https://www.realitykings.com/scene/2957881/deuce-is-wild - Crazy College GFs / Reality Kings +https://www.realitykings.com/scene/9955/ass-of-asha - Cum Fiesta / Reality Kings +https://www.realitykings.com/scene/2953981/party-time - Dare Dorm / Reality Kings +https://www.realitykings.com/scene/10746/loving-to-fuck - Euro Sex Parties / Reality Kings +https://www.realitykings.com/scene/2603/teenie-bikini - Extreme Asses / Reality Kings +https://www.realitykings.com/scene/2667/redheaded-tit-youth - Extreme Naturals / Reality Kings +https://www.realitykings.com/scene/10812/ease-into-elsa - First Time Auditions / Reality Kings +https://www.realitykings.com/scene/2955251/sexy-stash - GF Leaks / Reality Kings +https://www.realitykings.com/scene/3086/the-soloist - Girls of Naked / Reality Kings +https://www.happytugs.com/scene/3101/marvelous-mandi - Happy Tugs / Reality Kings +https://www.realitykings.com/scene/8376/rocking-remy - HD Love / Reality Kings +https://www.realitykings.com/scene/2957491/divorcees-revenge - Horny Birds / Reality Kings +https://www.realitykings.com/scene/3194/sweet-puss - Hot Bush / Reality Kings 
+https://www.realitykings.com/scene/3272/doing-doubles - In the VIP / Reality Kings +https://www.lilhumpers.com/scene/4410213/pounding-the-prankster - Lil Humpers / Reality Kings +https://www.realitykings.com/scene/9776/sweet-caramel - Mike in Brazil / Reality Kings +https://www.realitykings.com/scene/10808/sexy-zazie - Mike's Apartment / Reality Kings +https://www.milfhunter.com/scene/1853742/showering-her-with-cum - Milf Hunter / Reality Kings +https://www.realitykings.com/scene/5097/in-the-mood - Milf Next Door / Reality Kings +https://www.momsbangteens.com/scene/2286917/anal-sex-education - Moms Bang Teens / Reality Kings +https://www.momslickteens.com/scene/2948131/vacation-sensation - Moms Lick Teens / Reality Kings +https://www.moneytalks.com/scene/8790/candy-cooch - Money Talks / Reality Kings +https://www.realitykings.com/scene/4297681/choose-your-poison-2 - Monster Curves / Reality Kings +https://www.realitykings.com/scene/5606/private-packers - No Faces / Reality Kings +https://www.realitykings.com/scene/5708/horny-chloe - Pure 18 / Reality Kings +https://www.recklessinmiami.com/scene/4393751/home-and-away - Reckless in Miami / Reality Kings +https://www.realitykings.com/scene/4654441/mai-i-oil-you-up - RK Prime / Reality Kings +https://www.sneakysex.com/scene/4410566/booty-camp - Sneaky Sex / Reality Kings +https://www.realitykings.com/scene/10688/juicy-sweet - Street BlowJobs / Reality Kings +https://www.teenslovehugecocks.com/scene/4417629/amazing-avery - Teens Love Huge Cocks / Reality Kings +https://www.realitykings.com/scene/4655221/fuck-truck - Work Me Harder / Reality Kings +https://www.welivetogether.com/scene/4414014/wlt-s01e02-new-arrivals - We Live Together / Reality Kings +https://www.realitykings.com/scene/8283/perfect-ten - Wives in Pantyhose / Reality Kings + +Sean Cody: +https://www.seancody.com/scene/4652961/devy-brock-bareback - Sean Cody +https://www.seancody.com/scene/2711231/daniel-philip-bareback - SC BlackMaleMe / Sean Cody + 
+SexyHub: +https://www.lesbea.com/scene/9289091/orgasms-in-beautiful-lingerie - Lesbea / SexyHub +https://www.danejones.com/scene/9585371/gamer-fucks-big-tits-italian-gf - Dane Jones / SexyHub +https://www.sexyhub.com/scene/9379531/long-legs-facesitting-with-stepmom - Mom XXX / SexyHub +https://www.sexyhub.com/scene/9585341/big-cock-gym-perv-fucks-redhead - Fitness Rooms / SexyHub +https://www.sexyhub.com/scene/4227051/bananas - Girlfriends / SexyHub +https://www.sexyhub.com/scene/4411925/deep-orgasms-for-petite-czech-babe - Massage Rooms / SexyHub + +Squirted: +https://www.squirted.com/scene/4485941/tiffanys-back-for-more - Squirted + +TransAngels: +https://www.transangels.com/scene/2682551/cocked-behind-bars - TransAngels +https://www.transangelsnetwork.com/scene/9510091/strip-searched-and-fucked - TransAngels +https://www.transharder.com/scene/4435871/simulation-stimulation - TransAngels + +True Amateurs: +https://www.trueamateurs.com/scene/4333351/footjob-with-black-stockings + +Premium: +https://www.tube8vip.com/scene/2933481/why-everybody-loves-adriana - Elite / Premium + +Twistys: +https://www.twistys.com/scene/3406841/fix-her-up-her - TwistysHard / Twistys +https://www.twistysnetwork.com/scene/3474791/for-all-the-world-to-see - twistys / Twistys +https://www.twistys.com/scene/9420281/lulus-lesbian-squirtfest - whengirlsplay / Twistys +https://www.twistys.com/scene/4281921/dads-new-girlfriend - Mom Knows Best / Twistys +https://www.twistys.com/scene/3430461/work-those-asses - Twistys Teasers / Twistys +https://www.twistys.com/scene/3436211/wanna-play - Nicole Graves / Twistys +https://www.twistys.com/scene/3468571/anette-gets-a-naughty - anettedawn / Twistys +https://www.twistys.com/scene/8366721/let-me-show-you - Turning Twistys / Twistys +https://www.twistys.com/scene/3413521/the-artist-within-part-1 - Feature Film / Twistys +https://www.twistys.com/scene/3398991/tight-bodied-vixen - twistys / Twistys 
+https://www.twistys.com/scene/3427141/holding-my-breath - twistys / Twistys + +# VOYR + +https://www.voyr.com/scene/9729471/four-way-pleasure - Voyr / VOYR diff --git a/scrapers/Babes.yml b/scrapers/Babes.yml new file mode 100644 index 0000000..483a3f3 --- /dev/null +++ b/scrapers/Babes.yml @@ -0,0 +1,65 @@ +name: Babes +# requires: py_common, AyloAPI +# scrapes: Babes, Babes Unleashed, Black is Better (now Babes Unleashed), Elegant Anal, Office Obsession, Step Mom Lessons +sceneByURL: + - action: script + url: + - babes.com/scene/ + script: + - python + - AyloAPI/scrape.py + - babes + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - babes + - performer-by-name +performerByURL: + - action: script + url: + - babes.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - babes.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - babes.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/BangBros/BangBros.py b/scrapers/BangBros/BangBros.py new file mode 100644 index 0000000..7fa0462 --- /dev/null +++ b/scrapers/BangBros/BangBros.py @@ -0,0 +1,90 @@ +import json +import sys +from requests import head +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + 
performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "AvaSpice": "Ava Spice", + "MomIsHorny": "Mom Is Horny", +} + + +def redirect(url: str) -> str: + if not url: + return url + if (res := head(url)) and (redirect := res.headers.get("Location", url)): + return redirect if not redirect.endswith("404") else url + return url + + +def bangbros(obj: Any, _) -> Any: + domain = ( + "virtualporn.com" + if dig(obj, "studio", "name") == "Virtual Porn" + else "bangbros.com" + ) + + # All bangbros URLs omit the standard www. subdomain prefix + # and all scene URLs use /video/ instead of the standard /scene/ + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/scene/", "/video/").replace("www.bangbros.com", domain), + ) + + # Rename certain studios according to the map + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "bangbros", + "virtualporn", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + url = redirect(url) + result = scene_from_url(url, postprocess=bangbros) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=bangbros) + case "scene-by-fragment" | "scene-by-query-fragment", args: + args = replace_all(args, "url", redirect) + result = scene_from_fragment( + args, search_domains=domains, postprocess=bangbros + ) + case "performer-by-url", {"url": url}: + url = redirect(url) + result = performer_from_url(url, postprocess=bangbros) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=bangbros + ) + case "movie-by-url", {"url": url} if url: + url = redirect(url) + result = movie_from_url(url, postprocess=bangbros) + case _: + 
log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/BangBros/BangBros.yml b/scrapers/BangBros/BangBros.yml new file mode 100644 index 0000000..4be974a --- /dev/null +++ b/scrapers/BangBros/BangBros.yml @@ -0,0 +1,60 @@ +name: BangBros +# requires: py_common, AyloAPI +# scrapes: Ass Parade, Ava Spice, Back Room Facials, Backroom MILF, Ball Honeys, Bang Bus, Bang Casting, Bang POV, Bang Tryouts, BangBros 18, BangBros Angels, Bangbros Clips, BangBros Remastered, BangBros Vault, Big Mouthfuls, Big Tit Cream Pie, Big Tits, Round Asses, BlowJob Fridays, Blowjob Ninjas, Boob Squad, Brown Bunnies, Can He Score, Casting, Chongas, Colombia Fuck Fest, Dirty World Tour, Dorm Invasion, Facial Fest, Fuck Team Five, Glory Hole Loads, Latina Rampage, Living With Anna, MILF Lessons, Milf Soup, Magical Feet, Mom Is Horny, Monsters of Cock, Mr CamelToe, Mr. Anal, My Dirty Maid, My Life In Brazil, Newbie Black, Pawg, Party of 3, Penny Show, Porn Star Spa, Power Munch, Public Bang, Slutty White Girls, Stepmom Videos, Street Ranger, Tugjobs, Virtual Porn, Working Latinas +sceneByURL: + - action: script + url: + - bangbros.com/video + - virtualporn.com/video + script: + - python + - BangBros.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - BangBros.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - BangBros.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - BangBros.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - BangBros.py + - performer-by-name +performerByURL: + - action: script + url: + - bangbros.com/model/ + script: + - python + - BangBros.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - BangBros.py + - performer-by-fragment +movieByURL: + - action: script + url: + - bangbros.com/movie + - virtualporn.com/movie + script: + - python + 
- BangBros.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Brazzers/Brazzers.py b/scrapers/Brazzers/Brazzers.py new file mode 100644 index 0000000..b6372b4 --- /dev/null +++ b/scrapers/Brazzers/Brazzers.py @@ -0,0 +1,77 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "JugFuckers": "Jug Fuckers", + "Shes Gonna Squirt": "She's Gonna Squirt", +} + + +def bangbros(obj: Any, _) -> Any: + # All brazzers URLs use /video/ instead of the standard /scene/ + # and /pornstar/ instead of the standard /model + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/scene/", "/video/").replace("/model/", "/pornstar/"), + ) + + # Rename certain studios according to the map + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + # Brazzers Live special case: if the scene has the tag "Brazzers Live" we need to set the studio name to "Brazzers Live" + if any(t["name"] == "Brazzers Live" for t in dig(obj, "tags", default=[])): + fixed = replace_at( + fixed, "studio", "name", replacement=lambda _: "Brazzers Live" + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "brazzers", + "brazzersnetwork", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=bangbros) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=bangbros) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=bangbros + ) + case "performer-by-url", {"url": url}: + result = 
performer_from_url(url, postprocess=bangbros) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=bangbros + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=bangbros) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Brazzers/Brazzers.yml b/scrapers/Brazzers/Brazzers.yml new file mode 100644 index 0000000..05b7b60 --- /dev/null +++ b/scrapers/Brazzers/Brazzers.yml @@ -0,0 +1,61 @@ +name: Brazzers +# requires: py_common, AyloAPI +# scrapes: Asses in Public, Baby Got Boobs, Big Butts Like It Big, Big Tits at School, Big Tits at Work, Big Tits In Sports, Big Tits In Uniform, Big Wet Butts, BrazzersExxtra, Brazzers Live, Brazzers Vault, Busty & Real, Bustyz, CFNM, Day With A Pornstar, Dirty Masseur, Doctor Adventures, Hot And Mean, Hot Chicks Big Asses, JugFuckers, Milfs Like It Big, Mommy Got Boobs, Moms in control, Pornstars Like it Big, Real Wife Stories, Shes Gonna Squirt, Teens Like It Big, ZZ Series +sceneByURL: + - action: script + url: + - brazzers.com/video + - brazzersnetwork.com/video + script: + - python + - Brazzers.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Brazzers.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Brazzers.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Brazzers.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Brazzers.py + - performer-by-name +performerByURL: + - action: script + url: + - brazzers.com/pornstar/ + - brazzersnetwork.com/pornstar/ + script: + - python + - Brazzers.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Brazzers.py + - performer-by-fragment +movieByURL: 
+ - action: script + url: + - brazzers.com/movie + - brazzersnetwork.com/movie + script: + - python + - Brazzers.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Bromo/Bromo.py b/scrapers/Bromo/Bromo.py new file mode 100644 index 0000000..7fe4d96 --- /dev/null +++ b/scrapers/Bromo/Bromo.py @@ -0,0 +1,51 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def bromo(obj: Any, _) -> Any: + # Flatten all studios to just "Bromo" + return replace_at(obj, "studio", replacement=lambda _: {"name": "Bromo"}) + + +if __name__ == "__main__": + domains = [ + "bromo", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=bromo) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=bromo) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=bromo + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=bromo) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=bromo) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=bromo) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Bromo/Bromo.yml b/scrapers/Bromo/Bromo.yml new file mode 100644 index 0000000..d3ed51c --- /dev/null +++ b/scrapers/Bromo/Bromo.yml @@ -0,0 +1,60 @@ +name: Bromo +# requires: 
py_common, AyloAPI +# scrapes: Bromo +sceneByURL: + - action: script + url: + - bromo.com/scene/ + script: + - python + - Bromo.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Bromo.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Bromo.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - Bromo.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Bromo.py + - performer-by-name +performerByURL: + - action: script + url: + - bromo.com/model/ + script: + - python + - Bromo.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Bromo.py + - performer-by-fragment +movieByURL: + - action: script + url: + - bromo.com/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - bromo.com/movie/ + script: + - python + - Bromo.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/CzechHunter/CzechHunter.py b/scrapers/CzechHunter/CzechHunter.py new file mode 100644 index 0000000..889b36d --- /dev/null +++ b/scrapers/CzechHunter/CzechHunter.py @@ -0,0 +1,69 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def czechhunter(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Czech Hunter": + replacement = "czechhunter.com" + case "Debt Dandy": + replacement = "debtdandy.com" + case "Dirty Scout": + replacement = "dirtyscout.com" + case _: + # This will never be correct, but I don't see a better way to handle it + replacement = "bigstr.com" + + # Replace the studio name in all URLs + fixed = replace_all(obj, "url", lambda x: x.replace("bigstr.com", replacement)) + + return 
fixed + + +if __name__ == "__main__": + domains = [ + "czechhunter", + "debtdandy", + "dirtyscout", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=czechhunter) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=czechhunter) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=czechhunter + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=czechhunter) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=czechhunter + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=czechhunter) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/CzechHunter/CzechHunter.yml b/scrapers/CzechHunter/CzechHunter.yml new file mode 100644 index 0000000..990c1f5 --- /dev/null +++ b/scrapers/CzechHunter/CzechHunter.yml @@ -0,0 +1,68 @@ +name: Czech Hunter (BigStr) +# requires: py_common, AyloAPI +# scrapes: Czech Hunter, Debt Dandy, Dirty Scout +sceneByURL: + - action: script + url: + - czechhunter.com/scene/ + - debtdandy.com/scene/ + - dirtyscout.com/scene/ + script: + - python + - CzechHunter.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - CzechHunter.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - CzechHunter.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - CzechHunter.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - CzechHunter.py + - performer-by-name +performerByURL: + - action: 
script + url: + - czechhunter.com/model/ + - debtdandy.com/model/ + - dirtyscout.com/model/ + script: + - python + - CzechHunter.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - CzechHunter.py + - performer-by-fragment +movieByURL: + - action: script + url: + - czechhunter.com/scene/ + - debtdandy.com/scene/ + - dirtyscout.com/scene/ + # Since scenes link to the movie we can scrape movies from scenes + - czechhunter.com/movie/ + - debtdandy.com/movie/ + - dirtyscout.com/movie/ + script: + - python + - CzechHunter.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Deviante/Deviante.py b/scrapers/Deviante/Deviante.py new file mode 100644 index 0000000..43df5a2 --- /dev/null +++ b/scrapers/Deviante/Deviante.py @@ -0,0 +1,89 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "es": "Erotic Spice", + "fmf": "Forgive Me Father", + "lha": "Love Her Ass", + "pdt": "Pretty Dirty Teens", + "sw": "Sex Working", +} + + +def deviante(obj: Any, _) -> Any: + fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x)) + + replacement = None + match dig(fixed, "studio", "name"): + case "Erotic Spice": + replacement = "eroticspice.com" + case "Forgive Me Father": + replacement = "forgivemefather.com" + case "Love Her Ass": + replacement = "loveherass.com" + case "Pretty Dirty Teens": + replacement = "prettydirtyteens.com" + case "Sex Working": + replacement = "sexworking.com" + case _: + replacement = "deviante.com" + + # All deviante URLs use /video/ instead of the standard /scene/ + # and also have separate domains per studio + fixed = replace_all( + fixed, + "url", + lambda x: x.replace("/scene/", 
"/video/").replace("deviante.com", replacement), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "eroticspice", + "forgivemefather", + "loveherass", + "prettydirtyteens", + "sexworking", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=deviante) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=deviante) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=deviante + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=deviante) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=deviante + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=deviante) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/Deviante/Deviante.yml b/scrapers/Deviante/Deviante.yml new file mode 100644 index 0000000..e7537a0 --- /dev/null +++ b/scrapers/Deviante/Deviante.yml @@ -0,0 +1,80 @@ +name: Deviante +# requires: py_common, AyloAPI +# scrapes: Erotic Spice, Forgive Me Father, Love Her Ass, Pretty Dirty Teens, Sex Working +sceneByURL: + - action: script + url: + - deviante.com/video/ + - eroticspice.com/video/ + - forgivemefather.com/video/ + - loveherass.com/video/ + - prettydirtyteens.com/video/ + - sexworking.com/video/ + script: + - python + - Deviante.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - Deviante.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - Deviante.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - 
python + - Deviante.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - Deviante.py + - performer-by-name +performerByURL: + - action: script + url: + - deviante.com/model/ + - eroticspice.com/model/ + - forgivemefather.com/model/ + - loveherass.com/model/ + - prettydirtyteens.com/model/ + - sexworking.com/model/ + script: + - python + - Deviante.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - Deviante.py + - performer-by-fragment +movieByURL: + - action: script + url: + - deviante.com/movie/ + - eroticspice.com/movie/ + - forgivemefather.com/movie/ + - loveherass.com/movie/ + - prettydirtyteens.com/movie/ + - sexworking.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - deviante.com/video/ + - eroticspice.com/video/ + - forgivemefather.com/video/ + - loveherass.com/video/ + - prettydirtyteens.com/video/ + - sexworking.com/video/ + script: + - python + - Deviante.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/DigitalPlayground/DigitalPlayground.py b/scrapers/DigitalPlayground/DigitalPlayground.py new file mode 100644 index 0000000..2319114 --- /dev/null +++ b/scrapers/DigitalPlayground/DigitalPlayground.py @@ -0,0 +1,61 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "dpw": "DP World", + "Dpstar Episodes": "Episodes", + "Dpstar Sex Challenges": "Sex Challenges", +} + + +def digitalplayground(obj: Any, _) -> Any: + return replace_at(obj, "studio", "name", replacement=lambda x: studio_map.get(x, x)) + + +if __name__ == "__main__": + domains = [ + "bangbros", + "virtualporn", + ] + op, args = scraper_args() + result = None + + match op, args: + case 
"scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=digitalplayground) + case "scene-by-name", {"name": name} if name: + result = scene_search( + name, search_domains=domains, postprocess=digitalplayground + ) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=digitalplayground + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=digitalplayground) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=digitalplayground + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=digitalplayground) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/DigitalPlayground/DigitalPlayground.yml b/scrapers/DigitalPlayground/DigitalPlayground.yml new file mode 100644 index 0000000..218571f --- /dev/null +++ b/scrapers/DigitalPlayground/DigitalPlayground.yml @@ -0,0 +1,64 @@ +name: Digital Playground +# requires: py_common, AyloAPI +# scrapes: Digital Playground, DP Parodies, DP World, Episodes, Flixxx, Rawcut +sceneByURL: + - action: script + url: + - digitalplayground.com/scene/ + - digitalplaygroundnetwork.com/scene/ + script: + - python + - DigitalPlayground.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - DigitalPlayground.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - DigitalPlayground.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - DigitalPlayground.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - DigitalPlayground.py + - performer-by-name +performerByURL: + - action: script + url: + - 
digitalplayground.com/model/ + - digitalplaygroundnetwork.com/model/ + script: + - python + - DigitalPlayground.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - DigitalPlayground.py + - performer-by-fragment +movieByURL: + - action: script + url: + - digitalplayground.com/movie/ + - digitalplaygroundnetwork.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - digitalplayground.com/scene/ + - digitalplaygroundnetwork.com/scene/ + script: + - python + - DigitalPlayground.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/Erito.yml b/scrapers/Erito.yml new file mode 100644 index 0000000..2f17ece --- /dev/null +++ b/scrapers/Erito.yml @@ -0,0 +1,65 @@ +name: Erito +# requires: py_common, AyloAPI +# scrapes: Erito +sceneByURL: + - action: script + url: + - erito.com/scene/ + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-fragment +sceneByName: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - AyloAPI/scrape.py + - erito + - performer-by-name +performerByURL: + - action: script + url: + - erito.com/model/ + script: + - python + - AyloAPI/scrape.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - performer-by-fragment +movieByURL: + - action: script + url: + - erito.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - erito.com/scene/ + script: + - python + - AyloAPI/scrape.py + - movie-by-url +# Last Updated January 13, 2024 diff --git a/scrapers/FakeHub/FakeHub.py b/scrapers/FakeHub/FakeHub.py new file mode 100644 index 0000000..44b4903 --- /dev/null +++ 
b/scrapers/FakeHub/FakeHub.py @@ -0,0 +1,74 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def fakehub(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Fake Hostel": + replacement = "fakehostel.com" + case "Fake Taxi": + replacement = "faketaxi.com" + case "Public Agent": + replacement = "publicagent.com" + case _: + replacement = "fakehub.com" + + # All FakeHub performer URLs use /modelprofile/ instead of the standard /model/ + # and some studios have their own domains + fixed = replace_all( + obj, + "url", + lambda x: x.replace("/model/", "/modelprofile/").replace( + "fakehub.com", replacement + ), + ) + + return fixed + + +if __name__ == "__main__": + domains = [ + "fakehub", + "fakehostel", + "faketaxi", + "publicagent", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=fakehub) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=fakehub) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=fakehub + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=fakehub) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=fakehub) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=fakehub) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + 
print(json.dumps(result)) diff --git a/scrapers/FakeHub/FakeHub.yml b/scrapers/FakeHub/FakeHub.yml new file mode 100644 index 0000000..b9e9533 --- /dev/null +++ b/scrapers/FakeHub/FakeHub.yml @@ -0,0 +1,72 @@ +name: FakeHub +# requires: py_common, AyloAPI +# scrapes: Fake Agent, Fake Agent UK, Fake Cop, Fake Driving School, Fake Hospital, Fake Hostel, Fake Taxi, Fakehub Originals, Female Agent, Female Fake Taxi, Public Agent +sceneByURL: + - action: script + url: + - fakehub.com/scene/ + - fakehostel.com/scene/ + - faketaxi.com/scene/ + - publicagent.com/scene/ + script: + - python + - FakeHub.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - FakeHub.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - FakeHub.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - FakeHub.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - FakeHub.py + - performer-by-name +performerByURL: + - action: script + url: + - fakehub.com/modelprofile/ + - fakehostel.com/modelprofile/ + - faketaxi.com/modelprofile/ + - publicagent.com/modelprofile/ + script: + - python + - FakeHub.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - FakeHub.py + - performer-by-fragment +movieByURL: + - action: script + url: + - fakehub.com/movie/ + - fakehostel.com/movie/ + - faketaxi.com/movie/ + - publicagent.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - fakehub.com/scene/ + - fakehostel.com/scene/ + - faketaxi.com/scene/ + - publicagent.com/scene/ + script: + - python + - FakeHub.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/GayWire/GayWire.py b/scrapers/GayWire/GayWire.py new file mode 100644 index 0000000..3958ccc --- /dev/null +++ b/scrapers/GayWire/GayWire.py @@ -0,0 +1,85 @@ +import json +import sys +from requests import head +from typing import Any +from py_common import 
log +from py_common.util import replace_all, replace_at +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + +studio_map = { + "Its Gonna Hurt": "It's Gonna Hurt", + "Poundhisass": "Pound His Ass", +} + + +def redirect(url: str) -> str: + if not url: + return url + if (res := head(url)) and (redirect := res.headers.get("Location", url)): + return redirect if not redirect.endswith("404") else url + return url + + +def gaywire(obj: Any, _) -> Any: + if obj is None: + return None + + # API returns Gay Wire substudios as bangbros.com + fixed = replace_all( + obj, + "url", + lambda x: x.replace("www.bangbros.com", "gaywire.com"), + ) + + # Rename certain studios according to the map + fixed = replace_at( + fixed, "studio", "name", replacement=lambda x: studio_map.get(x, x) + ) + + fixed = replace_at( + fixed, "studio", "parent", "name", replacement=lambda x: "Gay Wire" + ) + + return fixed + + +if __name__ == "__main__": + domains = ["gaywire"] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + url = redirect(url) + result = scene_from_url(url, postprocess=gaywire) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=gaywire) + case "scene-by-fragment" | "scene-by-query-fragment", args: + args = replace_all(args, "url", redirect) + result = scene_from_fragment( + args, search_domains=domains, postprocess=gaywire + ) + case "performer-by-url", {"url": url}: + url = redirect(url) + result = performer_from_url(url, postprocess=gaywire) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search(name, search_domains=domains, postprocess=gaywire) + case "movie-by-url", {"url": url} if url: + url = redirect(url) + result 
= movie_from_url(url, postprocess=gaywire) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/GayWire/GayWire.yml b/scrapers/GayWire/GayWire.yml new file mode 100644 index 0000000..276affa --- /dev/null +++ b/scrapers/GayWire/GayWire.yml @@ -0,0 +1,58 @@ +name: Gay Wire +# requires: py_common, AyloAPI +# scrapes: Bait Bus, Bareback Attack, Bareback Casting, ExBF, Haze Him, It's Gonna Hurt, Out In Public, Pound His Ass, Rub Him, Sausage Party, Thug Hunter, UngloryHole, Urban Invasion +sceneByURL: + - action: script + url: + - gaywire.com/video + script: + - python + - GayWire.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - GayWire.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - GayWire.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - GayWire.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - GayWire.py + - performer-by-name +performerByURL: + - action: script + url: + - gaywire.com/model/ + script: + - python + - GayWire.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - GayWire.py + - performer-by-fragment +movieByURL: + - action: script + url: + - gaywire.com/movie + script: + - python + - GayWire.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/HentaiPros.yml b/scrapers/HentaiPros.yml new file mode 100644 index 0000000..4e574df --- /dev/null +++ b/scrapers/HentaiPros.yml @@ -0,0 +1,65 @@ +name: Hentai Pros +# requires: py_common, AyloAPI +# scrapes: Hentai Pros +sceneByURL: + - action: script + url: + - hentaipros.com/scene/ + script: + - python + - AyloAPI/scrape.py + - hentaipros + - scene-by-url +sceneByFragment: + action: script + script: + - python + - AyloAPI/scrape.py + - hentaipros + - scene-by-fragment +sceneByName: + action: script + script: + - python + - 
def men(obj: Any, _) -> Any:
    """Postprocess an Aylo API result for the Men.com network.

    Maps raw API studio names through studio_map, then rewrites URLs:
    TwinkPop content lives on twinkpop.com with its own path segments,
    everything else belongs under men.com.
    """

    def canonical(name):
        # Fall back to the name as-is when it has no mapping
        return studio_map.get(name, name)

    fixed = replace_at(obj, "studio", "name", replacement=canonical)
    fixed = replace_at(fixed, "studio", "parent", "name", replacement=canonical)

    # TwinkPop is the only special case for now
    if dig(fixed, "studio", "name") == "TwinkPop":
        scene_path, model_path, domain = "/scene/", "/pornstar/", "twinkpop.com"
    else:
        scene_path, model_path, domain = "/sceneid/", "/modelprofile/", "men.com"

    def fix_url(url):
        url = url.replace("/scene/", scene_path)
        url = url.replace("/model/", model_path)
        return url.replace("men.com", domain)

    return replace_all(fixed, "url", fix_url)
def metrohd(obj: Any, _) -> Any:
    """Postprocess an Aylo API result for the Metro HD network.

    Picks the per-studio website for URL rewriting and renames the
    "Metro" studio to the name Stash knows it by, "Metro HD".
    """
    # Studio display name -> site domain; anything unrecognised falls back
    # to the network site metrohd.com
    domain_map = {
        "Deviant Hardcore": "devianthardcore.com",
        "Family Hook Ups": "familyhookups.com",
        "Girl Grind": "girlgrind.com",
        "Kinky Spa": "kinkyspa.com",
        "She Will Cheat": "shewillcheat.com",
    }
    replacement = domain_map.get(dig(obj, "studio", "name"), "metrohd.com")

    # Replace the studio name in all URLs: even if there's no specific studio,
    # metro.com is wrong and needs to be replaced with metrohd.com
    fixed = replace_all(obj, "url", lambda x: x.replace("metro.com", replacement))

    # The API returns "Metro" as a studio name but we know them as "Metro HD".
    # Match the exact name instead of substring-replacing so names that merely
    # contain "Metro" (or an already-correct "Metro HD") are left untouched.
    def rename(name):
        return "Metro HD" if name == "Metro" else name

    fixed = replace_at(fixed, "studio", "name", replacement=rename)
    fixed = replace_at(fixed, "studio", "parent", "name", replacement=rename)
    return fixed
b/scrapers/MetroHD/MetroHD.yml new file mode 100644 index 0000000..364c575 --- /dev/null +++ b/scrapers/MetroHD/MetroHD.yml @@ -0,0 +1,73 @@ +name: Metro HD +# requires: py_common, AyloAPI +# scrapes: Metro HD, Deviant Hardcore, Girl Grind, Kinky Spa, She Will Cheat, Family Hookups +sceneByURL: + - action: script + url: + - metrohd.com/scene/ + - devianthardcore.com/scene/ + - familyhookups.com/scene/ + - girlgrind.com/scene/ + - kinkyspa.com/scene/ + - shewillcheat.com/scene/ + script: + - python + - MetroHD.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - MetroHD.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - MetroHD.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MetroHD.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MetroHD.py + - performer-by-name +performerByURL: + - action: script + url: + - metrohd.com/model/ + - devianthardcore.com/model/ + - familyhookups.com/model/ + - girlgrind.com/model/ + - kinkyspa.com/model/ + - shewillcheat.com/model/ + script: + - python + - MetroHD.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MetroHD.py + - performer-by-fragment +movieByURL: + - action: script + url: + - metrohd.com/movie/ + - devianthardcore.com/movie/ + - familyhookups.com/movie/ + - girlgrind.com/movie/ + - kinkyspa.com/movie/ + - shewillcheat.com/movie/ + script: + - python + - MetroHD.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py new file mode 100644 index 0000000..d6fdb8a --- /dev/null +++ b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py @@ -0,0 +1,73 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + 
def milehigh(obj: Any, _) -> Any:
    """Postprocess an Aylo API result for Mile High Media's bi/trans sites.

    Renames "BIEmpire" to "Bi Empire" wherever a name appears, then points
    URLs at the studio's own site.
    """
    fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x))

    # Studio display name -> site domain
    sites = {
        "Bi Empire": "biempire.com",
        "Transsensual": "transsensual.com",
    }
    # Replace the studio name in all URLs: even if there's no specific studio,
    # milehigh.com is wrong and needs to be replaced with milehighmedia.com
    replacement = sites.get(dig(fixed, "studio", "name"), "milehighmedia.com")
    return replace_all(fixed, "url", lambda x: x.replace("milehigh.com", replacement))
b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.yml new file mode 100644 index 0000000..9123781 --- /dev/null +++ b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.yml @@ -0,0 +1,64 @@ +name: Mile High Media (Bi and Trans) +# requires: py_common, AyloAPI +# scrapes: Bi Empire, Transsensual +sceneByURL: + - action: script + url: + - biempire.com/scene/ + - transsensual.com/scene/ + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - performer-by-name +performerByURL: + - action: script + url: + - biempire.com/model/ + - transsensual.com/model/ + script: + - python + - MileHighMedia_BiandTrans.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MileHighMedia_BiandTrans.py + - performer-by-fragment +movieByURL: + - action: script + url: + - biempire.com/movie/ + - transsensual.com/movie/ + # Since scenes link to the movie we can scrape movies from scenes + - biempire.com/scene/ + - transsensual.com/scene/ + script: + - python + - MileHighMedia_BiandTrans.py + - movie-by-url +# Last Updated January 14, 2024 diff --git a/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py new file mode 100644 index 0000000..f0dab98 --- /dev/null +++ b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py @@ -0,0 +1,67 @@ +import json +import sys +from typing import Any +from py_common import log +from py_common.util import dig, replace_all +from AyloAPI.scrape import ( + scraper_args, + scene_from_url, + scene_search, + scene_from_fragment, + 
performer_from_url, + performer_from_fragment, + performer_search, + movie_from_url, +) + + +def milehigh(obj: Any, _) -> Any: + replacement = None + match dig(obj, "studio", "name"): + case "Icon Male": + replacement = "iconmale.com" + case "Noir Male": + replacement = "noirmale.com" + case _: + replacement = "milehighmedia.com" + + # Replace the studio name in all URLs: even if there's no specific studio, + # milehigh.com is wrong and needs to be replaced with milehighmedia.com + fixed = replace_all(obj, "url", lambda x: x.replace("milehigh.com", replacement)) + + return fixed + + +if __name__ == "__main__": + domains = [ + "iconmale", + "noirmale", + "taboomale", + ] + op, args = scraper_args() + result = None + + match op, args: + case "scene-by-url", {"url": url} if url: + result = scene_from_url(url, postprocess=milehigh) + case "scene-by-name", {"name": name} if name: + result = scene_search(name, search_domains=domains, postprocess=milehigh) + case "scene-by-fragment" | "scene-by-query-fragment", args: + result = scene_from_fragment( + args, search_domains=domains, postprocess=milehigh + ) + case "performer-by-url", {"url": url}: + result = performer_from_url(url, postprocess=milehigh) + case "performer-by-fragment", args: + result = performer_from_fragment(args) + case "performer-by-name", {"name": name} if name: + result = performer_search( + name, search_domains=domains, postprocess=milehigh + ) + case "movie-by-url", {"url": url} if url: + result = movie_from_url(url, postprocess=milehigh) + case _: + log.error(f"Operation: {op}, arguments: {json.dumps(args)}") + sys.exit(1) + + print(json.dumps(result)) diff --git a/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml new file mode 100644 index 0000000..7796f74 --- /dev/null +++ b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.yml @@ -0,0 +1,64 @@ +name: Mile High Media (Gay) +# requires: py_common, AyloAPI +# scrapes: Icon Male, Noir Male, Taboo Male 
def milehigh(obj: Any, _) -> Any:
    """Postprocess an Aylo API result for Mile High Media's straight sites.

    Maps the API's internal studio names to their display names, then points
    URLs at the studio's own website.
    """
    fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x))

    # Studio display name -> site domain; "Milfed" and "Lesbian Older Younger"
    # share milfed.com, matching the original case arms
    sites = {
        "Dilfed": "dilfed.com",
        "Doghouse Digital": "doghousedigital.com",
        "Family Sinners": "familysinners.com",
        "Milfed": "milfed.com",
        "Lesbian Older Younger": "milfed.com",
        "Reality Junkies": "realityjunkies.com",
        "Sweet Sinner": "sweetsinner.com",
        "Sweetheart Video": "sweetheartvideo.com",
    }
    # Replace the studio name in all URLs: even if there's no specific studio,
    # milehigh.com is wrong and needs to be replaced with milehighmedia.com
    replacement = sites.get(dig(fixed, "studio", "name"), "milehighmedia.com")
    return replace_all(fixed, "url", lambda x: x.replace("milehigh.com", replacement))
+ print(json.dumps(result)) diff --git a/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml new file mode 100644 index 0000000..156203b --- /dev/null +++ b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.yml @@ -0,0 +1,88 @@ +name: Mile High Media (Straight) +# requires: py_common, AyloAPI +# scrapes: Dilfed, Doghouse Digital, Family Sinners, Lesbian Older Younger, Mile High Media, Milfed, Reality Junkies, Sweet Sinner, Sweetheart Video +sceneByURL: + - action: script + url: + - milehighmedia.com/scene/ + - dilfed.com/scene/ + - doghousedigital.com/scene/ + - familysinners.com/scene/ + - milfed.com/scene/ + - realityjunkies.com/scene/ + - sweetsinner.com/scene/ + - sweetheartvideo.com/scene/ + script: + - python + - MileHighMedia_Straight.py + - scene-by-url +sceneByFragment: + action: script + script: + - python + - MileHighMedia_Straight.py + - scene-by-fragment +sceneByName: + action: script + script: + - python + - MileHighMedia_Straight.py + - scene-by-name +sceneByQueryFragment: + action: script + script: + - python + - MileHighMedia_Straight.py + - scene-by-query-fragment +performerByName: + action: script + script: + - python + - MileHighMedia_Straight.py + - performer-by-name +performerByURL: + - action: script + url: + - milehighmedia.com/model/ + - dilfed.com/model/ + - doghousedigital.com/model/ + - familysinners.com/model/ + - milfed.com/model/ + - realityjunkies.com/model/ + - sweetsinner.com/model/ + - sweetheartvideo.com/model/ + script: + - python + - MileHighMedia_Straight.py + - performer-by-url +performerByFragment: + action: script + script: + - python + - MileHighMedia_Straight.py + - performer-by-fragment +movieByURL: + - action: script + url: + - milehighmedia.com/movie/ + - dilfed.com/movie/ + - doghousedigital.com/movie/ + - familysinners.com/movie/ + - milfed.com/movie/ + - realityjunkies.com/movie/ + - sweetsinner.com/movie/ + - sweetheartvideo.com/movie/ + # 
def whynotbi(obj: Any, _) -> Any:
    """Force every result's studio to the canonical "Why Not Bi".

    The parent studio comes back from the API as "WhyNotBy", so rather than
    patching individual names we overwrite the whole studio object.
    """
    return replace_at(
        obj, "studio", replacement=lambda _studio: {"name": "Why Not Bi"}
    )
name: Why Not Bi
def callGraphQL(query: str, variables: dict | None = None):
    """POST a GraphQL query to the Stash server configured in py_common/config.py.

    Returns the response's "data" object, or None on a recoverable error
    (missing/bad URL config, GraphQL-level errors, HTTP 401/404).
    Raises ConnectionError for any other HTTP failure.
    """
    api_key = config.STASH.get("api_key", "")
    url = config.STASH.get("url", "")
    if not url:
        log.error("You need to set the URL in 'config.py'")
        return None
    elif "stashdb.org" in url:
        log.error("You need to set the URL in 'config.py' to your own stash server")
        return None

    stash_url = config.STASH["url"] + "/graphql"
    headers = {
        "Accept-Encoding": "gzip, deflate",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1",
        "ApiKey": api_key,
    }
    # Renamed from `json` so the local doesn't shadow the json module name
    payload: dict = {"query": query}
    if variables is not None:
        payload["variables"] = variables
    response = requests.post(stash_url, json=payload, headers=headers)
    if response.status_code == 200:
        result = response.json()
        # Per the GraphQL spec the key is "errors" (a list of objects carrying
        # a "message"); the old code read "error", so server-reported GraphQL
        # errors were silently ignored
        if errors := result.get("errors"):
            messages = "\n".join(
                e.get("message", str(e)) if isinstance(e, dict) else str(e)
                for e in errors
            )
            log.error(f"[ERROR][GraphQL] {messages}")
            return None
        if result.get("data"):
            return result.get("data")
    elif response.status_code == 401:
        log.error(
            "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder"
        )
        return None
    elif response.status_code == 404:
        if "localhost:9999" in url:
            log.error(
                "[ERROR][GraphQL] HTTP Error 404, Not Found. Your local stash server is your endpoint, but port 9999 did not respond. Did you change stash's port? Edit 'config.py' in the 'py_common' folder to point at the correct port for stash!"
            )
        else:
            log.error(
                "[ERROR][GraphQL] HTTP Error 404, Not Found. Make sure 'config.py' in the 'py_common' folder points at the correct address and port!"
            )
        return None

    raise ConnectionError(
        f"GraphQL query failed: {response.status_code} - {response.content}"
    )
whitelistedIPs + interfaces + } + fragment ConfigScrapingData on ConfigScrapingResult { + scraperUserAgent + scraperCertCheck + scraperCDPPath + excludeTagPatterns + } + fragment ConfigDefaultSettingsData on ConfigDefaultSettingsResult { + scan { + scanGeneratePreviews + scanGenerateImagePreviews + scanGenerateSprites + scanGeneratePhashes + scanGenerateThumbnails + } + identify { + sources { + source { + ...ScraperSourceData + } + options { + ...IdentifyMetadataOptionsData + } + } + options { + ...IdentifyMetadataOptionsData + } + } + autoTag { + performers + studios + tags + __typename + } + generate { + sprites + previews + imagePreviews + previewOptions { + previewSegments + previewSegmentDuration + previewExcludeStart + previewExcludeEnd + previewPreset + } + markers + markerImagePreviews + markerScreenshots + transcodes + phashes + } + deleteFile + deleteGenerated + } + fragment ScraperSourceData on ScraperSource { + stash_box_endpoint + scraper_id + } + fragment IdentifyMetadataOptionsData on IdentifyMetadataOptions { + fieldOptions { + ...IdentifyFieldOptionsData + } + setCoverImage + setOrganized + includeMalePerformers + } + fragment IdentifyFieldOptionsData on IdentifyFieldOptions { + field + strategy + createMissing + } + """ + result = callGraphQL(query) or {} + return dig(result, "configuration") + + +def getScene(scene_id: str | int) -> dict | None: + query = """ + query FindScene($id: ID!, $checksum: String) { + findScene(id: $id, checksum: $checksum) { + ...SceneData + } + } + fragment SceneData on Scene { + id + title + code + details + urls + date + rating100 + o_counter + organized + interactive + files { + path + size + duration + video_codec + audio_codec + width + height + frame_rate + bit_rate + } + paths { + screenshot + preview + stream + webp + vtt + sprite + funscript + } + scene_markers { + ...SceneMarkerData + } + galleries { + ...SlimGalleryData + } + studio { + ...SlimStudioData + } + movies { + movie { + ...MovieData + } + 
scene_index + } + tags { + ...SlimTagData + } + performers { + ...PerformerData + } + stash_ids { + endpoint + stash_id + } + } + fragment SceneMarkerData on SceneMarker { + id + title + seconds + stream + preview + screenshot + scene { + id + } + primary_tag { + id + name + aliases + } + tags { + id + name + aliases + } + } + fragment SlimGalleryData on Gallery { + id + title + date + urls + details + rating100 + organized + image_count + cover { + paths { + thumbnail + } + } + studio { + id + name + image_path + } + tags { + id + name + } + performers { + id + name + gender + favorite + image_path + } + scenes { + id + title + files { + path + basename + } + } + } + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + } + parent_studio { + id + } + details + rating100 + aliases + } + fragment MovieData on Movie { + id + name + aliases + duration + date + rating100 + director + studio { + ...SlimStudioData + } + synopsis + url + front_image_path + back_image_path + scene_count + scenes { + id + title + files { + path + } + } + } + fragment SlimTagData on Tag { + id + name + aliases + image_path + } + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + } + stash_ids { + stash_id + endpoint + } + rating100 + details + death_date + hair_color + weight + } + """ + variables = {"id": str(scene_id)} + result = callGraphQL(query, variables) or {} + return dig(result, "findScene") + + +def getSceneScreenshot(scene_id: str | int) -> str | None: + query = """ + query FindScene($id: ID!, $checksum: String) { + findScene(id: $id, checksum: $checksum) { + id + paths { + screenshot + } + } + } + """ + variables = {"id": str(scene_id)} + result = 
def getSceneScreenshot(scene_id: str | int) -> str | None:
    """Fetch only the screenshot path for the scene with the given id.

    Returns None when the scene is missing or the query fails.
    """
    query = """
    query FindScene($id: ID!, $checksum: String) {
        findScene(id: $id, checksum: $checksum) {
            id
            paths {
                screenshot
            }
        }
    }
    """
    data = callGraphQL(query, {"id": str(scene_id)}) or {}
    return dig(data, "findScene", "paths", "screenshot")
scenes { + id + title + files { + path + } + __typename + } + __typename + } + + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + __typename + } + parent_studio { + id + __typename + } + details + rating100 + aliases + __typename + } + + fragment MovieData on Movie { + id + name + aliases + duration + date + rating100 + director + studio { + ...SlimStudioData + __typename + } + synopsis + url + front_image_path + back_image_path + scene_count + scenes { + id + title + files { + path + } + __typename + } + __typename + } + + fragment SlimTagData on Tag { + id + name + aliases + image_path + __typename + } + + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + ignore_auto_tag + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + __typename + } + stash_ids { + stash_id + endpoint + __typename + } + rating100 + details + death_date + hair_color + weight + __typename + } + """ + variables = { + "filter": {"page": 1, "per_page": 20, "sort": "title", "direction": "ASC"}, + "scene_filter": { + "performers": {"value": [str(performer_id)], "modifier": "INCLUDES_ALL"} + }, + } + result = callGraphQL(query, variables) or {} + return dig(result, "findScenes") + + +def getSceneIdByPerformerId(performer_id: str | int) -> dict | None: + query = """ + query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { + findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) { + scenes { + id + title + files { + path + } + paths { + screenshot + } + } + } + } + """ + variables = { + "filter": {"page": 1, "per_page": 20, "sort": "id", "direction": "DESC"}, + "scene_filter": { + "performers": {"value": [str(performer_id)], "modifier": "INCLUDES_ALL"} + }, + 
} + result = callGraphQL(query, variables) or {} + return dig(result, "findScenes") + + +def getPerformersByName(performer_name: str) -> dict | None: + query = """ + query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { + findPerformers(filter: $filter, performer_filter: $performer_filter) { + count + performers { + ...PerformerData + __typename + } + __typename + } + } + + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + ignore_auto_tag + image_path + scene_count + image_count + gallery_count + movie_count + tags { + ...SlimTagData + __typename + } + stash_ids { + stash_id + endpoint + __typename + } + rating100 + details + death_date + hair_color + weight + __typename + } + + fragment SlimTagData on Tag { + id + name + aliases + image_path + __typename + } + """ + + variables = { + "filter": { + "q": performer_name, + "page": 1, + "per_page": 20, + "sort": "name", + "direction": "ASC", + }, + "performer_filter": {}, + } + result = callGraphQL(query, variables) or {} + return dig(result, "findPerformers") + + +def getPerformersIdByName(performer_name: str) -> dict | None: + query = """ + query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { + findPerformers(filter: $filter, performer_filter: $performer_filter) { + count + performers { + ...PerformerData + } + } + } + + fragment PerformerData on Performer { + id + name + alias_list + } + """ + + variables = { + "filter": { + "q": performer_name, + "page": 1, + "per_page": 20, + "sort": "name", + "direction": "ASC", + }, + "performer_filter": {}, + } + + result = callGraphQL(query, variables) or {} + return dig(result, "findPerformers") + + +def getGallery(gallery_id: str | int) -> dict | None: + query = """ + query FindGallery($id: ID!) 
{ + findGallery(id: $id) { + ...GalleryData + } + } + fragment GalleryData on Gallery { + id + created_at + updated_at + title + date + urls + details + rating100 + organized + folder { + path + } + cover { + ...SlimImageData + } + studio { + ...SlimStudioData + } + tags { + ...SlimTagData + } + performers { + ...PerformerData + } + scenes { + ...SlimSceneData + } + } + fragment SlimImageData on Image { + id + title + rating100 + organized + o_counter + visual_files { + ... on ImageFile { + path + size + height + width + } + } + + paths { + thumbnail + image + } + + galleries { + id + files { + path + } + title + } + + studio { + id + name + image_path + } + + tags { + id + name + } + + performers { + id + name + gender + favorite + image_path + } + } + fragment SlimStudioData on Studio { + id + name + image_path + stash_ids { + endpoint + stash_id + } + parent_studio { + id + } + details + rating100 + aliases + } + fragment SlimTagData on Tag { + id + name + aliases + image_path + } + fragment PerformerData on Performer { + id + name + url + gender + twitter + instagram + birthdate + ethnicity + country + eye_color + height_cm + measurements + fake_tits + career_length + tattoos + piercings + alias_list + favorite + image_path + scene_count + image_count + gallery_count + movie_count + + tags { + ...SlimTagData + } + + stash_ids { + stash_id + endpoint + } + rating100 + details + death_date + hair_color + weight + } + fragment SlimSceneData on Scene { + id + title + code + details + urls + date + rating100 + o_counter + organized + interactive + + files { + path + size + duration + video_codec + audio_codec + width + height + frame_rate + bit_rate + } + + paths { + screenshot + preview + stream + webp + vtt + sprite + funscript + } + + scene_markers { + id + title + seconds + } + + galleries { + id + title + files { + path + } + } + + studio { + id + name + image_path + } + + movies { + movie { + id + name + front_image_path + } + scene_index + } + + tags { + id + 
name + } + + performers { + id + name + gender + favorite + image_path + } + + stash_ids { + endpoint + stash_id + } + } + """ + variables = {"id": gallery_id} + result = callGraphQL(query, variables) or {} + return dig(result, "findGallery") + + +def getGalleryPath(gallery_id: str | int) -> str | None: + query = """ + query FindGallery($id: ID!) { + findGallery(id: $id) { + folder { + path + } + files { + path + } + } + } + """ + variables = {"id": gallery_id} + result = callGraphQL(query, variables) or {} + # Galleries can either be a folder full of files or a zip file + return dig(result, "findGallery", "folder", "path") \ + or dig(result, "findGallery", "files", 0, "path") diff --git a/scrapers/py_common/log.py b/scrapers/py_common/log.py new file mode 100644 index 0000000..e013bb4 --- /dev/null +++ b/scrapers/py_common/log.py @@ -0,0 +1,39 @@ +import sys +import re +# Log messages sent from a script scraper instance are transmitted via stderr and are +# encoded with a prefix consisting of special character SOH, then the log +# level (one of t, d, i, w or e - corresponding to trace, debug, info, +# warning and error levels respectively), then special character +# STX. +# +# The log.trace, log.debug, log.info, log.warning, and log.error methods, and their equivalent +# formatted methods are intended for use by script scraper instances to transmit log +# messages. 
+# + +def __log(level_char: bytes, s): + if level_char: + lvl_char = "\x01{}\x02".format(level_char.decode()) + s = re.sub(r"data:image.+?;base64(.+?')","[...]",str(s)) + for x in s.split("\n"): + print(lvl_char, x, file=sys.stderr, flush=True) + + +def trace(s): + __log(b't', s) + + +def debug(s): + __log(b'd', s) + + +def info(s): + __log(b'i', s) + + +def warning(s): + __log(b'w', s) + + +def error(s): + __log(b'e', s) diff --git a/scrapers/py_common/package b/scrapers/py_common/package new file mode 100644 index 0000000..6c171ab --- /dev/null +++ b/scrapers/py_common/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: py_common diff --git a/scrapers/py_common/types.py b/scrapers/py_common/types.py new file mode 100644 index 0000000..6069296 --- /dev/null +++ b/scrapers/py_common/types.py @@ -0,0 +1,116 @@ +from typing import Literal, Required, TypedDict + +""" +Types for outputs that scrapers can produce and that Stash will accept +""" + +class ScrapedTag(TypedDict): + name: str + "Name is the only required field" + +class ScrapedPerformer(TypedDict, total=False): + name: Required[str] + "Name is the only required field" + disambiguation: str + "This is only added through Tagger view" + gender: Literal["MALE", "FEMALE", "TRANSGENDER_MALE", "TRANSGENDER_FEMALE", "INTERSEX", "NON_BINARY"] + url: str + twitter: str + instagram: str + birthdate: str + "Must be in the format YYYY-MM-DD" + death_date: str + "Must be in the format YYYY-MM-DD" + ethnicity: Literal["CAUCASIAN", "BLACK", "ASIAN", "INDIAN", "LATIN", "MIDDLE_EASTERN", "MIXED", "OTHER"] + country: str + "Not validated" + eye_color: Literal["BLUE", "BROWN", "GREEN", "GREY", "HAZEL", "RED"] + hair_color: Literal["BLONDE", "BRUNETTE", "BLACK", "RED", "AUBURN", "GREY", "BALD", "VARIOUS", "OTHER"] + "Hair color, can be 'VARIOUS' or 'OTHER' if the performer has multiple hair colors" + height: str + "Height in centimeters" + weight: str + "Weight in kilograms" + measurements: str + "bust-waist-hip 
measurements in centimeters, with optional cupsize for bust (e.g. 90-60-90, 90C-60-90)" + fake_tits: str + penis_length: str + circumcised: str + career_length: str + tattoos: str + piercings: str + aliases: str + "Must be comma-delimited in order to be parsed correctly" + tags: list[ScrapedTag] + image: str + images: list[str] + "Images can be URLs or base64-encoded images" + details: str + +class ScrapedStudio(TypedDict, total=False): + name: Required[str] + "Name is the only required field" + url: str + parent: 'ScrapedStudio' + image: str + +class ScrapedMovie(TypedDict, total=False): + name: Required[str] + date: str + "Must be in the format YYYY-MM-DD" + duration: str + "Duration in seconds" + director: str + synopsis: str + studio: ScrapedStudio + rating: str + front_image: str + back_image: str + url: str + aliases: str + +class ScrapedGallery(TypedDict, total=False): + title: Required[str] + details: str + url: str + urls: list[str] + date: str + "Must be in the format YYYY-MM-DD" + studio: ScrapedStudio + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + +class ScrapedScene(TypedDict, total=False): + title: str + details: str + url: str + urls: list[str] + date: str + image: str + studio: ScrapedStudio + movies: list[ScrapedMovie] + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + code: str + director: str + +# Technically we can return a full ScrapedPerformer but the current UI only +# shows the name. The URL is absolutely necesserary for the result to be used +# in the next step: actually scraping the performer +class PerformerSearchResult(TypedDict): + name: str + url: str + +# Technically we can return a full ScrapedScene but the current UI only +# shows the name, image, studio, tags and performers. 
The URL is absolutely +# necesserary for the result to be used in the next step: actually scraping the scene +class SceneSearchResult(TypedDict, total=False): + title: Required[str] + url: Required[str] + date: str + "Must be in the format YYYY-MM-DD" + image: str + "Image can be a URL or base64-encoded image" + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + studio: ScrapedStudio diff --git a/scrapers/py_common/util.py b/scrapers/py_common/util.py new file mode 100644 index 0000000..9022fa7 --- /dev/null +++ b/scrapers/py_common/util.py @@ -0,0 +1,258 @@ +from argparse import ArgumentParser +from functools import reduce +from typing import Any, Callable, TypeVar +from urllib.error import URLError +from urllib.request import Request, urlopen +import json +import sys + + +def dig(c: dict | list, *keys: str | int | tuple[str | int, ...], default=None) -> Any: + """ + Helper function to get a value from a nested dict or list + + If a key is a tuple the items will be tried in order until a value is found + + :param c: dict or list to search + :param keys: keys to search for + :param default: default value to return if not found + :return: value if found, None otherwise + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> dig(obj, "a", "b", 1) + 'd' + >>> dig(obj, "a", ("e", "f"), "g") + 'h' + """ + + def inner(d: dict | list, key: str | int | tuple): + if isinstance(d, dict): + if isinstance(key, tuple): + for k in key: + if k in d: + return d[k] + return d.get(key) + elif isinstance(d, list) and isinstance(key, int) and key < len(d): + return d[key] + else: + return default + + return reduce(inner, keys, c) # type: ignore + + +T = TypeVar("T") + + +def replace_all(obj: dict, key: str, replacement: Callable[[T], T]) -> dict: + """ + Helper function to recursively replace values in a nested dict, returning a new dict + + If the key refers to a list the replacement function will be called for each item + + :param obj: dict to search + :param key: 
key to search for + :param replacement: function called on the value to replace it + :return: new dict + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> replace(obj, "g", lambda x: x.upper()) # Replace a single item + {'a': {'b': ['c', 'd'], 'f': {'g': 'H'}}} + >>> replace(obj, "b", lambda x: x.upper()) # Replace all items in a list + {'a': {'b': ['C', 'D'], 'f': {'g': 'h'}}} + >>> replace(obj, "z", lambda x: x.upper()) # Do nothing if the key is not found + {'a': {'b': ['c', 'd'], 'f': {'g': 'h'}}} + """ + if not isinstance(obj, dict): + return obj + + new = {} + for k, v in obj.items(): + if k == key: + if isinstance(v, list): + new[k] = [replacement(x) for x in v] + else: + new[k] = replacement(v) + elif isinstance(v, dict): + new[k] = replace_all(v, key, replacement) + elif isinstance(v, list): + new[k] = [replace_all(x, key, replacement) for x in v] + else: + new[k] = v + return new + + +def replace_at(obj: dict, *path: str, replacement: Callable[[T], T]) -> dict: + """ + Helper function to replace a value at a given path in a nested dict, returning a new dict + + If the path refers to a list the replacement function will be called for each item + + If the path does not exist, the replacement function will not be called and the dict will be returned as-is + + :param obj: dict to search + :param path: path to search for + :param replacement: function called on the value to replace it + :return: new dict + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> replace_at(obj, "a", "f", "g", replacement=lambda x: x.upper()) # Replace a single item + {'a': {'b': ['c', 'd'], 'f': {'g': 'H'}}} + >>> replace_at(obj, "a", "b", replacement=lambda x: x.upper()) # Replace all items in a list + {'a': {'b': ['C', 'D'], 'f': {'g': 'h'}}} + >>> replace_at(obj, "a", "z", "g", replacement=lambda x: x.upper()) # Broken path, do nothing + {'a': {'b': ['c', 'd'], 'f': {'g': 'h'}}} + """ + + def inner(d: dict, *keys: str): + match keys: + case [k] if isinstance(d, 
dict) and k in d: + if isinstance(d[k], list): + return {**d, k: [replacement(x) for x in d[k]]} + return {**d, k: replacement(d[k])} + case [k, *ks] if isinstance(d, dict) and k in d: + return {**d, k: inner(d[k], *ks)} + case _: + return d + + return inner(obj, *path) # type: ignore + + +def is_valid_url(url): + """ + Checks if an URL is valid by making a HEAD request and ensuring the response code is 2xx + """ + try: + req = Request(url, method="HEAD") + with urlopen(req) as response: + return 200 <= response.getcode() < 300 + except URLError: + return False + + +def __default_parser(**kwargs): + parser = ArgumentParser(**kwargs) + # Some scrapers can take extra arguments so we can + # do rudimentary configuration in the YAML file + parser.add_argument("extra", nargs="*") + subparsers = parser.add_subparsers(dest="operation", required=True) + + # "Scrape with..." and the subsequent search box + subparsers.add_parser( + "performer-by-name", help="Search for performers" + ).add_argument("--name", help="Performer name to search for") + + # The results of performer-by-name will be passed to this + pbf = subparsers.add_parser("performer-by-fragment", help="Scrape a performer") + # Technically there's more information in this fragment, + # but in 99.9% of cases we only need the URL or the name + pbf.add_argument("--url", help="Scene URL") + pbf.add_argument("--name", help="Performer name to search for") + + # Filling in an URL and hitting the "Scrape" icon + subparsers.add_parser( + "performer-by-url", help="Scrape a performer by their URL" + ).add_argument("--url") + + # Filling in an URL and hitting the "Scrape" icon + subparsers.add_parser( + "movie-by-url", help="Scrape a movie by its URL" + ).add_argument("--url") + + # The looking glass search icon + # name field is guaranteed to be filled by Stash + subparsers.add_parser("scene-by-name", help="Scrape a scene by name").add_argument( + "--name", help="Name to search for" + ) + + # Filling in an URL and hitting 
the "Scrape" icon + subparsers.add_parser( + "scene-by-url", help="Scrape a scene by its URL" + ).add_argument("--url") + + # "Scrape with..." + sbf = subparsers.add_parser("scene-by-fragment", help="Scrape a scene") + sbf.add_argument("-u", "--url") + sbf.add_argument("--id") + sbf.add_argument("--title") # Title will be filename if not set in Stash + sbf.add_argument("--date") + sbf.add_argument("--details") + sbf.add_argument("--urls", nargs="+") + + # Tagger view or search box + sbqf = subparsers.add_parser("scene-by-query-fragment", help="Scrape a scene") + sbqf.add_argument("-u", "--url") + sbqf.add_argument("--id") + sbqf.add_argument("--title") # Title will be filename if not set in Stash + sbqf.add_argument("--code") + sbqf.add_argument("--details") + sbqf.add_argument("--director") + sbqf.add_argument("--date") + sbqf.add_argument("--urls", nargs="+") + + # Filling in an URL and hitting the "Scrape" icon + subparsers.add_parser( + "gallery-by-url", help="Scrape a gallery by its URL" + ).add_argument("--url", help="Gallery URL") + + # "Scrape with..." 
+ gbf = subparsers.add_parser("gallery-by-fragment", help="Scrape a gallery") + gbf.add_argument("-u", "--url") + gbf.add_argument("--id") + gbf.add_argument("--title") + gbf.add_argument("--date") + gbf.add_argument("--details") + gbf.add_argument("--urls", nargs="+") + + return parser + + +def scraper_args(**kwargs): + """ + Helper function to parse arguments for a scraper + + This allows scrapers to be called from the command line without + piping JSON to stdin but also from Stash + + Returns a tuple of the operation and the parsed arguments: operation is one of + - performer-by-name + - performer-by-fragment + - performer-by-url + - movie-by-url + - scene-by-name + - scene-by-url + - scene-by-fragment + - scene-by-query-fragment + - gallery-by-url + - gallery-by-fragment + + A scraper can be configured to take extra arguments by adding them to the YAML file: + ```yaml + sceneByName: + action: script + script: + - python + - my-scraper.py + - extra + - args + - scene-by-name + ``` + + When called from Stash through the above configuration this function would return: + ```python + ("scene-by-name", {"extra": ["extra", "args"], "name": "scene name"}) + ``` + """ + + parser = __default_parser(**kwargs) + args = vars(parser.parse_args()) + + # If stdin is not connected to a TTY the script is being executed by Stash + if not sys.stdin.isatty(): + try: + stash_fragment = json.load(sys.stdin) + args.update(stash_fragment) + except json.decoder.JSONDecodeError: + # This would only happen if Stash passed invalid JSON + sys.exit(69) + + return args.pop("operation"), args