diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
new file mode 100644
index 000000000..477b3666e
--- /dev/null
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,51 @@
+name: Deploy repository to GitHub Pages
+
+on:
+  push:
+    branches: [ master, stable ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+jobs:
+  build:
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v2
+        with:
+          path: master
+          ref: master
+          fetch-depth: '0'
+      - run: |
+          cd master
+          ./build_site.sh ../_site/stable
+      - name: Checkout Stable
+        uses: actions/checkout@v2
+        with:
+          path: dev
+          # replace with develop tag/branch when necessary
+          ref: master
+          fetch-depth: '0'
+      - run: |
+          cd dev
+          ../master/build_site.sh ../_site/develop
+      - uses: actions/upload-pages-artifact@v2
+
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-22.04
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v2
+
diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml
index 644c1caae..a6db84194 100644
--- a/.github/workflows/validate.yml
+++ b/.github/workflows/validate.yml
@@ -13,6 +13,6 @@ jobs:
     - uses: actions/checkout@v2
     - uses: actions/setup-node@v1
       with:
-        node-version: '12.x'
+        node-version: '14.x'
     - run: cd ./validator && yarn install --frozen-lockfile
     - run: node ./validate.js --ci
diff --git a/.gitignore b/.gitignore
index 9377605f1..393112e62 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ yarn-error.log
 # Scraper-generated files
 /scrapers/*.ini
 **/__pycache__/
+
+/_site
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 000000000..856ccff68
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,6 @@
+{
+    "yaml.schemas": {
+        "validator/scraper.schema.json": "*.yml"
+    },
+    "python.analysis.typeCheckingMode": "basic"
+}
diff --git a/README.md b/README.md
index 487bd6f8c..c6878bf8c 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # CommunityScrapers
 This is a public repository containing scrapers created by the Stash Community.
 
-**:exclamation: Make sure to read ALL of the instructions here before requesting any help in the discord channel. For a more user friendly / step by step guide you can checkout [stashdb's guide to scraping](https://guidelines.stashdb.org/docs/guide-to-scraping/):exclamation:**
+**:exclamation: Make sure to read ALL of the instructions here before requesting any help in the Discord channel. For a more user-friendly, step-by-step guide you can check out [stashdb's guide to scraping](https://docs.stashapp.cc/beginner-guides/guide-to-scraping/):exclamation:**
 
 To download the scrapers you can clone the git repo or download directly any of the scrapers.
 
diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md
index 418591369..3e3bb07c3 100644
--- a/SCRAPERS-LIST.md
+++ b/SCRAPERS-LIST.md
@@ -9,11 +9,12 @@ Supported Site|Scraper| S | G | M | P |Needs|Contents
 --------------|-------|:-:|:-:|:-:|:-:|:---:|:------:
 1000facials.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 10musume.com|10Musume.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
-1111customsxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+1111customsxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 18eighteen.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 18tokyo.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 18vr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR
 1by-day.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+1passforallsites.com|1passforallsites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 1pondo.tv|1pondo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 21naturals.com|Algolia_21Naturals.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 21roles.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
@@ -30,10 +31,11 @@ abbiemaley.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 abbywinters.com|AbbyWinters.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 abuseme.com|AbuseMe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 accidentalgangbang.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
-activeduty.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+activeduty.com|Algolia_ActiveDuty.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 adamandevepictures.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
 addicted2girls.com|Algolia_Addicted2Girls.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
-addisonstreet.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+addisonstreet.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+adultanime.dbsearch.net|Anime-DB.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Japanese Hentai
 adultdvdempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
 adultdvdmarketplace.com|AdultDvdMarketPlace.yml|:x:|:x:|:heavy_check_mark:|:x:|-|-
 adultempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
@@ -42,6 +44,7 @@ adultprime.com|AdultPrime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 adulttime.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|-
 adulttimepilots.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
 aebn.com|AEBN.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Straight + Gay
+agentredgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 alettaoceanempire.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 alexismonroe.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 alexlegend.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -55,6 +58,7 @@ allherluv.com|MissaX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian
 allinternal.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 alljapanesepass.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 allvr.porn|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
+alphamales.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 alsscan.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
 amateripremium.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 amateurallure.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
@@ -63,15 +67,17 @@ amateurboxxx.com|AmateurBoxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 amateure-xtreme.com|AmateureExtreme.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 amateureuro.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 amateursfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-amazinganna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+amazinganna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 ambushmassage.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 amelielou.com|AmelieLou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+americanmusclehunks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 amkingdom.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 amourangels.com|AmourAngels.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
-anal-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-anal-beauty.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+anal-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+anal-beauty.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 anal4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 analacrobats.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+analamateur.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 analbbc.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 analcheckups.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 analhookers.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
@@ -86,11 +92,15 @@ analtherapyxxx.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 analvids.com|LegalPorno.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 analviolation.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 analyzedgirls.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+andolinixxl.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 angelasommers.com|angelasommers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 angelawhite.com|AngelaWhite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 angelinacastrolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+anidb.net|AniDB.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Hentai Database
 anilos.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 animecharactersdatabase.com|AnimeCharactersDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|Database
+antoniosuleiman.com|AntonioSuleiman.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+apovstory.com|APOVStory.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 archangelvideo.com|ArchAngelVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 ariellynn.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 ashemaletube.com|AShemaleTube.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
@@ -100,7 +110,8 @@ asian18.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 asianamericantgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 asianfever.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 asiansexdiary.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+asmrfantasy.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
 assholefever.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 assmeat.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -108,25 +119,31 @@ asstraffic.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Py
 assumethepositionstudios.com|AssumeThePositionStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 assylum.com|Assylum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish
 atkexotics.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+atkgirlfriends.com|ATKGirlfriends.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+attackboys.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+auntjudys.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+austinwilde.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
 av69.tv|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 avadawn.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 avanal.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 aventertainments.com|AVE.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Database
-avidolz.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+avidolz.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV Uncensored
+avjiali.com|AVJiali.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV Uncensored
 avstockings.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 avtits.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 aziani.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 babearchives.com|BabeArchives.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 babepedia.com|Babepedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database
-baberotica.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-baberoticavr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+baberotica.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+baberoticavr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR
 babes.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|-
 babesandstars.com|Babes&Stars.yml|:x:|:x:|:x:|:heavy_check_mark:|-|-
 babesnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
@@ -143,20 +160,24 @@ baeb.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 balletflatsfetish.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bamvisions.com|BamVisions.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bang.com|Bang.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
+bangbangboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 bangbros.com|BangBros.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bangingbeauties.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bangteenpussy.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 barbarafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+barebackplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 barelylegal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+baretwinks.com|BoyCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 bathhousebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 battlebang.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 bbcparadise.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bbcpie.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bbcsurprise.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
-beauty-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-beauty4k.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+beauty-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+beauty4k.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 beaverhunt.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
-becomingfemme.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+becomingfemme.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+behindtrans500.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 beingphoenixmarie.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 belamionline.com|BelAmi.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 bellahd.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -164,8 +185,11 @@ bellanextdoor.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bellapass.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bellesafilms.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bellesahouse.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+beltbound.com|BeltBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+berryboys.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 bestoftealconrad.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bffs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+biempire.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 bigbootytgirls.com|BigBootyTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 bigfatcreampie.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 biggulpgirls.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
@@ -174,76 +198,109 @@ biguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 billiestar.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 biphoria.com|Algolia_Biphoria.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
 bjraw.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|-
-black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
 black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+blackandbig.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+blackboyaddictionz.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+Blackfoxbound UK|BlackFoxBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+blacksondaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
 blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 blakemason.com|BlakeMason.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+blowmepov.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 blownbyrone.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-blowpass.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+blowpass.com|Algolia_Blowpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bobbiedenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-bobstgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+bobstgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+bolatino.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 bondagecafe.com|BondageCafe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bondageliberation.com|BondageLiberation.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 boobpedia.com|Boobpedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database
 bootyclapxxx.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 bootysisters.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
-boppingbabes.com|BoppingBabes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+boppingbabes.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
 bossymilfs.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
-bouncychicks.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+bouncychicks.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+boundheat.com|BoundHeat.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
 boundhoneys.com|Boundhoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+boundjocks.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 boundlife.com|Boundlife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+boundtwinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 bountyhunterporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 boxtrucksex.com|BoxTruckSex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+boycrush.com|BoyCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+boyforsale.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 boyfriendsharing.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+boyfun.com|BoyFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+boygusher.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
 boysdestroyed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 boysfuckmilfs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+boyshalfwayhouse.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 brandibelle.com|brandibelle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 brandnewamateurs.com|BrandNewAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 brandnewfaces.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+brasilvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
 brattyfamily.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 brattymilf.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
-brazilian-transsexuals.com|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+bravofucker.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+brazilian-transsexuals.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 braziltgirls.xxx|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|-
+breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+brett-tyler.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+bride4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+brokenlatinawhores.com|BrokenLatinaWhores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+brokensluts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+brokestraightboys.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
 brookelynnebriar.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 bruceandmorgan.net|bruceandmorgan.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Amateur Fetish
 brutalinvasion.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bryci.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bskow.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bukkakenow.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+bulldogxxx.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 burningangel.com|Algolia_BurningAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 bushybushy.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 bustybeauties.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 buttman.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+cadinot.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 calicarter.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+camwhores.tv|CamWhorestv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 canada-tgirl.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 caribbeancom.com|Carib.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 caribbeancompr.com|Carib.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 carmenvalentina.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+carnalplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 castingcouch-x.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 catalinacruz.com|Catalina Cruz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+cazzofilm.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 cfnmeu.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
-chaosmen.com|ChaosMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+chaosmen.com|Algolia_ChaosMen.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay
 charleechaselive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 chastitybabes.com|chastitybabes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+cheatingsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 cherrypimps.com|CherryPimps.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
-chickpass.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-chickpassinternational.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-chickpasspornstars.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-chickpassteens.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+chickpass.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+chickpassinternational.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+chickpasspornstars.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+chickpassteens.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 chloelamour.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 chocolatebjs.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+citebeur.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+clairprod.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 class-lesbians.com|ClassLesbians.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 claudiamarie.com|ClaudiaMarie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 clips4sale.com|Clips4Sale.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -252,12 +309,20 @@ clubelegantangel.com|ClubElegantAngel.yml|:heavy_check_mark:|:x:|:heavy_check_ma
 clubinfernodungeon.com|Algolia_ClubInfernoDungeon.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Gay
 clubseventeen.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+cockyboys.com|CockyBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
 colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
-college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+collectivecorruption.com|CollectiveCorruption.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 college-uniform.com|CollegeUniform.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+collegeboyphysicals.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+collegedudes.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
 collegefuckparties.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+coltstudiogroup.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 combatzone.us|CombatZone.yml|:x:|:x:|:heavy_check_mark:|:x:|-|-
 combatzonexxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 concoxxxion.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
@@ -266,13 +331,19 @@ cosplayfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cospuri.com|Cospuri.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 cougarseason.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 crashpadseries.com|CrashpadSeries.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 creativeporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cruelgf.com|CruelGirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+crunchboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+cuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+cumblastcity.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+cumdumpsterteens.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -324,24 +395,31 @@ czechsupermodels.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 czechtaxi.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 czechvr.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
 czechvrcasting.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
-czechvrnetwork.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
 czechvrfetish.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
+czechvrnetwork.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
 czechwifeswap.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 d52q.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
 dadcrush.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 daddy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+daddycarl.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 daddygetslucky.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 daddyslilangel.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 damnthatsbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 danejones.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 danidaniels.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 danni.com|Danni.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+darkcruising.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 darkroomvr.com|DarkRoomVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+darksodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 darkx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+darttechstudios.com|DartTechStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 data18.com|data18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+datingmystepson.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+daughterjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 ddfbusty.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 deauxmalive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+debt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 deeper.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 deeplush.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 deepthroatsirens.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
@@ -357,6 +435,8 @@ devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans
 devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+dfbnetwork.com|DFB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|python|-
 dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -364,37 +444,48 @@ dickontrip.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 digitaldesire.com|DigitalDesire.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 digitalplayground.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
 dillionation.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
-dirty-coach.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-dirty-doctor.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+dirty-coach.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+dirty-doctor.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 dirtyauditions.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+dirtyboysociety.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 dirtycosplay.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+dirtyflix.com|DirtyFlix.yml|:heavy_check_mark:|-|-|-|-|-
 disruptivefilms.com|Algolia_disruptivefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
 dlsite.com|DLsite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 doegirls.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-dogfartnetwork.com|DogFart.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+dogfartnetwork.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
 dollrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 domai.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
 dorcelclub.com|DorcelClub.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
 dorcelvision.com|DorcelVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+dothewife.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 doubleteamedteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+downblousejerk.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
 downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 dreamsofspanking.com|DreamsOfSpanking.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 dreamteenshd.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Trans
 drilledchicks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 driverxxx.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 dtfsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+dyke4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 dyked.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 dylanryder.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+ebonytugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+edwardjames.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
 elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes
+englishlads.com|EnglishLads.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Gay
+enzorimenez.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+eporner.com|Eporner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
 erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+eroprofile.com|EroProfile.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site
 eroticax.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
 eroticbeauty.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
 eroticspice.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
@@ -402,6 +493,7 @@ erotiquetvlive.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 errotica-archives.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
 eternaldesire.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
 euro-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+eurocreme.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 eurogirlsongirls.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 euroteenerotica.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 evilangel.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
@@ -411,7 +503,9 @@ evolvedfightslez.com|evolvedfightslez.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 exotic4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 explicite-art.com|ExpliciteArt.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 exploitedcollegegirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+excogigirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 extrapackage.com|ExtraPackage.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+extremepickups.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 exxxtrasmall.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fabsluts.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 facials4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -420,11 +514,12 @@ fakehostel.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fakehub.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fakeshooting.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 faketaxi.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-falconstudios.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+falconstudios.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 fallinlovia.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 famedigital.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 familycreep.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 familyhookups.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+familylust.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 familysexmassage.com|FamilySexMassage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 familysinners.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 familystrokes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -441,7 +536,8 @@ femdomempire.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|
 feminized.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans
 femjoy.com|FemJoy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 femlatex.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+femout.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
 ferame.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 fetishnetwork.com|FetishNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish
 fetishpros.com|FetishPro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish
@@ -453,8 +549,9 @@ filthyfamily.com|FilthyFamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 filthygapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 filthykings.com|Algolia_filthykings.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
 finishesthejob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+finishhim.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 firstanalquest.com|Firstanalquest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-firstbgg.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+firstbgg.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 firstclasspov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fist4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fistertwister.com|Fistertwister.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -474,18 +571,22 @@ fostertapes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fourfingerclub.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 foxxedup.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fragileslave.com|FragileSlave.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
 fratx.com|FratX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay
 freakmobmedia.com|FreakMobMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 free.premiumbukkake.com|PremiumBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
-freeones.com|FreeonesCommunity.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database
-freeones.xxx|FreeonesCommunity.yml|:x:|:x:|:x:|:heavy_check_mark:|-|-
+freeones.com|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database
+freeones.xxx|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
 freeusefantasy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 freeusemilf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+freeze.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 french-twinks.com|Frenchtwinks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+frenchporn.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 freshmen.net|Freshmen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 freshoutofhighschool.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 frolicme.com|FrolicMe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+ftmmen.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
+ftmplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
 fuckedfeet.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fuckedhard18.com|FuckedHard18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fuckermate.com|Fuckermate.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
@@ -493,32 +594,39 @@ fuckfiesta.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 fuckingawesome.com|FuckingAwesome.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 fuckinghardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 fuckingoffice.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-fuckingparties.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckingparties.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 fuckingstreet.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-fuckstudies.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-fuckthegeek.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckstudies.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+fuckthegeek.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+fullpornnetwork.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 funbags.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+funsizeboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 futanari.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-gag-n-gape.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+futanarica.com|Futanarica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+gag-n-gape.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 gangav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 gangbangcreampie.com|Algolia_GangBangCreampie.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
 gapingangels.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 gasm.com|GASM.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+gayarabclub.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 gaycastings.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+gaycest.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 gaycreeps.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 gaydvdempire.com|GayAdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 gayempire.com|GayAdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+gayfrenchkiss.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 gayhoopla.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay
 gayhorrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 gayroom.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 gayviolations.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 genderxfilms.com|Algolia_GenderXFilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|-|Trans
-genuinesin.com|GenuineSin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+genlez.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 girlcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 girlfaction.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 girlfriendsfilms.com|Algolia_Girlfriendsfilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Lesbian
 girlgirl.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Lesbian
 girlgirlmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+girlgirlxxx.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 girlgrind.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Lesbian
 girlsandstuds.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 girlsgotcream.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -531,18 +639,24 @@ girlsunderarrest.com|GirlsUnderArrest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 girlsway.com|Algolia_GirlsWay.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian
 girlswhofuckgirls.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-gloryholesecrets.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+gloryholesecrets.com|Algolia_Gloryholesecrets.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 goddessnudes.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
 goddesssnow.com|GoddessSnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+goldenslut.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 gostuckyourself.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 gotfilled.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|-
 grannyghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Granny
-grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+grannyvsbbc.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 grooby.club|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
 groobyvr.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+guysinsweatpants.com|GuysInSweatpants.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
 gymnastic.xxx|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 gymrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 gynoexclusive.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -553,9 +667,11 @@ handsonhardcore.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hanime.tv|hanime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai
 hardcoreyouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hardfuckgirls.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hardkinks.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 hardonvr.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
 hardtied.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hardx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+harlemsex.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 harmonyvision.com|HarmonyVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
 hazel-tucker.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 hd19.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -573,6 +689,7 @@ heymilf.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 heyoutdoor.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 heyzo.com|Heyzo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 hijabhookup.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hijabmylfs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 himeros.tv|HimerosTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 hitzefrei.com|Hitzefrei.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 hmvmania.com|HmvMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -584,19 +701,21 @@ homemadeanalwhores.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hometowngirls.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 hometownhoneys.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 honeytrans.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
-hongkongslut.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hongkongslut.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 hookuphotshot.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hornydoctor.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hornygirlscz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hornyhousehold.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 horrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hotandtatted.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hotcast.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 hotcrazymess.com|Nubiles.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hotguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
-hothouse.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hothouse.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hotlegsandfeet.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hotmilfsfuck.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 hotmovies.com|Hotmovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
+hotoldermale.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 hottiemoms.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 hotwifexxx.com|HotWifeXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 houseofgord.com|HouseOfGord.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
@@ -606,6 +725,7 @@ hqporner.com|HQPorner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hucows.com|Hucows.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hugecockbreak.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hungarianhoneys.com|HungarianHoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hungfuckers.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 hunt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hunterleigh.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 hunterpov.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -619,10 +739,12 @@ hustlerslesbians.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian
 hustlerstaboo.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 hypnotube.com|Hypnotube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 iafd.com|IAFD.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Database
-iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 idols69.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 ifeelmyself.com|IFeelMyself.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+ignore4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 ihuntmycunt.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+ikillitts.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
 ikissgirls.com|IKissGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
 iknowthatgirl.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 imdb.com|IMDB.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database
@@ -633,17 +755,22 @@ innocenthigh.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 inserted.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 insex.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 insexondemand.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+interracialblowbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 interracialpass.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial
+interracialpickups.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
 interracialpovs.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial
 intimatelesbians.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
 intimatepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 ipinkvisualpass.com|PinkVisual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-isthisreal.com|IsThisReal.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+isthisreal.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 italianshotclub.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 itscleolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 itspov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
 iwantclips.com|IWantClips.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|-
+iwara.tv|Iwara.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jacquieetmichelelite.com|JacquieEtMichelElite.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
 jacquieetmicheltv.net|JacquieEtMichelTV.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+jalifstudio.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 jamesdeen.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 janafox.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 japaneseflashers.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
@@ -651,6 +778,7 @@ japaneseslurp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 japanhdv.com|JapanHDV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
 japanlust.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 japornxxx.com|JapornXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jasonsparkslive.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 jav.land|JavLand.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
 javdb.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database
 javdb36.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database
@@ -660,15 +788,20 @@ javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV
 javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
 jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jbvideo.com|JBVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 jeedoo.com|Jeedoo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
-jeffsmodels.com|JeffsModels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jeffsmodels.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 jelenajensen.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 jerk-offpass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 jerkaoke.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 jessicajaymesxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jessroyan.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 jimweathersarchives.com|JimWeathersArchives.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
 jizzbomb.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jnrc.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+jockbreeders.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+jockpussy.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
 jodiwest.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF
 joeperv.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 johnnyrapid.com|Algolia_Johnnyrapid.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
@@ -684,7 +817,7 @@ jschoolgirls.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
 julesjordan.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|-
 juliaannlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 karissa-diamond.com|Karissa-Diamond.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
-karups.com|Karups.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+karups.com|Karups.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 katiebanks.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kellymadison.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kendrajames.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -692,27 +825,30 @@ killergram.com|Killergram.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kimberleelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kin8tengoku.com|Kin8tengoku.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kingsoffetish.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-kink.com|Kink.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+kink.com|Kink.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|-
 kink305.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kinkbomb.com|KinkBomb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kinkvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR
 kinkyfamily.com|kinkyfamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kinkymistresses.com|KinkyMistresses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 kinkyspa.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+kinkytwink.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
 kissmefuckme.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
 kpopping.com|Kpopping.yml|:x:|:x:|:x:|:heavy_check_mark:|-|-
 kristenbjorn.com|KristenBjorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Gay
-ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-ladyboygold.com|LadyboyGold.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans
+ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+ladyboygold.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans
 ladydee.xxx|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 lanakendrick.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 lanesisters.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 lasublimexxx.com|Lasublime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+latinamilf.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 latinoguysporn.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
 leannecrow.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 legsex.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
 lesbea.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+lesbiananalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
 lesbianass.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian
 lesbianfactor.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
 lesbiantribe.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -729,26 +865,29 @@ lexingtonsteele.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lezcuties.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian
lifeselector.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
lilhumpers.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+lilsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
lingerieav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
-lingerietales.com|LingerieTales.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lingerietales.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
littleasians.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
littlecaprice-dreams.com|LittleCapriceDreams.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
littlefromasia.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
littlehellcat.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-lollyhardcore.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+loan4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lonestarangel.com|LoneStarAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lookathernow.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+lordaardvark.com|LordAardvark.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|CDP|-
lovehairy.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
loveherass.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
-loveherboobs.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish
-loveherfeet.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Foot Fetish
-loveherfilms.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+loveherboobs.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish
+loveherfeet.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Foot Fetish
+loveherfilms.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
lubed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lucasentertainment.com|LucasEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
lustcinema.com|LustCinema.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
lustery.com|Lustery.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur
lustreality.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
-lustylina.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lustylina.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+mackstudio.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
madeincanada.xxx|MadeInCanada.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
madouqu.com|Madou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
maggiegreenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -761,9 +900,11 @@ manojob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
manroyale.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
manyvids.com|ManyVids.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+marcusmojo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-masqulin.com.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+masonicboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
massage-parlor.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
massagebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mature.nl|MatureNL.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -772,31 +913,39 @@ maturegapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- maturegynoexam.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- maturegynospy.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- max-hardcore.com|MaxHardcore.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maxence-angel.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay maxinex.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- meanawolf.com|MeanaWolf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- meanbitches.com|MeanBitches.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-| +meanmassage.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- meetsuckandfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- men.com|Brazzers.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay menatplay.com|MenAtPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +menoboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay menover30.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay menpov.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -messyxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -metadataapi.net (JSON API)|ThePornDB.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +messyxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +metadataapi.net|**Use the Endpoint**|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +metadataapi.net/jav|ThePornDBJAV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +metadataapi.net/movies|ThePornDBMovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Movies +metalbondage.com|MetalBondage.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- metart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- metartnetwork.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- metartx.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- -milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milfsodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- -milfvr.com|MilfVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +milfvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -minimuff.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +minimuff.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- minnano-av.com|Minnano-AV.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- missax.com|MissaX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -mixedx.com|GenuineSin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mistermale.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +mixedx.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mmpnetwork.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- modelcentro.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
modelhub.com|Modelhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
@@ -808,9 +957,12 @@ mofos.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
mofosnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
mom4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
momcomesfirst.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
momisamilf.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
momlover.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+mommy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mommyblowsbest.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mommyjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mommysboy.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
mommysboy.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mommysgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
@@ -828,6 +980,7 @@ momxxx.org|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mongerinasia.com|MongerInAsia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
monicamendez.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
monroelee.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+monstercub.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
mormongirlz.com|Mormongirlz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
motherfuckerxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
motherless.com|Motherless.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site
@@ -838,6 +991,7 @@ mrluckypov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mrpov.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
muchaslatinas.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
mugfucked.com|MugFucked.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mugfucked.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
muses.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
my-slavegirl.com|my-slavegirl.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mybabysittersclub.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -846,6 +1000,7 @@ mycherrycrush.com|MyCherryCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
mydaughterswap.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mydirtyhobby.com|MyDirtyHobby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
myfamilypies.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+myfirstdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
mylf.com|Mylf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mylfdom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
mylifeinmiami.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
@@ -864,34 +1019,43 @@ nastypublicsex.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
nastystepfamily.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
nataliastarr.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
natashanice.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+naturalbornbreeders.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+naughtynatural.com|NaughtyNatural.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+netvideogirls.com|NVGNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+newgrounds.com|Newgrounds.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -nextdoorbuddies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorcasting.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorebony.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorhookups.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoormale.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorraw.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorstudios.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoortaboo.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoortwink.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorcasting.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorebony.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorfilms.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorhomemade.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoormale.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoororiginals.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorraw.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorstudios.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortaboo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortwink.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay nfbusty.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +noboring.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +noelalejandrofilms.com|NoelAlejandro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nubilefilms.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- -nubilegirlshd.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nubiles-casting.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nubiles-porn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nubiles.net|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nubileset.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nubilesporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nubilesunscripted.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nucosplay.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- nudefightclub.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
nudeyogaporn.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- nurumassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- @@ -905,11 +1069,14 @@ officecock.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay officemsconduct.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- officepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- officesexjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -old-n-young.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ohmyholes.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +old-n-young.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- old4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +older4me.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay oldgoesyoung.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldje-3some.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldje.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oldnanny.com|OldNanny.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldsfuckdolls.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- only3xgirls.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- only3xlost.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -918,14 +1085,17 @@ only3xvr.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyblowjob.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlygolddigger.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyprince.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +onlytarts.com|OnlyTarts.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oreno3d.com|Oreno3d.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- orgytrain.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- outdoorjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV outhim.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay outofthefamily.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +over40handjobs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- p54u.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV pacopacomama.com|Paco.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV paintoy.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -934,9 +1104,11 @@ pansexualx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_ch pantyjobs.com|pantyjobs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pantypops.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- paradisegfs.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +parasited.com|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- parodypass.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- passion-hd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- passionxxx.com|Passionxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +paulomassaxxx.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay pawged.com|Pawged.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- peeonher.com|peeonher.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pegasproductions.com|PegasProductions.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -950,15 +1122,19 @@ 
perversefamily.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pervertgallery.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pervmom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pervnana.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervprincipal.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pervtherapy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
peternorth.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+petite18.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
petiteballerinasfucked.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
petited.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
petitehdporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
-petiteleeanna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+petiteleeanna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
petitepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
philavise.com|Philavise.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+philippwants.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
pickupfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pie4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pinklabel.tv|PinkLabelTV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
pinkoclub.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pinkotgirls.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
@@ -966,25 +1142,31 @@ pinupfiles.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pissplay.com|PissPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur Fetish
pissynetwork.com|PissyNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pjgirls.com|pjgirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pkfstudios.com|PKFStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
playboy.tv|PlayboyTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
playboyplus.com|PlayboyPlus.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+playdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
playwithrae.com|PlayWithRae.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
plumperpass.com|PlumperPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
plushies.tv|Plushies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pmvhaven.com|PMVHeaven.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|PMVs
porkvendors.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pornbox.com|Pornbox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+porncornvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR
porncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pornditos.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
porndudecasting.com|PornDudeCasting.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
pornfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pornforce.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
porngoespro.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornhex.com|PornHex.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Tube Site
pornhub.com|Pornhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
pornhubpremium.com|PornhubPremium.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
pornlandvideos.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pornmegaload.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
pornperverts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pornpros.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornstarbts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
pornstarhardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- pornstarplatinum.com|PornstarPlatinum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pornstarstroker.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1010,10 +1192,11 @@ prettydirtyteens.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- pridestudios.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay primecups.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- princesscum.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- -private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- privatecastings.com|privatecastings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- privatesextapes.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- producersfun.com|ProducersFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +profiles.myfreecams.com|MFC.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- propertysex.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicagent.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1021,32 +1204,42 @@ publicsexadventures.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicsexdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- puffynetwork.com|Puffynetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pumaswedexxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -pure-bbw.com|Pure-BBW.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -pure-ts.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-bbw.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-ts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-xxx.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- purebj.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- puremature.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- purepov.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- puretaboo.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- pussyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored putalocura.com|Putalocura.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -r18.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV -r18.com|r18.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +rachel-steele.com|RachelSteele.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -ragingstallion.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +ragingstallion.com|Algolia_RagingStallion.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +raunchybastards.com|RaunchyBastards.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -rawattack.com|RawAttack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawattack.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +rawfuck.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +rawfuckboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF -realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realbikinigirls.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +realfuckingdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- -realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR -realsensual.com|RealSensual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR +realsensual.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR -redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +reddit.com|Reddit.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +redgifs.com|Redgifs.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Gifs +redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +redhotstraightboys.com|RedHotStraightBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay redpolishfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- reidmylips.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- reidmylips.elxcomplete.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1054,16 +1247,21 @@ renderfiend.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- restrictedsenses.com|RestrictedSenses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- retroporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rickysroom.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +ridleydovarez.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- -rodsroom.com|Algolia_Rodsroom.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +roddaily.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +rodsroom.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay romemajor.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1072,15 +1270,18 @@ 
scarybigdicks.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
schoolgirlshd.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
schoolpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
scoreland.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+scoreland2.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+scoutboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
screwmetoo.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
seductive18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
seehimfuck.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
seehimsolo.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-seemomsuck.com|SeeMomSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+seemomsuck.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
seemysextapes.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
selfiesuck.com|SelfieSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sensualpain.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+serve4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
severesexfilms.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
sexart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
sexbabesvr.com|SexBabesVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
@@ -1096,22 +1297,25 @@ sexwithmuslims.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sexworking.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
sexyhub.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shagmag.com|shagmag.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Magazines
+shame4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-share.myfreecams.com|MFC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+share.myfreecams.com|MFC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+shefucksonthefirstdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shegotsix.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-shelovesblack.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+shelovesblack.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
shesbrandnew.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
sheseducedme.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shewillcheat.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shinybound.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-shinysboundsluts.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shinysboundsluts.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
shiofuky.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
shoplyfter.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
shoplyftermylf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
showerbait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
showybeauty.com|ShowyBeauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
shylaj.com|ShylaJ.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+sidechick.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
silverstonedvd.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
silviasaint.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
simplyanal.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -1121,18 +1325,23 @@ siripornstar.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sis.porn|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sislovesme.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sisswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-sissypov.com|SissyPov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sissypov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sketboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
slayed.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
slroriginals.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
slutinspection.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+slutsbts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+slutspov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sluttybbws.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
smashed.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
smashpictures.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
smokingmina.com|SmokingMina.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
smutfactor.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+smutmerchants.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
soapymassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
sofiemariexxx.com|SofieMariexxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sologirlsmania.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-soloteengirls.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+soloteengirls.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
sophiedeelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
sororitysluts.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
spankbang.com|SpankBang.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site
@@ -1144,19 +1353,26 @@ spermswallowers.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x
spermswap.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
spizoo.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
spoiledvirgins.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spunkworthy.com|SpunkWorthy.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
spyfam.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
squirtalicious.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
squirted.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
squirtinglesbian.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
squirtingorgies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-stagcollective.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+stagcollective.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+staghomme.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
stasyq.com|StasyQ.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+stasyqvr.com|StasyQVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
staxus.com|Staxus.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
stayhomepov.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+stephousexxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
steppov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
stepsiblings.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
stepsiblingscaught.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- stockingvideos.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stockydudes.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +store.evilangel.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +store.evilangelvideo.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- store.freeones.com|FreeonesStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- str8hell.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay strapattackers.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Femdom @@ -1166,62 +1382,72 @@ strapondreamer.com|StrapDreamer.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- streaming.iafd.com|IafdStreaming.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- stretchedoutsnatch.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- stripzvr.com|StripzVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +strokies.com|Strokies.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- stuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- studiofow.com|StudioFOW.yml|:heavy_check_mark:|:x:|:x:|:x:|-|3D Animation -stuffintwats.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stuffintwats.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- stunning18.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- subbyhubby.com|SubbyHubby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- submissivex.com|SubmissiveX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- subspaceland.com|SubspaceLand.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +suckmevr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR sugarcookie.xxx|sugarcookie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sugardaddyporn.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +suggabunny.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sunnylanelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sunnyleone.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- superbemodels.com|superbemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +superramon.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans susanayn.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swallowbay.com|SwallowBay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swallowed.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swallowsalon.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- sweetcarla.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sweetfemdom.com|SweetFemdom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -sweetheartvideo.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian -sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +sweetheartvideo.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Lesbian +sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- sweetyx.com|SweetyX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swinger-blog.xxx|SwingerBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swnude.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay tabooheat.com|Algolia_TabooHeat.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- taboopov.com|taboopov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tacamateurs.com|TACAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tadpolexstudio.com|TadpolexStudio.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- takevan.com|TakeVan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- taliashepard.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
tamedteens.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- -tandaamateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandaasians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandablondes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandabrunettes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandaebony.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandahousewives.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandalatinas.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tandaredheads.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tandaamateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandaasians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandablondes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandabrunettes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandaebony.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandahousewives.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandalatinas.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandalesbians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandaredheads.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- tanyatate.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- taratainton.com|TaraTainton.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teacherfucksteens.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- teachmyass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teamskeet.com|Teamskeet.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- teasepov.com|TeasePOV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teasingandpleasing.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenageanalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenagecorruption.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenagetryouts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenanalcasting.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencorezine.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencurves.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teendrillers.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenerotica.xxx|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -teenmegaworld.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenmegaworld.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- teenpies.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -teensexmania.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -teensexmovs.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teensandtwinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +teensexmania.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +teensexmovs.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- teensgoporn.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teensloveanal.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teensloveblackcocks.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1229,8 +1455,10 @@ 
teenslovehugecocks.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark
teensnaturalway.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
teensneaks.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
teenstryblacks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Thai Uncensored
-tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Jav
+teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Thai Uncensored
+teentugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenytaboo.com|TeenyTaboo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV
terapatrick.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
tessafowler.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
texasbukkake.com|TexasBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
@@ -1240,9 +1468,12 @@ tgirljapan.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
tgirljapanhardcore.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
tgirlpornstar.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
tgirlpostop.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+tgirlsfuck.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+tgirlshookup.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+tgirltops.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
theartporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
@@ -1251,28 +1482,33 @@ theflourishamateurs.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theflourishfetish.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theflourishpov.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
theflourishxxx.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+thehabibshow.com|TheHabibShow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
thelesbianexperience.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
thelifeerotic.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
thenude.com|TheNude.yml|:x:|:x:|:x:|:heavy_check_mark:|-|-
-thestripperexperience.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
thetabutales.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+theyeslist.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
thicc18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
thickandbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
thickumz.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
thirdsexxxx.com|ThirdSexXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
thirdworldxxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Asian + Latin + Trans
+thisvid.com|ThisVid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site
throated.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
timtales.com|TimTales.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
tiny4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
tinysis.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
tittycreampies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
titworld.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
-tmwvrnet.com|TmwVRnet.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
+tmwpov.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tmwvrnet.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR
tokyo-hot.com|Tokyohot.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
tokyobang.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans
topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+toplatindaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
topwebmodels.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
toticos.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
touchmywife.com|Algolia_touchmywife.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
@@ -1282,21 +1518,26 @@ trans500.com/tour/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
trans500.com/tour3/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
trans500.com/tourespanol|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
transangels.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transatplay.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
transbella.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transcest.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
-transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
transfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
-transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
-transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Trans
+transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
transgressivexxx.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans
transmodeldatabase.com|TransModelDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans
+transnificent.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
-transsensual.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Trans
+transsensual.com|MindGeek.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|Trans
transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
transsexualroadtrip.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -trickymasseur.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tranzvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +traxxx.me|Traxxx.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +trickymasseur.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- trickyoldteacher.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trickyspa.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- trikepatrol.com|TrikePatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- @@ -1304,26 +1545,35 @@ tripforfuck.com|TripForFuck.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-| trueamateurs.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trueanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +trystanbull.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans -tsplayground.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -tspov.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsgirlfriendexperience.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +tspov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsraw.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +tugpass.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tuktukpatrol.com|TukTukPatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- tushy.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- tushyraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- tutor4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +twinkloads.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twinktop.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay twistedvisual.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- twistys.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- twistysnetwork.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- twotgirls.com|TwoTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans uk-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans ultrafilms.com|UltraFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +underhentai.com|UnderHentai.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai +universblack.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay unlimitedmilfs.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- unrealporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- upherasshole.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -upskirtjerk.com|UpskirtJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +upskirtjerk.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR valentina.passionepiedi.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vangoren.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vcaxxx.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -1333,7 +1583,10 @@ 
venus.ultrafilms.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- venus.wowgirls.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- venus.wowporn.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- vickyathome.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +viktor-rom.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay vinaskyxxx.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vintagegaymovies.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +vip4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vipissy.com|Vipissy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vipsexvault.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- virtualpee.com|VirtualPee.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish @@ -1345,11 +1598,12 @@ virtualrealpassion.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_m virtualrealporn.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR virtualrealtrans.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR virtualtaboo.com|VirtualTaboo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +visit-x.net|Visit-X.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- vivid.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vividclassic.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vivthomas.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- vixen.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -vlogxxx.com|VlogXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vlogxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vogov.com|vogov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vrbangers.com|VRBangers.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrbgay.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR @@ -1358,24 +1612,30 @@ vrconk.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrintimacy.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR -vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vrporn.com|VRPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrpornpass.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vtubie.com|vTubie.yml|:x:|:x:|:x:|:heavy_check_mark:|-|VTuber Database wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -wankitnow.com|Wankitnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +wankitnow.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR wankz.com|wankz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -wankzvr.com|WankzVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +wankzvr.com|POVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -watchingmydaughtergoblack.com|WatchingMyDaughterGoBlack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- watchmygf.me|WatchMyGF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- -wearehairy.com|wearehairy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wearehairy.com|WeAreHairy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|- webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +wefuckblackgirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- weliketosuck.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1390,16 +1650,19 @@ whiteteensblackcocks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- whorecraftvr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fantasy VR wicked.com (/movies)|WickedMovies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wicked.com|Algolia_Wicked.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +wifespov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wildoncam.com|trafficpimps.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- williamhiggins.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay willtilexxx.com|WillTileXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -woodmancastingx.com|WoodmancastingX.y,l|:heavy_check_mark:|:x:|:x:|:x:|-|- +wolfwagner.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +woodmancastingx.com|WoodmancastingX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowgirls.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowgirlsblog.com|WOWGirlsBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowporn.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wtfpass.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wurstfilm.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay www.mgstage.com|mgstage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV -x-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +x-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- x-art.com|X-artcom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xart.xxx|Xartxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xconfessions.com|XConfessions.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- @@ -1410,15 +1673,18 @@ xevunleashed.com|Xevunleashed.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xhamster.com|Xhamster.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xlgirls.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- xnxx.com|Xnxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xrares.com|Xrares.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xsinsvr.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- xslist.org|Xslist.yml|:x:|:x:|:x:|:heavy_check_mark:|-|JAV Database xtube.com|Xtube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xvideos.com|Xvideos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xvideos.red|Xvideos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xvirtual.com|XVirtual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
-xxxnj.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xxxjobinterviews.com|XXXJobInterviews.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xxxnj.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- xxxpawn.com|XXXPawn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yanks.com|Yanks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yesgirlz.com|YesGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yngr.com|YNGR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- younganaltryouts.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- youngerloverofmine.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1429,11 +1695,12 @@ yourmomdoesanal.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yourmomdoesporn.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yummysofie.com|YummySofie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- z-filmz-originals.com|Z-Filmz-Originals.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +zebragirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- zentaidolls.com|ZentaiDolls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR -zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- - +zishy.com|Zishy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- ## Non url scrapers @@ -1443,9 +1710,10 @@ For each scraper a short description, an optional comment with the usage and the Scraper | Description | Comments | PR --------|-------------|----------|:--: ComicInfoXML.yml| A ComicInfo XML gallery scraper | A python scraper that looks for ComicInfo xml compatible files in the gallery's folder/filename and parses them | [#827](https://github.com/stashapp/CommunityScrapers/pull/827) -CopyFromScene.yml| A gallery scraper that returns metadata from the first linked scene | A python scraper that returns metadata from copied scenes, first link the scene to the gallery then run the scraper on the gallery | +CopyFromScene.yml| A gallery scraper that returns metadata from the first linked scene | A python scraper that returns metadata from copied scenes, first link the scene to the gallery then run the scraper on the gallery | CopyToGallery.yml| A scene to gallery scraper | A python scene scraper that copies metadata from a scene to the associated galleries. 
Can optionally (check .py file) associate and copy metadata to all galleries in the same folder as the scene| [#895](https://github.com/stashapp/CommunityScrapers/pull/895) dc-onlyfans.yml| An OnlyFans DB scene scraper | A python scraper that scrapes OnlyFans scenes using the DB file (user_data.db) created from DIGITALCRIMINAL's tool | [#847](https://github.com/stashapp/CommunityScrapers/pull/847) +dc_onlyfans_fansdb.yml | FansDB "compliant" OnlyFans metadata DB scraper | Python scraper which scrapes metadata from DIGITALCRIMINAL compatible `user_data.db` databases following FansDB guidelines | [#1500](https://github.com/stashapp/CommunityScrapers/pull/1500) Filename.yml | Scrape a scene's (local) filename to set as scene title | Utility scraper useful if you've bulk updated filenames outside of stash and want the changes synced back into stash | [#1136](https://github.com/stashapp/CommunityScrapers/pull/1136) jellyfin.yml| A Jellyfin/Emby scraper | A python scraper that uses the Jellyfin/Emby API to look for Scenes, Performers and Movies via URL, Query or Fragments. Needs the URL, API-Key and User from Jellyfin set in jellyfin.py and the URLs in jellyfin.yml adapted to your local Jellyfin/Emby instance | MindGeekAPI.yml| A sceneBy(Name\|Fragment) scraper for MindGeek network| A python scraper that queries the MindGeek API directly. For further **needed** instructions refer to the relevant PRs and have a look in the `MindGeekApi.py` file | [#711](https://github.com/stashapp/CommunityScrapers/pull/711) [#738](https://github.com/stashapp/CommunityScrapers/pull/738) [#411](https://github.com/stashapp/CommunityScrapers/pull/411) diff --git a/build_site.sh b/build_site.sh new file mode 100755 index 000000000..49872250a --- /dev/null +++ b/build_site.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +# builds a repository of scrapers +# outputs to _site with the following structure: +# index.yml +# <scraper_id>.zip +# Each zip file contains the scraper.yml file and any other files in the same directory + +outdir="$1" +if [ -z "$outdir" ]; then + outdir="_site" +fi + +rm -rf "$outdir" +mkdir -p "$outdir" + +buildScraper() +{ + f=$1 + dir=$(dirname "$f") + + # get the scraper id from the filename + scraper_id=$(basename "$f" .yml) + versionFile=$f + if [ "$scraper_id" == "package" ]; then + scraper_id=$(basename "$dir") + fi + + if [ "$dir" != "./scrapers" ]; then + versionFile="$dir" + fi + + echo "Processing $scraper_id" + + # get the version hash and last-updated date from the scraper's most recent commit + version=$(git log -n 1 --pretty=format:%h -- "$versionFile") + updated=$(TZ=UTC0 git log -n 1 --date="format-local:%F %T" --pretty=format:%ad -- "$versionFile") + + # create the zip file + # copy other files + zipfile=$(realpath "$outdir/$scraper_id.zip") + + name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') + ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//') + dep=$(grep "^# requires:" "$f" | cut -c 12- | sed -e 's/\r//') + + # always ignore package file + ignore="-x $ignore package" + + pushd "$dir" > /dev/null + if [ "$dir" != "./scrapers" ]; then + zip -r "$zipfile" . ${ignore} > /dev/null + else + zip "$zipfile" "$scraper_id.yml" > /dev/null + fi + popd > /dev/null + + # write to spec index + echo "- id: $scraper_id + name: $name + version: $version + date: $updated + path: $scraper_id.zip + sha256: $(sha256sum "$zipfile" | cut -d' ' -f1)" >> "$outdir"/index.yml + + # handle dependencies + if [ ! 
-z "$dep" ]; then + echo " requires:" >> "$outdir"/index.yml + for d in ${dep//,/ }; do + echo " - $d" >> "$outdir"/index.yml + done + fi + + echo "" >> "$outdir"/index.yml +} + +# find all yml files in ./scrapers - these are packages individually +for f in ./scrapers/*.yml; do + buildScraper "$f" +done + +find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do + buildScraper "$f" +done + +# handle dependency packages +find ./scrapers/ -mindepth 2 -name package -print0 | while read -d $'\0' f; do + buildScraper "$f" +done diff --git a/scrapers/1passforallsites.yml b/scrapers/1passforallsites.yml new file mode 100644 index 000000000..a5cb88aa5 --- /dev/null +++ b/scrapers/1passforallsites.yml @@ -0,0 +1,35 @@ +name: "1 Pass For All Sites" +sceneByURL: + - action: scrapeXPath + url: + - 1passforallsites.com/episode/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + selector: //a[contains(@href,'?site=')] + Title: + selector: //title + postProcess: + - replace: + - regex: (^.+) - 1 .+$ + with: $1 + Details: //div[@class="sp-info-txt"]/p/text() + Performers: + Name: + selector: //p[@class="sp-info-name"]/a/text() + Tags: + Name: + selector: //p[@class="niches-list"]/a/text() + Date: + selector: //li[contains(text(),"Added:")] + postProcess: + - replace: + - regex: "Added\\: (.+)" + with: $1 + - parseDate: 2 Jan 2006 + Image: //video/@poster + +# Last Updated July 12, 2023 diff --git a/scrapers/Algolia_21Naturals.yml b/scrapers/21Naturals/21Naturals.yml similarity index 76% rename from scrapers/Algolia_21Naturals.yml rename to scrapers/21Naturals/21Naturals.yml index 131b7bf6b..8f0dceda1 100644 --- a/scrapers/Algolia_21Naturals.yml +++ b/scrapers/21Naturals/21Naturals.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "21Naturals" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 21naturals.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals - validName # Last Updated March 23, 2022 diff --git a/scrapers/Algolia_21Sextreme.yml b/scrapers/21Sextreme/21Sextreme.yml similarity index 76% rename from scrapers/Algolia_21Sextreme.yml rename to scrapers/21Sextreme/21Sextreme.yml index 1dfaa360a..34fda954e 100644 --- a/scrapers/Algolia_21Sextreme.yml +++ b/scrapers/21Sextreme/21Sextreme.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "21Sextreme" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 21sextreme.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme - validName # Last Updated March 23, 2022 diff --git a/scrapers/Algolia_21Sextury.yml b/scrapers/21Sextury/21Sextury.yml similarity index 77% rename from scrapers/Algolia_21Sextury.yml rename to scrapers/21Sextury/21Sextury.yml index 62d1df2e5..afc0b5317 100644 --- a/scrapers/Algolia_21Sextury.yml +++ b/scrapers/21Sextury/21Sextury.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: 
"21Sextury" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 21sextury.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - 21sextury.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury - gallery # Last Updated December 22, 2022 diff --git a/scrapers/AMAMultimedia.yml b/scrapers/AMAMultimedia.yml index dfc636901..cdf338430 100644 --- a/scrapers/AMAMultimedia.yml +++ b/scrapers/AMAMultimedia.yml @@ -32,6 +32,7 @@ sceneByURL: - holed.com/video/ - lubed.com/video/ - mom4k.com/video/ + - momcum.com/video/ - myveryfirsttime.com/video/ - nannyspy.com/video/ - passion-hd.com/video/ @@ -73,10 +74,22 @@ xPathScrapers: - regex: ([?&]img[wh]=\d+)+$ with: Studio: - Name: //div[@id="navigation"]/h5/a/@alt + Name: + selector: //div[@id="navigation"]/h5/a/@alt + postProcess: + - map: + Baeb: BAEB + Casting Couch X: Casting Couch-X + Cum4K: Cum 4K + Exotic4k: Exotic 4K + GirlCum: Girl Cum + MomCum: Mom Cum + NannySpy: Nanny Spy + SpyFam: Spy Fam + Tiny4K: Tiny 4K + WetVR: Wet VR Date: selector: //div[contains(text(), 'RELEASED')]/span/text() postProcess: - parseDate: January 02, 2006 - -# Last Updated March 26, 2022 +# Last Updated February 06, 2023 diff --git a/scrapers/APOVStory.yml b/scrapers/APOVStory.yml new file mode 100644 index 000000000..d78b57853 --- /dev/null +++ b/scrapers/APOVStory.yml @@ -0,0 +1,37 @@ +name: A POV Story + +sceneByURL: + - action: scrapeXPath + url: + - apovstory.com/trailers/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + fixed: "A POV Story" + Title: + selector: //div[@class = 'trailerArea centerwrap']/h3 + Details: + selector: //div[@class = 'trailerContent']//*//text() + concat: "\n\n" + postProcess: + - replace: + - regex: ^Description:\s* + with: + Tags: + Name: //li/span[contains(text(),'CATEGORIES')]/parent::li//a//text() + Performers: + Name: //li/span[contains(text(),'FEATURING')]/parent::li//a//text() + Image: + selector: //div[@class="player-thumb"]/img/@src0_3x + postProcess: + - replace: + - regex: ^ + with: "https://apovstory.com" + Date: + selector: //span[contains(text(),'RELEASE DATE')]/parent::li/text() + postProcess: + - parseDate: "January 2, 2006" +# Last Updated August 24, 2023 diff --git a/scrapers/ATKGirlfriends/ATKGirlfriends.py b/scrapers/ATKGirlfriends/ATKGirlfriends.py new file mode 100644 index 000000000..b934c1995 --- /dev/null +++ b/scrapers/ATKGirlfriends/ATKGirlfriends.py @@ -0,0 +1,79 @@ +import json +import os +import re +import requests +import sys + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + import py_common.log as log +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() +try: + from lxml import html +except ModuleNotFoundError: + print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr) + print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr) + sys.exit() + +kgs_per_lb = 0.45359237 +cms_per_in = 2.54 +filename_pattern = re.compile(r"(?P<model_id>[a-z]{3}\d{3})ATK_(?P<movie_id>\d{6})(?P<scene_number>\d{3})_(?P<resolution>\w+)(?:\.(?P<extension>\w+))?", re.IGNORECASE) + +def getSceneByFilename(filename): + # Parse filename + filename_match = filename_pattern.match(filename) + (model_id, movie_id, _, _, _) = filename_match.groups() + + # Fetch model page + model_url = f"https://www.atkgirlfriends.com/tour/model/{model_id}" + log.debug(f"Fetching {model_url} ({movie_id})") + response = requests.get(model_url, cookies=dict(start_session_galleria = 'stash')) + if (response.url.startswith("https://www.atkgirlfriends.com?nats")): + # Refetch page on cookie failure + response = requests.get(model_url, cookies=dict(start_session_galleria = 'stash')) + + # Build performer + tree = html.fromstring(response.text) + performer = dict(Gender = "female") + model_profile_wrap_xpath = '//div[contains(@class, "model-profile-wrap")]' + performer["name"] = tree.xpath('//h1[contains(@class, "page-title")]')[0].text + performer["url"] = f"{model_url}/1/atk-girlfriends-{performer['name'].replace(' ', '-')}" + performer["ethnicity"] = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Ethnicity")]/following-sibling::text()')[0].strip().capitalize() + performer["hair_color"] = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Hair Color")]/following-sibling::text()')[0].strip().capitalize() + height_ft_ins_str = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Height")]/following-sibling::text()')[0].strip() + (height_ft_str, height_ins_str) = re.compile(r"(\d+)[\"'](\d+)").findall(height_ft_ins_str)[0] + height_ins = float(height_ft_str) * 12 + float(height_ins_str) + performer["height"] = str(int(height_ins * cms_per_in)) + weight_lbs_str = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Weight")]/following-sibling::text()')[0].strip() + weight_lbs = float(re.compile(r"\d+").findall(weight_lbs_str)[0]) + performer["weight"] = str(int(weight_lbs * kgs_per_lb)) + performer["measurements"] = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Bust Size")]/following-sibling::text()')[0].strip() + performer["image"] = tree.xpath(f'{model_profile_wrap_xpath}/img/@src')[0] + + # Build scene + scene = dict(studio = dict(name = "ATK Girlfriends"), performers = [performer]) + movie_wrap_xpath = f'//img[contains(@src, "/{model_id}/{movie_id}")]/../../../..' 
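+    # the repeated "/.." steps climb four ancestors from the matched thumbnail <img>, keeping the queries below scoped to this one movie's card on the model page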
+ scene["title"] = tree.xpath(f'{movie_wrap_xpath}//h1')[0].text.strip() + scene["details"] = tree.xpath(f'{movie_wrap_xpath}//b[contains(text(), "Description")]/following-sibling::text()')[0].strip() + movie_url_relative = tree.xpath(f'{movie_wrap_xpath}//a/@href')[0] + scene["url"] = f'https://www.atkgirlfriends.com{movie_url_relative}' + scene["image"] = tree.xpath(f'{movie_wrap_xpath}//img/@src')[0] + + return scene + +input = sys.stdin.read() +match = filename_pattern.search(input) +if (match): + scene = getSceneByFilename(match.group()) + output = json.dumps(scene) + print(output) +else: + print(r"{}") diff --git a/scrapers/ATKGirlfriends/ATKGirlfriends.yml b/scrapers/ATKGirlfriends/ATKGirlfriends.yml new file mode 100644 index 000000000..782e4fefd --- /dev/null +++ b/scrapers/ATKGirlfriends/ATKGirlfriends.yml @@ -0,0 +1,91 @@ +name: ATK Girlfriends +# requires: py_common +sceneByFragment: + action: script + script: + - python + - ATKGirlfriends.py +performerByURL: + - action: scrapeXPath + url: + - https://www.atkgirlfriends.com/tour/model/ + scraper: performerScraper +sceneByURL: + - action: scrapeXPath + url: + - https://www.atkgirlfriends.com/tour/movie/ + scraper: sceneScraper +xPathScrapers: + performerScraper: + common: + $modelWrap: &modelWrap //div[contains(@class, "model-profile-wrap")] + performer: + Name: //h1[contains(@class, "page-title")] + Gender: + fixed: female + Ethnicity: + selector: $modelWrap/b[contains(text(), "Ethnicity")]/following-sibling::text() + HairColor: + selector: $modelWrap/b[contains(text(), "Hair Color")]/following-sibling::text() + postProcess: + - map: + black: Black + blond: Blonde + brown: Brown + red: Red + white: White + Height: + selector: $modelWrap/b[contains(text(), "Height")]/following-sibling::text() + postProcess: + - feetToCm: true + Weight: + selector: $modelWrap/b[contains(text(), "Weight")]/following-sibling::text() + postProcess: + - replace: + - regex: (\d+).* + with: $1 + - lbToKg: true + Measurements: + selector: $modelWrap/b[contains(text(), "Bust Size")]/following-sibling::text() + Image: + selector: $modelWrap/img/@src + sceneScraper: + common: + $movieWrap: //div[contains(@class, "movie-wrap")] + $modelWrap: *modelWrap + scene: + Title: //title + Details: $movieWrap/b[contains(text(), "Description")]/following-sibling::text() + URL: + selector: //video/source/@src + postProcess: + - replace: + # 0: Match (ale138ATK_290698001_hd.mp4) + # 1: Model (ale138) + # 2: Movie (290698) + # 3: Scene (001) + # 4: Resolution (hd) + # 5: Extension (mp4) + - regex: &filenameRegex ^.*([a-z]{3}\d{3})ATK_(\d+)(\d{3})_(\w*)\.(\w+).*$ + with: https://www.atkgirlfriends.com/tour/movie/$2 + Image: //video/@poster + Studio: + Name: + fixed: ATK Girlfriends + URL: + fixed: https://www.atkgirlfriends.com/ + Tags: + Name: + selector: $movieWrap/b[contains(text(), "Tags")]/following-sibling::text() + split: ',' + Performers: + Name: $modelWrap/text()[1] +driver: + cookies: + - CookieURL: https://www.atkgirlfriends.com + Cookies: + - Name: start_session_galleria + Domain: www.atkgirlfriends.com + Value: stash # Rotate this value if the scraper is blocked. The first request with the new value should fail. 
+ Path: / +# Last Updated April 14, 2022 diff --git a/scrapers/AVJiali.yml b/scrapers/AVJiali.yml new file mode 100644 index 000000000..03ff3c85f --- /dev/null +++ b/scrapers/AVJiali.yml @@ -0,0 +1,168 @@ +name: "AV Jiali" +sceneByURL: + - action: scrapeXPath + url: + - avjiali.com + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - avjiali.com + scraper: performerScraper +sceneByName: + action: scrapeXPath + queryURL: https://avjiali.com/?s={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //h5/a + Date: + selector: //div[@class="video-date"]/text() + postProcess: + - replace: + - regex: (\d+)(st|nd|rd|th) + with: "$1" + - parseDate: January 02, 2006 + Performers: + Name: //div[@class="cat"][1]/a[starts-with(@href, 'https://avjiali.com/model')] + URL: //div[@class="cat"][1]/a/@href + Tags: + Name: //div[@class="cat"][2]/a + Details: + selector: //p[span[@class="readmore"]] + postProcess: + - replace: + - regex: ... Read More + with: + Image: + selector: //video[@id="videohtml5tour"]/@poster + postProcess: + - replace: + - regex: ^// + with: "https://" + URL: //link[@rel="canonical"]/@href + Studio: + Name: + fixed: AV Jiali + performerScraper: + common: + $profile: //div[@class="model-profile"] + performer: + Name: + selector: $profile[contains(strong, "Name:")]//text() + postProcess: + - replace: + - regex: .*Name:\s*(.*)\s*$ + with: $1 + - regex: None + with: + Aliases: + selector: $profile[contains(strong, "Chinese name:")]//text() + postProcess: + - replace: + - regex: .*Chinese name:\s*(.*)\s*$ + with: $1 + - regex: None + with: + Birthdate: + selector: $profile[contains(strong, "Birth date:")]//text() + postProcess: + - replace: + - regex: .*Birth date:\s*(.*)\s*$ + with: $1 + - parseDate: January 2, 2006 + - parseDate: 2006-01-02 + Measurements: + selector: $profile[contains(strong, "Body:")]//text() + postProcess: + - replace: + - regex: .*Body:\s*(.*)\s*$ + with: $1 + - regex: B(\d*)-W(\d*)-H(\d*) + with: $1-$2-$3 + - regex: -- + with: + - regex: None + with: + Weight: + selector: $profile[contains(strong, "Weight:")]//text() + postProcess: + - replace: + - regex: .*Weight:\s*(.*)\s*$ + with: $1 + - regex: (\d+)kg + with: $1 + - regex: None + with: + Height: + selector: $profile[contains(strong, "Height:")]//text() + postProcess: + - replace: + - regex: .*Height:\s*(.*)\s*$ + with: $1 + - regex: (\d+)cm.* + with: $1 + - regex: None + with: + Details: + selector: //p[span[@class="readmore"]] + postProcess: + - replace: + - regex: ... 
Read More + with: + Piercings: + selector: $profile[contains(strong, "Piercings:")]//text() + postProcess: + - replace: + - regex: .*Piercings:\s*(.*)\s*$ + with: $1 + - regex: None|^No$|No Piercing.* + with: + Tattoos: + selector: $profile[contains(strong, "Tattoo:")]//text() + postProcess: + - replace: + - regex: .*Tattoo:\s*(.*)\s*$ + with: $1 + - regex: None|^No$|No Tattoo.* + with: + HairColor: + selector: $profile[contains(strong, "Hair color:")]//text() + postProcess: + - replace: + - regex: (?i).*hair color:?\s*(.*)\s*$ + with: $1 + - regex: None + with: + URL: //link[@rel="canonical"][1]/@href + Image: + selector: //div[@class="model-photo"]/img[@class="rounded"]/@src + postProcess: + - replace: + - regex: ^// + with: https:// + - regex: 460x640 + with: 690x960 + Ethnicity: + fixed: Asian + Gender: + fixed: Female + Country: + fixed: Taiwan + sceneSearch: + common: + $videos: //div[@class="thumb"]/a + scene: + Title: $videos/@title + URL: $videos/@href + Image: $videos/img/@src + Studio: + Name: + fixed: AVJiali +# Last Updated November 07, 2023 diff --git a/scrapers/ActiveDuty/ActiveDuty.yml b/scrapers/ActiveDuty/ActiveDuty.yml new file mode 100644 index 000000000..895fea7aa --- /dev/null +++ b/scrapers/ActiveDuty/ActiveDuty.yml @@ -0,0 +1,31 @@ +# requires: Algolia +name: "Active Duty" +sceneByURL: + - action: script + url: + - activeduty.com/en/video + script: + - python + - ../Algolia/Algolia.py + - activeduty +sceneByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - activeduty +sceneByName: + action: script + script: + - python + - ../Algolia/Algolia.py + - activeduty + - searchName +sceneByQueryFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - activeduty + - validName +# Last Updated September 26, 2023 diff --git a/scrapers/Algolia_AddictedToGirls.yml b/scrapers/AddictedToGirls/AddictedToGirls.yml similarity index 78% rename from scrapers/Algolia_AddictedToGirls.yml rename to scrapers/AddictedToGirls/AddictedToGirls.yml index 296ac220d..8eb060de4 100644 --- a/scrapers/Algolia_AddictedToGirls.yml +++ b/scrapers/AddictedToGirls/AddictedToGirls.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "addicted 2 Girls" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - addicted2girls.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - addicted2girls.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls - gallery # Last Updated December 22, 2022 diff --git a/scrapers/AdultDvdMarketPlace.yml b/scrapers/AdultDvdMarketPlace.yml index 3b7b9d0f4..9033c5c78 100644 --- a/scrapers/AdultDvdMarketPlace.yml +++ b/scrapers/AdultDvdMarketPlace.yml @@ -7,6 +7,22 @@ movieByURL: - adultdvdmarketplace.com/dvd_view scraper: movieScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.adultdvdmarketplace.com/xcart/adult_dvd/dvd_search.php?type=title&search={} + scraper: sceneSearch + +sceneByURL: + - action: scrapeXPath + url: + - adultdvdmarketplace.com/dvd_view + scraper: sceneScraper + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: 
sceneScraper + xPathScrapers: movieScraper: movie: @@ -22,4 +38,36 @@ FrontImage: //strong[contains(text(),"Large Front")]/parent::a/@href BackImage: //strong[contains(text(),"Large Back")]/parent::a/@href + sceneSearch: + common: + $selection: //div[contains(concat(' ',normalize-space(@class),' '),' product-col ')] + scene: + Title: $selection//h4/a + URL: + selector: $selection//h4/a/@href + postProcess: + - replace: + - regex: ^ + with: https://www.adultdvdmarketplace.com + Image: $selection//a/img/@src + + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + Details: //h3[contains(text(), "Description")]/following-sibling::p + Date: + selector: //span[contains(text(),"Released")]/following-sibling::text() + postProcess: + - parseDate: 01/2006 + Image: //strong[contains(text(),"Large Front")]/parent::a/@href + Studio: + Name: //span[@itemprop="brand"]/text() + Movies: + Name: //meta[@property="og:title"]/@content + URL: //meta[@property="og:url"]/@content + Performers: + Name: //h3[text()="Cast"]/following-sibling::a + Tags: + Name: //span[text()="Category:"]/following-sibling::a + # Last Updated September 07, 2020 diff --git a/scrapers/AdultEmpire.yml b/scrapers/AdultEmpire.yml index a891ca3d4..39ab09139 100644 --- a/scrapers/AdultEmpire.yml +++ b/scrapers/AdultEmpire.yml @@ -21,7 +21,13 @@ sceneByQueryFragment: action: scrapeXPath queryURL: "{url}" scraper: sceneScraper - +performerByURL: + - + action: scrapeXPath + url: + - adultdvdempire.com + - adultempire.com + scraper: performerScraper xPathScrapers: sceneSearch: @@ -60,7 +66,7 @@ xPathScrapers: postProcess: - parseDate: Jan 02 2006 Synopsis: - selector: //h4[contains(@class,"synopsis")]//text() + selector: //div[contains(@class,"synopsis-content")]//text() concat: " " Studio: Name: //a[@label="Studio"]/text() @@ -78,12 +84,13 @@ scene: Title: //h1/text() Details: - selector: //h4[contains(@class,"synopsis")]//text() + selector: //div[contains(@class,"synopsis-content")]//text() concat: " " Date: selector: //small[contains(text(), "Released")]/following-sibling::text() postProcess: - parseDate: Jan 02 2006 + Director: //a[@label="Director"]/text() Image: //a[@id="front-cover"]/@data-href Studio: Name: //a[@label="Studio"]/text() @@ -101,4 +108,44 @@ - regex: ^ with: "https://www.adultdvdempire.com" URL: //meta[@name='og:url']/@content -# Last Updated December 16, 2021 + performerScraper: + common: + $infoPiece: //*[@id="profileModal"]/div/div/div[2]/div[1]/ul + performer: + Name: //*[@id="content"]/section/div/div[2]/h1/text() + Birthdate: + selector: $infoPiece/li[contains(text(), 'Born:')]/text() + postProcess: + - replace: + - regex: Born:\s+(.*) + with: $1 + Height: + selector: $infoPiece/li[contains(text(), 'Height:')]/text() + postProcess: + - replace: + - regex: Height:\s+(.*) + with: $1 + - feetToCm: true + Image: //*[@id="content"]/section/div/div[1]/a[1]/img/@src|//*[@id="content"]/section/div/div[1]/img/@src + Country: + selector: $infoPiece/li[contains(text(), 'From:')]/text() + postProcess: + - replace: + - regex: From:\s+(.*) + with: $1 + Measurements: + selector: $infoPiece/li[contains(text(), 'Measurements:')]/text() + postProcess: + - replace: + - regex: Measurements:\s+(\d\d\w*)\D+(\d+)\D+(\d+).* + with: $1-$2-$3 + Aliases: + selector: //*[@id="content"]/section/div/div[2]/div[contains(text(), "Alias:")] + concat: ", " + postProcess: + - replace: + - regex: "Alias: (.*)" + with: $1 + Details: //*[@id="content"]/section/div/div[5]/aside/text() + 
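# the canonical <link> element is the most stable place to read the performer's profile URL from +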
URL: //link[@rel='canonical']/@href +# Last Updated December 07, 2023 diff --git a/scrapers/AdultEmpireCash.yml b/scrapers/AdultEmpireCash.yml index af30a76f1..6cdc18579 100644 --- a/scrapers/AdultEmpireCash.yml +++ b/scrapers/AdultEmpireCash.yml @@ -19,9 +19,11 @@ sceneByURL: - severesexfilms.com/ - smutfactor.com/ - spankmonster.com/ + - stephousexxx.com/ - thirdworldxxx.com/ - wcpclub.com/ - westcoastproductions.com/ + - data18.empirestores.co/ # VR Sites - action: scrapeXPath @@ -46,6 +48,8 @@ movieByURL: - thirdworldxxx.com/ - wcpclub.com/ - westcoastproductions.com/ + - data18.empirestores.co/ + xPathScrapers: sceneScraper: @@ -76,7 +80,14 @@ xPathScrapers: Image: //link[@rel="image_src"]/@href Movies: Name: //h1[@class="description"]/following-sibling::p/a/text() - Director: //div[@class="director"]/text() + Director: + selector: //div[@class="director"]/text() + postProcess: + - replace: + - regex: \s{2,} + with: " " + - regex: " ," + with: "," Code: selector: //meta[@name="og:url"]/@content postProcess: diff --git a/scrapers/AdultPrime.yml b/scrapers/AdultPrime.yml index ac852a7f6..f94ec4759 100644 --- a/scrapers/AdultPrime.yml +++ b/scrapers/AdultPrime.yml @@ -1,5 +1,3 @@ -# The search by name does not return images by default. To get images you will need to uncomment the driver section at the end of this file. -# Each click you uncomment will load another 4 images, however the search will be much slower, so it's disabled by default. name: "AdultPrime" sceneByName: action: scrapeXPath @@ -14,77 +12,60 @@ sceneByQueryFragment: sceneByURL: - action: scrapeXPath url: - - adultprime.com/studios/video/ + - adultprime.com/studios/video scraper: sceneScraper xPathScrapers: sceneSearch: common: - $scenes: //ul[@id='studio-videos-container'] + $scenes: //ul[@id="studio-videos-container"] scene: Title: - selector: $scenes//span[contains(@class, 'description-title')]/text() + selector: $scenes//span[contains(@class, "description-title")]/text() Date: - selector: $scenes//span[@class='description-releasedate']/text() + selector: $scenes//span[@class="description-releasedate"]/text() postProcess: - parseDate: Jan 02, 2006 URL: - selector: $scenes//a[@class='absolute']/@href + selector: $scenes//a[@class="absolute"]/@href postProcess: - replace: - - regex: "^.signup.galleryId." - with: "https://adultprime.com/studios/video/" - Image: - selector: $scenes//div[contains(@class, 'ap-slider-img')]/@style - postProcess: - - replace: - - regex: .+url\("(.+)"\).+ - with: $1 + - regex: "^.signup.galleryId." 
+ with: "https://adultprime.com/studios/video/" + Image: $scenes//div[contains(@class, 'ap-slider-img')]/@data-src sceneScraper: + common: + $scene: //div[@class="update-info-container"] scene: - Title: //h2[contains(@class,"update-info-title")]/text() + Title: $scene//h2 Date: - selector: //p[@class="update-info-line regular"][1]/b[1]/text() + selector: ($scene//p[@class="update-info-line regular"]/b)[1]/text() postProcess: - - parseDate: 02.01.2006 + - parseDate: "02.01.2006" Details: - selector: //p[contains(@class,"ap-limited-description-text")] + selector: $scene//p[contains(@class,"ap-limited-description-text")] Image: - selector: //div[contains(@class, "video-wrapper")]//div[starts-with(@style,"background-image:") and not(contains(@style,"player-placeholder.gif"))]/@style + selector: //div[@id="theatre-row"]//video/@poster | //div[@id="theatre-row"]//div[contains(@class, "video-wrapper")]//div[contains(@style,"background-image:") and not(contains(@style,"player-placeholder.gif"))]/@style postProcess: - replace: - - regex: .+url\((.+)\).+ + # Sometimes they put the URL in quotes, sometimes they just don't + # https://regex101.com/r/fszqAQ/3 + - regex: .+url\(\"?(.+)\"?\).+ with: $1 - URL: - selector: //a[contains(@href, "/signup?galleryId")][1]/@href - postProcess: - - replace: - - regex: .*= - with: "https://adultprime.com/studios/video/" + URL: &url //link[@rel="canonical"]/@href Code: - selector: //div[@class="keys"]/@title + selector: *url postProcess: - replace: - - regex: .+/(\d+)$ - with: $1 - Studio: - Name: //p[contains(@class,"update-info-line")]/b/a[contains(@href,"/studio/")]/text() + - regex: \D+(\d+)$ + with: $1 + Studio: + Name: $scene//p[contains(@class,"update-info-line")]//a[contains(@href,"/studio/")]/text() Tags: Name: - selector: //p[contains(@class,"update-info-line regular")][contains(b,"Niches:")]/text() + selector: $scene//b[contains(.,"Niches")]/following-sibling::text() split: ", " Performers: - Name: //p[@class="update-info-line regular"]/a[contains(@href, "/signup?")]/text() -#driver: -# useCDP: true -# headers: -# - Key: User-Agent -# Value: stash/1.0.0 -# clicks: -# - xpath: //ul[@id='studio-videos-container']/following-sibling::div[1]/a[@class="lSNext"] -# sleep: 1 -# - xpath: //ul[@id='studio-videos-container']/following-sibling::div[1]/a[@class="lSNext"] -# sleep: 1 - -# Last Updated November 17, 2022 + Name: $scene//b[contains(.,"Performer")]/following-sibling::a/text() +# Last Updated November 04, 2023 diff --git a/scrapers/AdultSiteRunner.yml b/scrapers/AdultSiteRunner.yml new file mode 100644 index 000000000..a69388239 --- /dev/null +++ b/scrapers/AdultSiteRunner.yml @@ -0,0 +1,134 @@ +name: Raunchy Bastards +sceneByURL: + - action: scrapeXPath + url: + - boundjocks.com/scene/ + - boyshalfwayhouse.com/scene/ + - coltstudiogroup.com/scene/ + - daddycarl.com/scene/ + - hotoldermale.com/scene/ + - monstercub.com/scene/ + - naturalbornbreeders.com/scene/ + - older4me.com/scene/ + - raunchybastards.com/scene/ + - stockydudes.com/scene/ + - toplatindaddies.com/scene/ + scraper: oldStyleSite + - action: scrapeXPath + url: + - blackboyaddictionz.com/scene/ + - blacksondaddies.com/scene/ + - myfirstdaddy.com/scene/ + - playdaddy.com/scene/ + scraper: newStyleSite +xPathScrapers: + oldStyleSite: + common: + $scene: //div[contains(@class, "sceneContainer")] + scene: + Title: $scene/div[@class="sceneTitle"] + Code: + selector: $scene//div[contains(@class, "sceneImgBig")]/@id + postProcess: + - replace: + - regex: \D* + with: $1 + Date: + selector: 
$scene//span[contains(@class, "sceneDetails")] + postProcess: &ppDate + - replace: + # https://regex101.com/r/rsjbb6/3 + - regex: ^(?:Details:\s*)?(\w{3}\s*\d{1,2}),\s*(\d{4}).*?$ + with: $1, $2 + - parseDate: Jan 2, 2006 + # All of this can be replaced once scrapers get access to the URL they are scraping + Studio: + Name: + selector: &image > + $scene//video/@poster + | $scene//div[contains(@class, "sceneImgBig")]/img/@src + | //div[contains(@style, "background-image")]/@style + | //*[contains(@class, "videoTrailer") or contains(@class, "bgScene")]//@srcset + postProcess: &studioNameFromURL + - replace: + - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$ + with: $1 + - map: + blackboyaddictionz: Black Boy Addictionz + blacksondaddies: Blacks on Daddies + boundjocks: Bound Jocks + boyshalfwayhouse: Boys Halfway House + coltstudiogroup: Colt Studio Group + daddycarl: Daddy Carl + hotoldermale: Hot Older Male + monstercub: Monster Cub + myfirstdaddy: My First Daddy + naturalbornbreeders: Natural Born Breeders + older4me: Older4Me + playdaddy: Play Daddy + raunchybastards: Raunchy Bastards + stockydudes: Stocky Dudes + toplatindaddies: Top Latin Daddies + URL: + selector: *image + postProcess: + - replace: + - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$ + with: https://$1.com + Performers: + Name: > + $scene//div[contains(@class, "scenePerformers")]/a + | $scene//div[@class="scenePerf"]/span[@class="perfName"] + URL: > + $scene//div[contains(@class, "scenePerformers")]/a/@href + | $scene//div[@class="scenePerf"]/@data-href + Tags: + Name: $scene//a[@class="sceneTagsLnk"]/text() + Details: + selector: $scene//div[contains(@class, "sceneDescription")]/text() + concat: "\n\n" + Image: + selector: *image + postProcess: + - replace: + - regex: .*url\("(.*)"\).* + with: $1 + - regex: \s*2x$ + with: + + newStyleSite: + common: + $details: //div[contains(@class, "container_styled_1")] + scene: + Title: //h2[@class="main_title"] + Code: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: \D* + with: $1 + # All of this can be replaced once scrapers get access to the URL they are scraping + Studio: + Name: + selector: //link[@rel="canonical"]/@href + postProcess: *studioNameFromURL + URL: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: ^(?:https:\/\/[\w\.]*?)([^.]+)\.com.*$ + with: https://$1.com + Performers: + Name: $details//span[contains(@class, "perfImage")]/a + URL: $details//span[contains(@class, "perfImage")]/a/@href + Details: + selector: $details//p/text() + concat: "\n\n" + Date: + selector: ($details//h5[contains(text(), "Details")]/text())[1] + postProcess: *ppDate + Image: + selector: //meta[@property="og:image"]/@content + Tags: + Name: $details//h5[contains(., "Categories")]/a/text() +# Last Updated September 22, 2023 diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Adultime/Adultime.yml similarity index 74% rename from scrapers/Algolia_Adultime.yml rename to scrapers/Adultime/Adultime.yml index 3b723a445..4a01a9fc9 100644 --- a/scrapers/Algolia_Adultime.yml +++ b/scrapers/Adultime/Adultime.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Adultime" sceneByURL: - action: script @@ -6,23 +7,30 @@ sceneByURL: - adamandevepictures.com/en/video/ - adulttime.com/en/video/ - adulttimepilots.com/en/video/ + - agentredgirl.com/en/video/ - analteenangels.com/en/video/ + - asmrfantasy.com/en/video/ - assholefever.com/en/video/ - beingtrans247.com/en/video/ + - blowmepov.com/en/video/ - caughtfapping.com/en/video/ - 
devilsfilm.com/en/video/ - devilstgirls.com/en/video/ - dpfanatics.com/en/video/ + - extremepickups.com/en/video/ - famedigital.com/en/video/ - footsiebabes.com/en/video/ - forbiddenseductions.com/en/video/ - girlstryanal.com/en/video/ + - givemeteens.com/en/video/ - hairyundies.com/en/video/ + - isthisreal.com/en/video/ - joymii.com/en/video/ - kissmefuckme.com/en/video/ - lezcuties.com/en/video/ - massage-parlor.com/en/video/ - milkingtable.com/en/video/ + - mixedx.com/en/video/ - modeltime.com/en/video/ - moderndaysins.com/en/video/ - mommysboy.com/en/video/ @@ -36,41 +44,52 @@ sceneByURL: - peternorth.com/en/video/ - prettydirty.com/en/video/ - puretaboo.com/en/video/ + - sabiendemonia.com/en/video/ - soapymassage.com/en/video/ - teensneaks.com/en/video/ + - theyeslist.com/en/video/ - transfixed.com/en/video/ - transgressivefilms.com/en/video/ - trickyspa.com/en/video/ - truelesbian.com/en/video/ - webyoung.com/en/video/ - welikegirls.com/en/video/ + - wolfwagner.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - validName +galleryByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - puretaboo + - gallery galleryByURL: - action: script url: + - accidentalgangbang.com/en/photo/ - devilsfilm.com/en/photo/ - joymii.com/en/photo/ - mommysgirl.com/en/photo/ @@ -78,10 +97,11 @@ galleryByURL: - peternorth.com/en/photo/ - prettydirty.com/en/photo/ - puretaboo.com/en/photo/ + - transfixed.com/en/video/ - webyoung.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - puretaboo - gallery movieByURL: @@ -90,9 +110,10 @@ movieByURL: - adulttime.com/en/dvd/ - devilsfilm.com/en/dvd/ - devilstgirls.com/en/dvd/ + - outofthefamily.com/en/dvd/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - puretaboo - movie -# Last Updated December 22, 2022 +# Last Updated October 12, 2023 diff --git a/scrapers/Algolia.py b/scrapers/Algolia/Algolia.py similarity index 76% rename from scrapers/Algolia.py rename to scrapers/Algolia/Algolia.py index 66e924612..3295a1183 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia/Algolia.py @@ -8,6 +8,13 @@ from configparser import ConfigParser, NoSectionError from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from bs4 import BeautifulSoup as bs import requests @@ -37,6 +44,15 @@ # Include non female performers NON_FEMALE = True +# a list of main channels (`mainChannelName` from the API) to use as the studio +# name for a scene +MAIN_CHANNELS_AS_STUDIO_FOR_SCENE = [ + "Buttman", + "Cock Choking Sluts", + "Devil's Film Parodies", + "Euro Angels", +] + # a dict with sites having movie sections # used when populating movie urls from the scene scraper MOVIE_SITES = { @@ -54,12 +70,38 @@ "zerotolerancefilms": "https://www.zerotolerancefilms.com/en/movie" } +# a dict of serie (`serie_name` from the API) which should set the value +# for the studio 
name for a scene +SERIE_USING_OVERRIDE_AS_STUDIO_FOR_SCENE = { + "Jonni Darkko's Stand Alone Scenes": "Jonni Darkko XXX", + "Big Boob Angels": "BAM Visions", + "Mick's ANAL PantyHOES": "BAM Visions", + "Real Anal Lovers": "BAM Visions", + "XXXmailed": "Blackmailed" +} + +# a list of serie (`serie_name` from the API) which should use the sitename +# for the studio name for a scene +SERIE_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ + "Evil", # sitename_pretty: Evil Angel + "Trans-Active", # sitename_pretty: Evil Angel +] + # a dict of sites (`sitename_pretty` from the API) which should set the value # for the studio name for a scene # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE = { - "Devilstgirls": "Devil's Tgirls" + "Adamandevepictures": "Adam & Eve Pictures", + "AgentRedGirl": "Agent Red Girl", + "Devils Gangbangs": "Devil's Gangbangs", + "Devilstgirls": "Devil's Tgirls", + "Dpfanatics": "DP Fanatics", + "Janedoe": "Jane Doe Pictures", + "ModernDaySins": "Modern-Day Sins", + "Transgressivexxx": "TransgressiveXXX", + "Hot House": "Hot House Entertainment", + "HotHouse.com": "Hot House Entertainment", } # a list of sites (`sitename_pretty` from the API) which should pick out the @@ -67,8 +109,16 @@ # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ + "ChaosMen", "Devil's Film", - "GenderXFilms" + "GenderXFilms", + "Give Me Teens", + "Hairy Undies", + "Lesbian Factor", + "Oopsie", + "Out of the Family", + "Rocco Siffredi", + "Squirtalicious", ] # a list of sites (`sitename_pretty` from the API) which should pick out the @@ -76,9 +126,27 @@ # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE = [ - "Muses", + "Extremepickups", # network_name: Adult Time Originals + "Isthisreal", # network_name: Is This Real + "Muses", # network_name: Transfixed + "Officemsconduct", # network_name: Transfixed + "Sabiendemonia", # network_name: Sabien DeMonia + "Upclosex" # network_name: UpCloseX ] +# a list of networks (`network_name` from the API) which should pick out the +# `sitename_pretty` for the studio name for a scene +NETWORKS_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ + "Fame Digital", # this should support all sub-studios listed at https://stashdb.org/studios/cd5591a5-eb26-42fc-a406-b6969a8ef3dd + "fistinginferno", + "MyXXXPass", +] + +# a dict of directors to use as the studio for a scene +DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE = { + "Le Wood": "LeWood" +} + def clean_text(details: str) -> str: """ @@ -197,8 +265,10 @@ def write_config(date, app_id, api_key): # API Search Data def api_search_req(type_search, query, url): api_request = None - if type_search == "query": - api_request = api_search_query(query, url) + if type_search == "query_all_scenes": + api_request = api_search_query("all_scenes", query, url) + if type_search == "query_all_photosets": + api_request = api_search_query("all_photosets", query, url) if type_search == "id": api_request = api_search_id(query, url) if api_request: @@ -234,27 +304,28 @@ def api_search_movie_id(m_id, url): return req def api_search_gallery_id(p_id, url): - gallery_id = [f"set_id:{p_id}"] + gallery_id = [[f"set_id:{p_id}"]] request_api = { "requests": [{ "indexName": "all_photosets", "params": "query=&hitsPerPage=20&page=0", - "facetFilters": gallery_id + "facetFilters": gallery_id, + "facets": [] }] } req = 
send_request(url, HEADERS, request_api) return req -def api_search_query(query, url): +def api_search_query(index_name, query, url): request_api = { "requests": [{ - "indexName": "all_scenes", + "indexName": index_name, "params": "query=" + query + "&hitsPerPage=40&page=0" }] } - req = send_request(url, HEADERS, request_api) - return req + res = send_request(url, HEADERS, request_api) + return res # Searching Result @@ -372,9 +443,9 @@ def match_result(api_scene, range_duration=60, single=False, clip_id: str=None): match_clip_id = False # Using database if database_dict: - db_duration = int(database_dict["duration"]) - db_height = str(database_dict["height"]) - db_size = int(database_dict["size"]) + db_duration = int(database_dict[0]["duration"]) + db_height = str(database_dict[0]["height"]) + db_size = int(database_dict[0]["size"]) if api_scene.get("download_file_sizes"): if db_height == "2160": api_filesize = api_scene["download_file_sizes"].get("4k") @@ -476,7 +547,7 @@ def parse_movie_json(movie_json: dict) -> dict: """ scrape = {} try: - studio_name = movie_json[0].get("sitename_pretty") + studio_name = determine_studio_name_from_json(movie_json[0]) except IndexError: log.debug("No movie found") return scrape @@ -514,6 +585,45 @@ def parse_movie_json(movie_json: dict) -> dict: scrape["director"] = ", ".join(directors) return scrape +def determine_studio_name_from_json(some_json): + ''' + Reusable function to determine studio name based on what was scraped. + This can be used for scraping: + - scene + - gallery + - movie + ''' + studio_name = None + if some_json.get('sitename_pretty'): + if some_json.get('sitename_pretty') in SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: + studio_name = \ + SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(some_json.get('sitename_pretty')) + elif some_json.get('sitename_pretty') in SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE \ + or some_json.get('serie_name') in SERIE_USING_SITENAME_AS_STUDIO_FOR_SCENE \ + or some_json.get('network_name') \ + and some_json.get('network_name') in NETWORKS_USING_SITENAME_AS_STUDIO_FOR_SCENE: + studio_name = some_json.get('sitename_pretty') + elif some_json.get('sitename_pretty') in SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE \ + and some_json.get('network_name'): + studio_name = some_json.get('network_name') + if not studio_name and some_json.get('network_name') and \ + some_json.get('network_name') in NETWORKS_USING_SITENAME_AS_STUDIO_FOR_SCENE: + studio_name = some_json.get('sitename_pretty') + if not studio_name and some_json.get('mainChannelName') and \ + some_json.get('mainChannelName') in MAIN_CHANNELS_AS_STUDIO_FOR_SCENE: + studio_name = some_json.get('mainChannelName') + if not studio_name and some_json.get('directors'): + for director in [ d.get('name').strip() for d in some_json.get('directors') ]: + if DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE.get(director): + studio_name = \ + DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE.get(director) + if not studio_name and some_json.get('serie_name'): + if some_json.get('serie_name') in SERIE_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: + studio_name = \ + SERIE_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(some_json.get('serie_name')) + else: + studio_name = some_json.get('serie_name') + return studio_name def parse_scene_json(scene_json, url=None): """ @@ -541,15 +651,9 @@ def parse_scene_json(scene_json, url=None): # Studio scrape['studio'] = {} - if scene_json.get('sitename_pretty') and scene_json.get('sitename_pretty') in SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = 
SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(scene_json.get('sitename_pretty')) - elif scene_json.get('sitename_pretty') and scene_json.get('sitename_pretty') in SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = scene_json.get('sitename_pretty') - elif scene_json.get('sitename_pretty') and scene_json.get('sitename_pretty') in SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE \ - and scene_json.get('network_name'): - scrape['studio']['name'] = scene_json.get('network_name') - elif scene_json.get('serie_name'): - scrape['studio']['name'] = scene_json.get('serie_name') + studio_name = determine_studio_name_from_json(scene_json) + if studio_name: + scrape['studio']['name'] = studio_name log.debug( f"[STUDIO] {scene_json.get('serie_name')} - {scene_json.get('network_name')} - {scene_json.get('mainChannelName')} - {scene_json.get('sitename_pretty')}" @@ -590,7 +694,9 @@ def parse_scene_json(scene_json, url=None): log.warning("Can't locate image.") # URL try: - hostname = scene_json['sitename'] + hostname = scene_json.get('sitename') + if hostname is None: + hostname = SITE # Movie if scene_json.get('movie_title'): scrape['movies'] = [{ @@ -604,17 +710,22 @@ def parse_scene_json(scene_json, url=None): if URL_DOMAIN and MOVIE_SITES.get(URL_DOMAIN): scrape['movies'][0][ 'url'] = f"{MOVIE_SITES[URL_DOMAIN]}/{scene_json['url_movie_title']}/{scene_json['movie_id']}" - net_name = scene_json['network_name'] - if net_name.lower() == "21 sextury": - hostname = "21sextury" - elif net_name.lower() == "21 naturals": - hostname = "21naturals" + net_name = scene_json.get('network_name') + if net_name: + if net_name.lower() == "21 sextury": + hostname = "21sextury" + elif net_name.lower() == "21 naturals": + hostname = "21naturals" + elif net_name.lower() == 'transfixed': + hostname = 'transfixed' + scrape[ - 'url'] = f"https://{hostname.lower()}.com/en/video/{scene_json['sitename'].lower()}/{scene_json['url_title']}/{scene_json['clip_id']}" - except: + 'url'] = f"https://{hostname.lower()}.com/en/video/{hostname.lower()}/{scene_json['url_title']}/{scene_json['clip_id']}" + except Exception as exc: + log.debug(f"{exc}") if url: scrape['url'] = url - #debug(f"{scrape}") + #log.debug(f"{scrape}") return scrape def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: @@ -623,10 +734,12 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: """ scrape = {} # Title - if gallery_json.get('title'): + if gallery_json.get('clip_title'): + scrape['title'] = gallery_json['clip_title'].strip() + elif gallery_json.get('title'): scrape['title'] = gallery_json['title'].strip() # Date - scrape['date'] = gallery_json.get('date_online') + scrape['date'] = gallery_json.get('date_online') or gallery_json.get('release_date') # Details scrape['details'] = clean_text(gallery_json.get('description')) @@ -643,15 +756,9 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: # Studio scrape['studio'] = {} - if gallery_json.get('sitename_pretty') and gallery_json.get('sitename_pretty') in SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(gallery_json.get('sitename_pretty')) - elif gallery_json.get('sitename_pretty') and gallery_json.get('sitename_pretty') in SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = gallery_json.get('sitename_pretty') - elif gallery_json.get('sitename_pretty') and gallery_json.get('sitename_pretty') in SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE \ - and 
gallery_json.get('network_name'): - scrape['studio']['name'] = gallery_json.get('network_name') - elif gallery_json.get('serie_name'): - scrape['studio']['name'] = gallery_json.get('serie_name') + studio_name = determine_studio_name_from_json(gallery_json) + if studio_name: + scrape['studio']['name'] = studio_name log.debug( f"[STUDIO] {gallery_json.get('serie_name')} - {gallery_json.get('network_name')} - {gallery_json.get('mainChannelName')} - {gallery_json.get('sitename_pretty')}" @@ -687,8 +794,8 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: hostname = "21sextury" elif net_name.lower() == "21 naturals": hostname = "21naturals" - scrape[ - 'url'] = f"https://{hostname.lower()}.com/en/video/{gallery_json['sitename'].lower()}/{gallery_json['url_title']}/{gallery_json['set_id']}" + scrape['url'] = f"https://www.{hostname.lower()}.com/en/photo/" \ + f"{gallery_json['url_title']}/{gallery_json['set_id']}" except: if url: scrape['url'] = url @@ -720,6 +827,8 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: SCENE_TITLE = FRAGMENT.get("title") SCENE_URL = FRAGMENT.get("url") +# log.trace(f"fragment: {FRAGMENT}") + # ACCESS API # Check existing API keys CURRENT_TIME = datetime.datetime.now() @@ -775,7 +884,7 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: "GraphQL request failed, accessing database directly...") database_dict = check_db(DB_PATH, SCENE_ID) else: - database_dict = database_dict["file"] + database_dict = database_dict["files"] log.debug(f"[DATABASE] Info: {database_dict}") else: database_dict = None @@ -816,7 +925,7 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: if SEARCH_TITLE: SEARCH_TITLE = SEARCH_TITLE.replace(".", " ") log.debug(f"[API] Searching for: {SEARCH_TITLE}") - api_search = api_search_req("query", SEARCH_TITLE, api_url) + api_search = api_search_req("query_all_scenes", SEARCH_TITLE, api_url) final_json = None if api_search: result_search = [] @@ -842,13 +951,13 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: log.warning("[API] No result") if url_title and api_json is None: log.debug("[API] Searching using URL_TITLE") - api_search = api_search_req("query", url_title, api_url) + api_search = api_search_req("query_all_scenes", url_title, api_url) if api_search: log.info(f"[API] Search gives {len(api_search)} result(s)") api_json = json_parser(api_search) if SCENE_TITLE and api_json is None: log.debug("[API] Searching using STASH_TITLE") - api_search = api_search_req("query", SCENE_TITLE, api_url) + api_search = api_search_req("query_all_scenes", SCENE_TITLE, api_url) if api_search: log.info(f"[API] Search gives {len(api_search)} result(s)") api_json = json_parser(api_search) @@ -872,13 +981,39 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: #log.debug(scraped_movie) print(json.dumps(scraped_movie)) elif "gallery" in sys.argv: - log.debug("Scraping gallery") - gallery_id = get_id_from_url(SCENE_URL) - if gallery_id: - gallery_results = api_search_gallery_id(gallery_id, api_url) - gallery = gallery_results.json()["results"][0].get("hits") - if gallery: - #log.debug(gallery[0]) - scraped_gallery = parse_gallery_json(gallery[0]) - #log.debug(scraped_gallery) - print(json.dumps(scraped_gallery)) + scraped_gallery = None + if SCENE_URL: + if "/video/" in SCENE_URL: + log.debug("Scraping scene by URL") + scene_id = get_id_from_url(SCENE_URL) + api_search_response = api_search_req("id", scene_id, api_url) + if api_search_response: + # 
log.debug(f"[API] Search gives {len(api_search_response)} result(s)") + # log.trace(f"api_search_response: {api_search_response}") + scraped_gallery = parse_gallery_json(api_search_response[0]) + else: + log.debug("Scraping gallery by URL") + gallery_id = get_id_from_url(SCENE_URL) + if gallery_id: + gallery_results = api_search_gallery_id(gallery_id, api_url) + gallery = gallery_results.json()["results"][0].get("hits") + if gallery: + #log.debug(gallery[0]) + scraped_gallery = parse_gallery_json(gallery[0]) + #log.debug(scraped_gallery) + elif SCENE_TITLE: + log.debug("Scraping gallery by fragment") + # log.debug(f"[API] Searching using SCENE_TITLE: {SCENE_TITLE}") + api_search = api_search_req("query_all_photosets", SCENE_TITLE, api_url) + if api_search: + log.info(f"[API] Search gives {len(api_search)} result(s)") + # log.trace(f"api_search: {api_search}") + log.debug(f"Galleries found: {'; '.join([g['title'] for g in api_search])}") + scraped_gallery = parse_gallery_json(api_search[0]) + # Scraping the JSON + if scraped_gallery: + print(json.dumps(scraped_gallery)) + else: + log.error("Can't find the gallery") + print(json.dumps({})) + sys.exit() diff --git a/scrapers/Algolia/package b/scrapers/Algolia/package new file mode 100644 index 000000000..acffc82b4 --- /dev/null +++ b/scrapers/Algolia/package @@ -0,0 +1,3 @@ +# script used as a dependency only +# requires: py_common +name: Algolia diff --git a/scrapers/Algolia_Rodsroom.yml b/scrapers/Algolia_Rodsroom.yml deleted file mode 100644 index 516382fe8..000000000 --- a/scrapers/Algolia_Rodsroom.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: "Rod's Room" -sceneByURL: - - action: script - url: - - rodsroom.com/en/video - script: - - python - - Algolia.py - - rodsroom -sceneByFragment: - action: script - script: - - python - - Algolia.py - - rodsroom -sceneByName: - action: script - script: - - python - - Algolia.py - - rodsroom - - searchName -sceneByQueryFragment: - action: script - script: - - python - - Algolia.py - - rodsroom - - validName -galleryByURL: - - action: script - url: - - rodsroom.com/en/photo/ - script: - - python - - Algolia.py - - rodsroom - - gallery -# Last Updated December 26, 2022 diff --git a/scrapers/Algolia_AllGirlMassage.yml b/scrapers/AllGirlMassage/AllGirlMassage.yml similarity index 78% rename from scrapers/Algolia_AllGirlMassage.yml rename to scrapers/AllGirlMassage/AllGirlMassage.yml index e648981e7..45c23adb0 100644 --- a/scrapers/Algolia_AllGirlMassage.yml +++ b/scrapers/AllGirlMassage/AllGirlMassage.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "AllGirlMassage" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - allgirlmassage.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - allgirlmassage.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage - gallery # Last Updated December 22, 2022 diff --git a/scrapers/LegalPorno.py b/scrapers/AnalVids/AnalVids.py similarity index 100% rename from scrapers/LegalPorno.py rename to scrapers/AnalVids/AnalVids.py diff --git a/scrapers/AnalVids/AnalVids.yml b/scrapers/AnalVids/AnalVids.yml new file mode 
100644 index 000000000..ae746fa42 --- /dev/null +++ b/scrapers/AnalVids/AnalVids.yml @@ -0,0 +1,60 @@ +# yaml-language-server: $schema=../validator/scraper.schema.json + +name: "AnalVids" +sceneByURL: + - action: scrapeXPath + url: + - analvids.com/watch/ + - pissvids.com/watch/ + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - analvids.com/model/ + - pissvids.com/model/ + scraper: performerScraper +sceneByFragment: + action: script + script: + - python + # use python3 instead if needed + - AnalVids.py + - query + +xPathScrapers: + sceneScraper: + common: + $title: //h1[contains(@class, "watch__title")]//text()[not(ancestor::span)] + scene: + Title: + selector: $title + concat: " " + Date: + selector: //i[contains(@class, "bi-calendar3")]/text() + postProcess: + - parseDate: 2006-01-02 + Details: + selector: //div[contains(@class, "text-mob-more")]//text()[not(parent::span[contains(@class, "dots")])] + concat: " " + Code: + selector: $title + postProcess: + - replace: + - regex: .+?([A-Z]{2,3}\d+)$|(.+) + with: $1 + Performers: + Name: //h1[contains(@class, "watch__title")]//a/text() + URL: //h1[contains(@class, "watch__title")]//a/@href + Studio: + Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/text() + URL: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/@href + Tags: + Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/genre/")]/text() + Image: //video/@data-poster + + performerScraper: + performer: + Name: //h1 + Country: //a[contains(@href, "nationality")] + Image: //div[contains(@class, 'model__left')]//img/@src +# Last Updated August 16, 2023 diff --git a/scrapers/AniDB.yml b/scrapers/AniDB.yml new file mode 100644 index 000000000..e138715cc --- /dev/null +++ b/scrapers/AniDB.yml @@ -0,0 +1,233 @@ +name: AniDB + +# ~~~~~~ GETTING STARTED ~~~~~~ +# Store this file as ~/stash/scrapers/AniDB.yml +# - If the scrapers directory is not there, you must create it first +# +# ~~~~~~ SETTING COOKIES ~~~~~~ +# Note: I recommend creating a new account just for this scraper +# 1. Go to the anidb.net website > log in > right-click > Inspect > find the cookie storage +# 2. Copy the "Value" of "adbsess" and "adbuin" and replace them in the cookies section of this document +# 3. If your account is new, you need to access any NSFW anime and confirm that you want to see 18+ content +# 4. Do not change the order of the columns, as that can stop the scraper from working +# +# ~~~~~~ SETTING USER AGENT ~~~~~~ +# - Go to your Stash > Settings > Metadata Providers > Scraping > Scraper User Agent +# - Use the User Agent of your choice +# - For example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0 +# +# ~~~~~ RECOMMENDED WORKFLOW ~~~~~ +# 1. Scrape scene by fragment (for performers, tags, artwork, etc) +# - If this fails, scrape by anime URL +# 2. Scrape by episode URL (for title, date) +# 3. Manually set the movie scene number on the scene page +# 4. Navigate to each performer's page & scrape by URL +# 5. 
Navigate to movie page & scrape by URL + +# ~~~~~~ HOW TO USE (detailed) ~~~~~~ +# tl;dr when in doubt, use the URL scrapers +# - For scenes: anidb.net/episode/XXX, anidb.net/anime/XXX +# - For performers: anidb.net/character/XXX +# - For movies: anidb.net/anime/XXX +# +# SCENES (by anime): +# - The Scraper by Fragment will usually work, assuming a filename like "[XX] My Lewd Anime - 01 (720p) (x264).mkv" +# - The fragment regex strips underscores, dashes, bracketed and parenthesized content, and two-digit numbers (see the sketch after this comment block) +# - For example, the above filename is stripped to "My Lewd Anime" +# - If this does not work, I recommend scraping with the episode URL, the anime URL, or the name scraper +# - By default, the scene scraper does not set the title, as the episode scraper serves this purpose better +# - However, if you'd like to enable this functionality, uncomment the "Title" line in sceneScraperAnime > scene +# - The scene (by anime) scraper automatically creates a new movie (i.e., series) entry, +# but unfortunately you will have to set the movie scene (i.e., episode) number manually +# +# SCENES (by episode): +# - This scraper is only accessible by scraping the episode URL (anidb.net/episode/XXX) +# - The scene episode scraper sets the episode title, the anime URL (if missing), and the original airing date +# - By default, a regex replacement strips the episode number when setting the title +# - If you want to keep the episode number, delete the second regex replacement in +# sceneScraperEpisode > scene > Title > postProcess > replace +# +# MOVIES: +# - The scene (by anime) scraper automatically creates a new movie entry using the anime title and anime URL +# - On the movie page, you can scrape by URL +# +# PERFORMERS: +# - Performers need to be individually scraped by name or URL +# - I recommend creating them by scraping the anime URL, then navigating to the performer page. +# The performer URL should already be set, so you just need to press the scrape by URL button. +# +# ~~~~~ TROUBLESHOOTING ~~~~~ +# - If you find that the scraper has suddenly stopped working, RESET YOUR COOKIES! +# +# ~~~~~ ANYTHING ELSE? ~~~~~ +# THAT'S IT, ENJOY! 
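As an aside, the fragment cleanup described in the comments above can be illustrated with a minimal Python sketch (a hypothetical helper, not part of this scraper; it mirrors the queryURLReplace rules in sceneByFragment below and assumes Stash applies the replacements in order):

```python
import re

def clean_filename(filename: str) -> str:
    """Sketch of the sceneByFragment cleanup, mirroring the YAML queryURLReplace rules."""
    # strip bracketed/parenthesized groups, two-digit numbers, and the file extension
    cleaned = re.sub(r"\[.*?\]|\(.*?\)|\d\d|\..*", "", filename)
    # turn dashes and underscores into spaces (the YAML rule \-|\_)
    cleaned = re.sub(r"[-_]", " ", cleaned)
    # collapse whitespace runs into %20 for the query URL
    return re.sub(r"\s+", "%20", cleaned.strip())

print(clean_filename("[XX] My Lewd Anime - 01 (720p) (x264).mkv"))  # My%20Lewd%20Anime
```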
+# Made by @escargotbuffed, further improvements by @symptom6186 + +performerByURL: + - action: scrapeXPath + url: + - https://anidb.net + scraper: performerScraper +performerByName: + action: scrapeXPath + queryURL: https://anidb.net/search/anime/?adb.search={}&entity.chartb=1 + scraper: performerSearch + +sceneByFragment: + action: scrapeXPath + queryURL: https://anidb.net/anime/?adb.search={filename} + queryURLReplace: + filename: + - regex: '\[.*?\]|\(.*?\)|\d\d|\..*' + with: + - regex: '\-|\_' + with: " " + - regex: \s+ + with: "%20" + scraper: sceneScraperAnime +sceneByURL: + - action: scrapeXPath + url: + - https://anidb.net/episode/ + scraper: sceneScraperEpisode + - action: scrapeXPath + url: + - https://anidb.net/anime/ + scraper: sceneScraperAnime +sceneByName: + action: scrapeXPath + queryURL: https://anidb.net/search/anime/?adb.search={}&entity.animetb=1 + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraperAnime + +movieByURL: + - action: scrapeXPath + url: + - https://anidb.net/ + scraper: sceneScraperAnime + +xPathScrapers: + performerSearch: + performer: + Name: //td[@class="relid"]/a + URL: + selector: //td[@class="relid"]/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + performerScraper: + common: + $info: //div[@class="g_section info"] + $tab_1_pane: //div[@class="g_section info"]//div[@id="tab_1_pane"] + $looks: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, 'looks')] + performer: + Name: $tab_1_pane//tr[contains(@class, 'mainname')]//span[@itemprop="name"] + Aliases: $tab_1_pane//tr[contains(@class, 'official')]//label[@itemprop="alternateName"] + Disambiguation: $tab_1_pane//tr[contains(@class, 'mainname')]//a[@class='shortlink'] + Gender: $tab_1_pane//tr[contains(@class, 'gender')]//span[@itemprop="gender"] + Ethnicity: $tab_1_pane//tr[contains(@class, 'entity')]//span[@class="tagname"] + HairColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'hair')] + EyeColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'eyes')] + Height: $tab_1_pane//tr[contains(@class, 'height')]//span[@itemprop="height"] + Weight: $tab_1_pane//tr[contains(@class, 'weight')]//span[@itemprop="weight"] + #Measurements: Todo + URL: //link[@rel="canonical"]/@href + Details: + selector: //div[@itemprop="description"]//text() + concat: "\n" + Tags: + Name: $tab_1_pane//span[@class="g_tag"]//span[@class="tagname"] + Image: $info//div[@class="image"]//img/@src + + sceneSearch: + scene: + Title: //td[@class="relid"]/a + URL: + selector: //td[@class="relid"]/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + Image: //td[@class="thumb anime"]//img/@src + sceneScraperEpisode: + scene: + Title: + selector: //div[@id="layout-main"]//h1[@class="ep"] + postProcess: + - replace: + - regex: ^.{0,9} + with: "" + - regex: \- \d+ \- + with: "/" + URL: + selector: //ul[@class="main-tabs"]//li[@class="g_odd anime"]//span/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + Date: //div[@id="layout-main"]//span[@itemprop="datePublished"]/@content + sceneScraperAnime: + common: + $info: //div[@class="g_section info"] + $title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//span[@itemprop="name"] + $en_title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, "official verified") and contains(.//span, 'en')]//label[@itemprop="alternateName"] + $character: 
//div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"] + scene: + #Title: $en_title or $title + #Date: + # selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")] + # parseDate: 02.01.2006 + Details: + selector: //div[@itemprop="description"]//text() + concat: " " + Tags: + Name: $info//div[@id="tab_1_pane"]//span[@class="tagname"] + Performers: + Name: $character/a/span + URL: + selector: $character/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + Movies: + Name: $title + URL: //link[@rel="canonical"]/@href + Studio: + Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a + Image: $info//div[@class="image"]//img/@src + URL: //link[@rel="canonical"]/@href + movie: + Name: $title + Aliases: $en_title + Date: + selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")] + postProcess: + - parseDate: 02.01.2006 + Synopsis: + selector: //div[@itemprop="description"]//text() + concat: " " + Studio: + Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a + FrontImage: $info//div[@class="image"]//img/@src + URL: //link[@rel="canonical"]/@href + +driver: + cookies: + - CookieURL: "https://anidb.net/" + Cookies: + # Accessing adult content requires an AniDB account + # Replace the Value fields below + - Name: "adbsess" + Domain: "anidb.net" + Value: "" # Enter the value of the 'adbsess' cookie here + Path: "/" + - Name: "adbuin" + Domain: "anidb.net" + Value: "" # Enter the value of the 'adbuin' cookie here + Path: "/" +# Last Updated Dec 20, 2023 diff --git a/scrapers/Anime-DB.yml b/scrapers/Anime-DB.yml new file mode 100644 index 000000000..743cd5933 --- /dev/null +++ b/scrapers/Anime-DB.yml @@ -0,0 +1,129 @@ +name: Anime-DB + +sceneByName: + action: scrapeXPath + queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url=%2F%2Fadultanime.dbsearch.net%2Fsearch%2F%3Fkeyword%3D{}&check=true" + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url={url}&check=true" + queryURLReplace: + url: + - regex: "https:" + with: + - regex: \/ + with: "%2F" + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - adultanime.dbsearch.net + queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url={url}&check=true" + queryURLReplace: + url: + - regex: "https:" + with: + - regex: \/ + with: "%2F" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + scene: + Title: //div[@class="item-info"]/h4/a/text() + URL: + selector: //div[@class="item-info"]/h4/a/@href + postProcess: + - replace: + - regex: "^" + with: "https:" + Image: + selector: //section[@class="item-box"]/div[@class="item-img"]/a/img/@data-src + postProcess: + - replace: + - regex: "^" + with: "https:" + - regex: "/basic/" + with: "/small/" + - regex: '\.jpg' + with: "_s.jpg" + Date: + selector: //div[@class="item-info"]/p[@class="ndate"]/span/text() + postProcess: + - replace: # 2006年1月2日 + - regex: "\u5E74|\u6708" + with: "-" + - regex: "\u65E5" + with: + - regex: -(\d)- + with: -0$1- + - regex: -(\d)$ + with: -0$1 + Studio: + Name: //div[@class="item-info"]/p[@class="maker"]/a/text() + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + URL: //meta[@property="og:url"]/@content + Details: + selector: 
//section[@class="iteminfo-box"]/blockquote/p[@class="pq"]//text() + concat: "\n\n" + Code: //dt[text()="規格品番"]/following-sibling::dd[1]/p/text() + Image: //section[@id="sample-image"]/img/@data-src + Tags: + Name: + selector: //nav[@id="tag-list"]/ul/li/a/text() + postProcess: + - map: # remove all 作品形式 (format) tags + DVD: "" + オリジナルアニメ作品: "" + PCゲーム原作アニメ: "" + コミック原作アニメ: "" + ライトノベル・ノベル原作アニメ: "" + 同人原作アニメ: "" + アダルトコミック原作アニメ: "" + ボーイズラブアニメ作品: "" + 廉価版アニメ: "" + BD-BOX・DVD-BOX: "" + 3D: "" + RPG: "" + アクション: "" + 麻雀・テーブルゲーム: "" + 3Dポリゴン: "" + 廉価版・新装版: "" + 萌えゲーアワード受賞: "" + 4時間以上作品: "" + ベスト・総集編: "" + サンプル動画: "" + アドベンチャー: "" + シミュレーション: "" + Blu-ray(ブルーレイ): "" + DVDPG: "" + UMD: "" + VFT: "" + フルボイス: "" + 廉価版: "" + BDPG: "" + 売り尽くしセール: "" # other unneeded tags + Yahooコメント掲載禁止: "" + アニメ: "" + 特典付き・セット商品: "" + Studio: + Name: //dt[text()="レーベル"]/following-sibling::dd[1]/p/text() + Date: + selector: //dt[text()="発売日"]/following-sibling::dd[1]/p/text() + postProcess: + - replace: # 2006年1月2日 + - regex: "\u5E74|\u6708" + with: "-" + - regex: "\u65E5" + with: + - regex: -(\d)- + with: -0$1- + - regex: -(\d)$ + with: -0$1 + +driver: + useCDP: true # needed for the age confirmation redirect - cookies only work temporarily + +# Last Updated January 22, 2023 diff --git a/scrapers/AnimeCharactersDatabase.py b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py similarity index 96% rename from scrapers/AnimeCharactersDatabase.py rename to scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py index 6628d04c4..35e6f76fc 100644 --- a/scrapers/AnimeCharactersDatabase.py +++ b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py @@ -1,8 +1,16 @@ import json +import os import re import sys from datetime import datetime +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import cloudscraper except ModuleNotFoundError: diff --git a/scrapers/AnimeCharactersDatabase.yml b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml similarity index 93% rename from scrapers/AnimeCharactersDatabase.yml rename to scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml index f83e35251..22d778cfd 100644 --- a/scrapers/AnimeCharactersDatabase.yml +++ b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml @@ -1,4 +1,5 @@ name: AnimeCharactersDatabase +# requires: py_common performerByURL: - action: script diff --git a/scrapers/AntonioSuleiman.yml b/scrapers/AntonioSuleiman.yml new file mode 100644 index 000000000..47a5d61af --- /dev/null +++ b/scrapers/AntonioSuleiman.yml @@ -0,0 +1,67 @@ +name: AntonioSuleiman.com +sceneByURL: + - action: scrapeXPath + url: + - antoniosuleiman.com + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://antoniosuleiman.com/search.php?query={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $scene: //div[contains(@class, "updatesBlock")] + $image: (//div[contains(@class, "updatesBlock")]//img)[1] + scene: + Title: $scene//h3 + Date: + selector: ($scene//div[contains(@class, "updateDetails")]//p)[1] + postProcess: + - parseDate: 2006-01-02 + Details: $scene/div[@class="wrapper"]/*[last()] + URL: 
//link[@rel="canonical"]/@href + Tags: + Name: + # The worst way to do tags but it's all they have + selector: //meta[@name="keywords"]/@content + split: "," + Performers: + Name: $scene//*[contains(@class,"tour_update_models")]//a + Image: >- + $image/@src0_4x | + $image/@src0_3x | + $image/@src0_2x | + $image/@src0_1x + Studio: + Name: + fixed: Antonio Suleiman + sceneSearch: + common: + $scene: //div[@data-url] + scene: + Title: $scene//p[@class="left-first-paragraph"] + URL: $scene/@data-url + # Search doesn't return any description but we can show the performers instead + Details: //div[@data-url]//p[@class="left-second-paragraph"] + Image: $scene//img/@src0_1x + Date: $scene//p[@class="right-paragraph" and not(span)] +driver: + cookies: + - CookieURL: "https://antoniosuleiman.com" + Cookies: + - Name: "PHPSESSID" + Domain: ".antoniosuleiman.com" + # Unsure about the duration of this cookie + Value: ovejq7d8cfhoc99q1jrn265af8 + Path: "/" + - Name: "lang" + Domain: ".antoniosuleiman.com" + # 0 is English, 1 is Arabic + Value: "0" + Path: "/" +# Last Updated November 20, 2023 diff --git a/scrapers/Arx.py b/scrapers/Arx/Arx.py similarity index 77% rename from scrapers/Arx.py rename to scrapers/Arx/Arx.py index e7cee3306..70dff548a 100644 --- a/scrapers/Arx.py +++ b/scrapers/Arx/Arx.py @@ -1,8 +1,9 @@ -import requests import sys import json from urllib.parse import urlparse +import requests + # Static definition, used in the GraphQL request site_ids = { 'japanlust.com': 2, @@ -15,8 +16,11 @@ 'transroommates.com': 12 } +# Timeout (seconds) to prevent indefinite hanging +API_TIMEOUT = 10 + # GraphQL API endpoint -endpoint = "https://arwest-api-production.herokuapp.com/graphql" +ENDPOINT = "https://arwest-api-production.herokuapp.com/graphql" # Request headers headers = { @@ -30,11 +34,11 @@ "Referer": "https://lesworship.com" } -def __prefix(levelChar): - startLevelChar = b'\x01' - endLevelChar = b'\x02' +def __prefix(level_char): + start_level_char = b'\x01' + end_level_char = b'\x02' - ret = startLevelChar + levelChar + endLevelChar + ret = start_level_char + level_char + end_level_char return ret.decode() def __log(levelChar, s): @@ -43,36 +47,39 @@ def __log(levelChar, s): print(__prefix(levelChar) + s + "\n", file=sys.stderr, flush=True) -def LogTrace(s): +def log_trace(s): __log(b't', s) -def LogDebug(s): +def log_debug(s): __log(b'd', s) -def LogInfo(s): +def log_info(s): __log(b'i', s) -def LogWarning(s): +def log_warning(s): __log(b'w', s) -def LogError(s): +def log_error(s): __log(b'e', s) -def readJSONInput(): - input = sys.stdin.read() - return json.loads(input) +def read_json_input(): + json_input = sys.stdin.read() + return json.loads(json_input) -def callGraphQL(query, variables=None): - json = {'query': query} +def call_graphql(query, variables=None): + graphql_json = {'query': query} if variables is not None: - json['variables'] = variables + graphql_json['variables'] = variables - response = requests.post(endpoint, json=json, headers=headers) + response = requests.post(ENDPOINT, json=graphql_json, headers=headers, timeout=API_TIMEOUT) if response.status_code == 200: result = response.json() + + log_debug(json.dumps(result)) + if result.get("errors", None): for error in result["errors"]["errors"]: raise Exception("GraphQL error: {}".format(error)) @@ -85,7 +92,7 @@ def callGraphQL(query, variables=None): ) -def getScene(url): +def get_scene(url): # Sending the full query that gets used in the regular frontend query = """ query @@ -144,16 +151,16 @@ def getScene(url): 
site_id = site_ids.get(urlparse(url).netloc) if site_id is None: - LogError(f"Could not determine id for site {urlparse(url).netloc}") + log_error(f"Could not determine id for site {urlparse(url).netloc}") return None - + try: scene_id = int(urlparse(url).path.split('/')[2]) except ValueError: - LogError(f"No scene id found in url {url}") + log_error(f"No scene id found in url {url}") return None - - LogInfo(f"Scraping scene {scene_id}") + + log_info(f"Scraping scene {scene_id}") variables = { 'id': int(scene_id), @@ -161,9 +168,9 @@ def getScene(url): } try: - result = callGraphQL(query, variables) + result = call_graphql(query, variables) except ConnectionError as e: - LogError(e) + log_error(e) return None result = result.get('scene') @@ -176,12 +183,13 @@ def getScene(url): ret['tags'] = [{'name': x.get('name')} for x in result.get('genres')] ret['performers'] = [{'name': x.get('stageName')} for x in result.get('actors')] ret['image'] = result.get('primaryPhotoUrl') - ret['date'] = result.get('createdAt')[:10] + ret['date'] = result.get('availableAt') and result.get('availableAt')[:10] \ + or result.get('createdAt') and result.get('createdAt')[:10] return ret if sys.argv[1] == 'scrapeByURL': - i = readJSONInput() - ret = getScene(i.get('url')) + i = read_json_input() + ret = get_scene(i.get('url')) print(json.dumps(ret)) diff --git a/scrapers/Arx.yml b/scrapers/Arx/Arx.yml similarity index 91% rename from scrapers/Arx.yml rename to scrapers/Arx/Arx.yml index 3a5cd7318..3b970eebf 100644 --- a/scrapers/Arx.yml +++ b/scrapers/Arx/Arx.yml @@ -15,4 +15,4 @@ sceneByURL: - Arx.py - scrapeByURL -# Last Updated October 01, 2021 +# Last Updated April 24, 2023 diff --git a/scrapers/Assylum.yml b/scrapers/Assylum.yml index d98450a1c..14fadaa0a 100644 --- a/scrapers/Assylum.yml +++ b/scrapers/Assylum.yml @@ -25,7 +25,7 @@ xPathScrapers: selector: //div[@class='mainpic']/comment() postProcess: - replace: - - regex: .*src="(.*?)".* + - regex: Works without token - selector: //base/@href|//div[@class="player-thumb"]//img[contains(@class, "update_thumb")]/@src0_1x - concat: "|" + Image: &image //div[@class="player-thumb"]//img[contains(@class, "update_thumb")]/@src0_1x + Details: &details + selector: //p[contains(@class, "descriptionFull")]//text() + concat: "\n\n" postProcess: - replace: - - regex: "[|].+?\\.hwcdn.net/(.+?)\\?.+$" + - regex: (.*?)\s*Read Less with: $1 - Details: - selector: //div[@class="update-info-block"]/h3[text()="Description:"]/following-sibling::text() - concat: "\n\n" + + newSiteScraper: + scene: + Title: //h1 + Date: *date + Image: *image + Tags: *tags + Studio: *studio + Details: *details + Performers: + Name: //div[@class="card txt-lg-left"]//*[@class="model-name"] + URL: //div[@class="card txt-lg-left"]//a/@href performerScraper: common: diff --git a/scrapers/FalconStudios/FalconStudios.yml b/scrapers/FalconStudios/FalconStudios.yml new file mode 100644 index 000000000..61bc61961 --- /dev/null +++ b/scrapers/FalconStudios/FalconStudios.yml @@ -0,0 +1,32 @@ +# requires: Algolia +name: Falcon Studios +sceneByURL: + - action: script + url: + - falconstudios.com/en/video + - hothouse.com/en/video + script: + - python + - ../Algolia/Algolia.py + - falconstudios +sceneByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - falconstudios +sceneByName: + action: script + script: + - python + - ../Algolia/Algolia.py + - falconstudios + - searchName +sceneByQueryFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - falconstudios + - 
validName +# Last Updated September 09, 2023 diff --git a/scrapers/Algolia_FantasyMassage.yml b/scrapers/FantasyMassage/FantasyMassage.yml similarity index 78% rename from scrapers/Algolia_FantasyMassage.yml rename to scrapers/FantasyMassage/FantasyMassage.yml index 22e387367..f5bdf03e3 100644 --- a/scrapers/Algolia_FantasyMassage.yml +++ b/scrapers/FantasyMassage/FantasyMassage.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "FantasyMassage" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - fantasymassage.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - fantasymassage.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage - gallery # Last Updated December 22, 2022 diff --git a/scrapers/FapHouse.yml b/scrapers/FapHouse.yml index b1785ade3..aff15363f 100644 --- a/scrapers/FapHouse.yml +++ b/scrapers/FapHouse.yml @@ -13,11 +13,11 @@ xPathScrapers: postProcess: - parseDate: "02.01.2006" Details: - selector: //div[contains(@class,"video-info-details__description")]/span + selector: //div[contains(@class,"video-info-details")]//p concat: " " Tags: Name: //div[@class="video-info-details__categories"]/a Studio: Name: //a[@class="video-info-details__studio-link"] Image: //meta[@property="og:image"]/@content -# Last Updated November 24, 2021 +# Last Updated April 25, 2023 diff --git a/scrapers/FemJoy.yml b/scrapers/FemJoy.yml deleted file mode 100644 index 8b904681d..000000000 --- a/scrapers/FemJoy.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: FemJoy -galleryByURL: - - action: scrapeXPath - url: - - femjoy.com/photos/ - scraper: galleryScraper -sceneByURL: - - action: scrapeXPath - url: - - femjoy.com/videos/ - scraper: sceneScraper - -xPathScrapers: - galleryScraper: - common: &commonSel - $performer: //div[@id='model-info']//a[starts-with(@href,"/models")] - gallery: - Title: &titleSel //div[@id='model-info']//span[normalize-space(.)='in']/following-sibling::text() - Studio: &studioAttr - Name: - fixed: Fem Joy - Date: &dateAttr - selector: //div[@id='model-info']//p[contains(.,"released")] - postProcess: - - replace: - - regex: '.*released on\s+' - with: - - parseDate: Jan 2, 2006 - Performers: &performersAttr - Name: $performer - URL: - selector: $performer/@href - postProcess: - - replace: - - regex: ^ - with: https://femjoy.com - Details: &detailsSel //div[@id='model-info']//div[@class="col-md-12"] - - sceneScraper: - common: *commonSel - scene: - Title: *titleSel - Studio: *studioAttr - Date: *dateAttr - Performers: *performersAttr - Image: //img[@class="lazy comment-photo"]/@data-original - Details: *detailsSel -# Last Updated October 24, 2021 diff --git a/scrapers/Femjoy.yml b/scrapers/Femjoy.yml new file mode 100644 index 000000000..60b207ace --- /dev/null +++ b/scrapers/Femjoy.yml @@ -0,0 +1,73 @@ +name: Femjoy +galleryByURL: + - action: scrapeXPath + url: + - femjoy.com/post/ + scraper: galleryScraper +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: "https://www.femjoy.com/videos?s={}" + scraper: sceneSearch +sceneByURL: + - action: 
scrapeXPath + url: + - femjoy.com/post/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $performer: //h1[@class='post_title']/a[starts-with(@href,"/models")] + scene: + Title: &titleSel //h1[@class='post_title']/span[last()]/text() + Studio: &studioAttr + Name: + fixed: Femjoy + Date: &dateAttr + selector: //h2[@class='post_title']/text()[2] + postProcess: &datePP + - replace: + - regex: '.*released on\s+' + with: + - parseDate: Jan 2, 2006 + Director: //*[@class='post_title']/a[starts-with(@href,"/director")] + Performers: &performersAttr + Name: $performer + URL: + selector: $performer/@href + postProcess: &prependDomain + - replace: + - regex: ^ + with: https://femjoy.com + Details: &details + selector: //*[@class='post_description']//text() + concat: "\n" + Image: //meta[@name='twitter:image']/@content + sceneSearch: + common: + $scene: //div[@class='post_video'] + $preview: //div[@class='post_video']//a[@class='preview'] + scene: + Title: $preview/@title + Date: + selector: $scene//span[@class='posted_on']/text() + postProcess: *datePP + URL: + selector: $preview/@href + postProcess: *prependDomain + Image: + selector: $preview/@data-media-poster + galleryScraper: + common: + $performer: //h1[@class='post_title']/a[starts-with(@href,"/models")] + gallery: + Title: *titleSel + Studio: *studioAttr + Date: *dateAttr + Performers: *performersAttr + Details: *details + +# Last Updated August 03, 2023 diff --git a/scrapers/FetishPro.yml b/scrapers/FetishPro.yml index 4180c54e2..075865a07 100644 --- a/scrapers/FetishPro.yml +++ b/scrapers/FetishPro.yml @@ -8,23 +8,19 @@ xPathScrapers: sceneScraper: scene: Title: - selector: //h2[@class="title"] + selector: //h1 Date: - selector: //span[@class="update_date"] + selector: //ul[@class="contentInfo"]/li[3] postProcess: - - parseDate: 01/02/2006 + - parseDate: Jan 2, 2006 Performers: - Name: //span[@class="tour_update_models"]/a - Details: //span[@class="latest_update_description"] + Name: //div[@class="models"]//a/text() + Details: //div[contains(@class, "videoDescription")]/p Tags: - Name: //span[@class="tour_update_tags"]/a + Name: //div[@class="tags"]//a Image: - selector: //img[@class="stdimage thumbs"]/@src - postProcess: - - replace: - - regex: ^ - with: "https://www.fetishpros.com/updates/" + selector: //div[@class="videoPreview"]//img/@src Studio: Name: fixed: FetishPros -# Last Updated April 09, 2021 +# Last Updated July 01, 2023 diff --git a/scrapers/Filename.py b/scrapers/Filename/Filename.py similarity index 50% rename from scrapers/Filename.py rename to scrapers/Filename/Filename.py index 8f1be3cf9..b19254ef5 100644 --- a/scrapers/Filename.py +++ b/scrapers/Filename/Filename.py @@ -2,33 +2,49 @@ import os import sys +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from py_common import graphql from py_common import log except ModuleNotFoundError: print( "You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", - file=sys.stderr) + file=sys.stderr, + ) sys.exit() REMOVE_EXT = False # remove file extension from title def title_from_filename(js): - scene_id = js['id'] - scene_title = js['title'] - response = graphql.callGraphQL(""" + scene_id = js["id"] + scene_title = js["title"] + response = graphql.callGraphQL( + """ query FilenameBySceneId($id: ID){ findScene(id: $id){ - path + files { + path + } } - }""", {"id": scene_id}) - path = response["findScene"]["path"] + }""", + {"id": scene_id}, + ) + assert response is not None + path = response["findScene"]["files"][0]["path"] filename = os.path.basename(path) if REMOVE_EXT: filename = os.path.splitext(filename)[0] if scene_title != filename: - log.info(f"Scene {scene_id}: Title differs from filename: '{scene_title}' => '{filename}'") + log.info( + f"Scene {scene_id}: Title differs from filename: '{scene_title}' => '{filename}'" + ) return {"title": filename} return {} diff --git a/scrapers/Filename.yml b/scrapers/Filename/Filename.yml similarity index 87% rename from scrapers/Filename.yml rename to scrapers/Filename/Filename.yml index 41380fe6a..0a8e037a7 100644 --- a/scrapers/Filename.yml +++ b/scrapers/Filename/Filename.yml @@ -1,4 +1,6 @@ name: Filename +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/FilthyFamily.yml b/scrapers/FilthyFamily.yml index 90ce5cb26..a6acc3b54 100644 --- a/scrapers/FilthyFamily.yml +++ b/scrapers/FilthyFamily.yml @@ -3,30 +3,41 @@ sceneByURL: - action: scrapeXPath url: - filthyfamily.com + - mobile.filthyfamily.com scraper: sceneScraper + queryURL: "{url}" + queryURLReplace: + url: + - regex: https://(www\.)?filthyfamily\.com + with: https://mobile.filthyfamily.com xPathScrapers: sceneScraper: + common: + $videoinfo: //div[@id="video-player-meta"]/div[@class="card-info"] scene: - Title: //div[@class='hideWhilePlaying']/img/@alt - Details: //p[@class='videoDetail']/text() + Title: $videoinfo/h1/text() + Details: + selector: $videoinfo/p[@class[contains(.,"desc")]] Image: - selector: //div[@class="hideWhilePlaying"]/img/@src + selector: //video/@data-poster-url postProcess: - replace: - - regex: ^ - with: "https:" + - regex: \[resolution\] + with: ipadbig.jpg + - regex: ^// + with: https:// Tags: Name: - selector: //meta[@http-equiv='keywords']/@content - split: ", " + selector: $videoinfo/div[@class="tags"]//a/text() + Performers: + Name: //section[@class="group"]/div[@data-scrollbar="#model-scroll"]/ul//li//div[@class="model-info"]/h2/a/text() Studio: Name: fixed: Filthy Family URL: - selector: //link[@rel='canonical']/@href + selector: $videoinfo/div[contains(@class, "act")]/a[@id="ovrl-share-btn"]/@*[name()="addthis:url"] postProcess: - replace: - - regex: ^ - with: "https:" - -# Last Updated October 07, 2020 + - regex: mobile\.bangbros\.com + with: mobile.filthyfamily.com +# Last Updated February 27, 2023 diff --git a/scrapers/FinishesTheJob.yml b/scrapers/FinishesTheJob.yml index 0108edea2..a1615bcb3 100644 --- a/scrapers/FinishesTheJob.yml +++ b/scrapers/FinishesTheJob.yml @@ -14,6 +14,14 @@ xPathScrapers: scene: Title: $content/h1/text() Details: $content/p[2]/text() + # only works when url is from finishesthejob.com + Date: + selector: $content/meta[@itemprop="uploadDate"]/@content + postProcess: + - replace: + - regex: "^(.+?)T.*" + with: $1 + - parseDate: "2006-01-02" Performers: Name: $content/h3/a/text() Tags: @@ -33,4 +41,4 @@ xPathScrapers: with: "" URL: //link[@rel='canonical']/@href -# Last Updated June 10, 2021 
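An aside on the Filename.py hunk above: it tracks Stash's switch from a single findScene `path` field to a `files` list, since a scene can now have multiple files. A minimal sketch, with hypothetical data, of reading the primary path defensively when a script may face either response shape:

```python
from typing import Optional

def primary_path(find_scene: Optional[dict]) -> Optional[str]:
    """Return a scene's first file path from either GraphQL response shape."""
    if not find_scene:
        return None
    files = find_scene.get("files")
    if files:  # newer Stash: {"files": [{"path": ...}, ...]}
        return files[0].get("path")
    return find_scene.get("path")  # older Stash: {"path": ...}

print(primary_path({"files": [{"path": "/media/scene.mp4"}]}))  # /media/scene.mp4
print(primary_path({"path": "/media/old.mp4"}))                 # /media/old.mp4
```

The `database_dict["file"]` to `database_dict["files"]` fix in the Algolia.py hunk earlier in this diff appears to stem from the same schema change.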
+# Last Updated October 19, 2023 diff --git a/scrapers/FistingInferno/FistingInferno.yml b/scrapers/FistingInferno/FistingInferno.yml new file mode 100644 index 000000000..f4a900755 --- /dev/null +++ b/scrapers/FistingInferno/FistingInferno.yml @@ -0,0 +1,40 @@ +# requires: Algolia +name: "FistingInferno" +sceneByURL: + - action: script + url: + - fistinginferno.com/en/video/ + script: + - python + - ../Algolia/Algolia.py + - fistinginferno +sceneByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - fistinginferno +sceneByName: + action: script + script: + - python + - ../Algolia/Algolia.py + - fistinginferno + - searchName +sceneByQueryFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - fistinginferno + - validName +galleryByURL: + - action: script + url: + - fistinginferno.com/en/photo/ + script: + - python + - ../Algolia/Algolia.py + - fistinginferno + - gallery +# Last Updated September 24, 2023 diff --git a/scrapers/Fit18.py b/scrapers/Fit18/Fit18.py similarity index 94% rename from scrapers/Fit18.py rename to scrapers/Fit18/Fit18.py index 1428dbb8b..19aa4bca8 100644 --- a/scrapers/Fit18.py +++ b/scrapers/Fit18/Fit18.py @@ -1,9 +1,17 @@ import json +import os import re import sys import urllib.parse from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import requests except ModuleNotFoundError: diff --git a/scrapers/Fit18.yml b/scrapers/Fit18/Fit18.yml similarity index 88% rename from scrapers/Fit18.yml rename to scrapers/Fit18/Fit18.yml index e21d06676..c6be98c45 100644 --- a/scrapers/Fit18.yml +++ b/scrapers/Fit18/Fit18.yml @@ -1,4 +1,6 @@ name: Fit18 +# requires: py_common + sceneByURL: - url: - fit18.com/videos/ diff --git a/scrapers/FratX.py b/scrapers/FratX/FratX.py similarity index 100% rename from scrapers/FratX.py rename to scrapers/FratX/FratX.py diff --git a/scrapers/FratX.yml b/scrapers/FratX/FratX.yml similarity index 92% rename from scrapers/FratX.yml rename to scrapers/FratX/FratX.yml index 1e9081f64..90ab03290 100644 --- a/scrapers/FratX.yml +++ b/scrapers/FratX/FratX.yml @@ -1,4 +1,6 @@ name: FratX +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/FreeonesCommunity.yml b/scrapers/FreeonesCommunity.yml index 29a5232e3..5686d1f05 100644 --- a/scrapers/FreeonesCommunity.yml +++ b/scrapers/FreeonesCommunity.yml @@ -1,8 +1,10 @@ name: FreeonesCommunity + performerByName: action: scrapeXPath queryURL: https://www.freeones.com/babes?q={}&v=teasers&s=relevance&l=96&m%5BcanPreviewFeatures%5D=0 scraper: performerSearch + performerByURL: - action: scrapeXPath url: @@ -10,6 +12,28 @@ performerByURL: - freeones.com scraper: performerScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.freeones.com/vod?q={} + scraper: sceneSearch + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +sceneByURL: + - action: scrapeXPath + url: + - www.freeones.com + scraper: sceneScraper + +movieByURL: + - action: scrapeXPath + url: + - www.freeones.com + scraper: movieScraper + xPathScrapers: performerSearch: performer: @@ -22,6 +46,71 @@ xPathScrapers: with: https://www.freeones.com - regex: /feed$ with: /bio + sceneSearch: + 
common: + $movieTitle: //div[@data-test="teaser-vod"]//img + scene: + Title: $movieTitle/@alt + Image: $movieTitle/@src + URL: + selector: //div[@data-test="teaser-vod"]/a/@href + postProcess: + - replace: + - regex: ^ + with: "https://www.freeones.com" + + sceneScraper: + common: + $commonRoot: //*[@id="description"] + $performerName: //a[@data-test="link_Cast"] + scene: + Title: //h1 + URL: //link[@rel="alternate"][1]/@href + Details: $commonRoot//div[contains(concat(' ',normalize-space(@class),' '),' pb-2 ')] + Studio: + Name: $commonRoot//span[@data-test="link_span_Studio"] + Director: $commonRoot//span[@data-test="link_span_Director"] + Date: + selector: //div[contains(concat(' ',normalize-space(@class),' '),' mid-content-pr-past-date ')] + postProcess: + - replace: + - regex: .+?(\w+\s\d{1,2},\s\d{4}).+ + with: $1 + - parseDate: January 2, 2006 + Movies: + Name: //h1 + URL: //link[@rel="alternate"][1]/@href + Performers: + Name: $performerName/span + Tags: + Name: //li[@class="list-inline-item"]/a +# URL: +# selector: $performerName/@href +# postProcess: +# - replace: +# - regex: ^ +# with: "https://www.freeones.com" + + movieScraper: + common: + $commonRoot: //*[@id="description"] + $performerName: //a[@data-test="link_Cast"] + movie: + Name: //h1 + Synopsis: $commonRoot//div[contains(concat(' ',normalize-space(@class),' '),' pb-2 ')] + Duration: $commonRoot//span[@data-test="link_span_Duration"] + Studio: + Name: $commonRoot//span[@data-test="link_span_Studio"] + Director: $commonRoot//span[@data-test="link_span_Director"] + Date: + selector: //div[contains(concat(' ',normalize-space(@class),' '),' mid-content-pr-past-date ')] + postProcess: + - replace: + - regex: .+?(\w+\s\d{1,2},\s\d{4}).+ + with: $1 + - parseDate: January 2, 2006 + FrontImage: //*[@id="fxgp-gallery"]/a[1]/@href + BackImage: //*[@id="fxgp-gallery"]/a[2]/@href performerScraper: performer: @@ -35,38 +124,40 @@ xPathScrapers: Twitter: //form//a[contains(@href,'twitter.com/')]/@href Instagram: //form//a[contains(@href,'instagram.com/')]/@href Birthdate: - selector: //span[contains(text(),'Born On')] + selector: //span[@data-test="link_span_dateOfBirth"]/text() postProcess: - - replace: - - regex: Born On - with: - parseDate: January 2, 2006 Ethnicity: - selector: //a[@data-test="link_ethnicity"]/span/text() + selector: //span[@data-test="link_span_ethnicity"] postProcess: - map: Asian: Asian Caucasian: White Black: Black Latin: Hispanic - Country: //a[@data-test="link-country"]/span/text() - EyeColor: //span[text()='Eye Color']/following-sibling::span/a + Country: + selector: //a[@data-test="link_placeOfBirth"][contains(@href, 'country')]/span/text() + postProcess: + - map: + United States: "USA" + EyeColor: //span[text()='Eye Color:']/following-sibling::span/a/span/text() Height: - selector: //span[text()='Height']/following-sibling::span/a + selector: //span[text()='Height:']/following-sibling::span/a postProcess: - - replace: - - regex: \D+[\s\S]+ - with: "" + - feetToCm: true - map: Unknown: "" Measurements: - selector: //span[text()='Measurements']/following-sibling::span/span/a + selector: //span[(@data-test='link_span_bra') or (@data-test='link_span_waist') or (@data-test='link_span_hip')] concat: " - " postProcess: + - replace: + - regex: \sIn + with: "" - map: Unknown: "" FakeTits: - selector: //span[text()='Boobs']/following-sibling::span/a + selector: //span[text()='Boobs:']/following-sibling::span/a postProcess: - map: Unknown: "" @@ -75,14 +166,16 @@ xPathScrapers: CareerLength: selector: 
//div[contains(@class,'timeline-horizontal')]//p[@class='m-0'] concat: "-" - Aliases: //p[@data-test='p_aliases']/text() + Aliases: + selector: //span[@data-test='link_span_aliases']/text() + concat: ", " Tattoos: - selector: //span[text()='Tattoos']/following-sibling::span/span + selector: //span[text()='Tattoo locations:']/following-sibling::span postProcess: - map: Unknown: "" Piercings: - selector: //span[text()='Piercings']/following-sibling::span/span + selector: //span[text()='Piercing locations:']/following-sibling::span postProcess: - map: Unknown: "" @@ -90,7 +183,7 @@ xPathScrapers: selector: //div[contains(@class,'image-container')]//a/img/@src Gender: fixed: "Female" - Details: //div[@data-test="biography"] + #Details: //div[@data-test="biography"] DeathDate: selector: //div[contains(text(),'Passed away on')] postProcess: @@ -102,8 +195,9 @@ xPathScrapers: Weight: selector: //span[@data-test="link_span_weight"] postProcess: - - replace: - - regex: \D+[\s\S]+ + - replace: + - regex: \slbs with: "" + - lbToKg: true -# Last Updated April 16, 2021 +# Last Updated January 19, 2023 diff --git a/scrapers/Futanarica.yml b/scrapers/Futanarica.yml new file mode 100644 index 000000000..fff328664 --- /dev/null +++ b/scrapers/Futanarica.yml @@ -0,0 +1,26 @@ +name: "Futanarica" +sceneByURL: + - action: scrapeXPath + url: + - futanarica.com + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //article//h3 + Image: //article//a/img/@src + Studio: + Name: + fixed: Futanarica + URL: //meta[@property="og:url"]/@content + Details: //meta[@property="og:description"]/@content + Date: + selector: //meta[@property="article:published_time"]/@content + postProcess: + - replace: + - regex: "T(.*)$" + with: "" + - parseDate: 2006-01-02 + +# Last Updated July 20, 2023 \ No newline at end of file diff --git a/scrapers/GammaEntertainment.yml b/scrapers/GammaEntertainment.yml index a28bad1b4..4aae1fbfd 100644 --- a/scrapers/GammaEntertainment.yml +++ b/scrapers/GammaEntertainment.yml @@ -3,12 +3,10 @@ sceneByURL: - action: scrapeXPath url: - 1000facials.com/en/scene/ - - activeduty.com/en/video/ - alettaoceanempire.com/en/video/ - analacrobats.com/en/video/ - ashleyfires.com/ - bigfatcreampie.com/ - - blowpass.com/en/video/ - bskow.com/en/video/ - bushybushy.com/ - buttman.com/ @@ -18,13 +16,11 @@ sceneByURL: - devilsgangbangs.com/ - devonlee.com/ - dylanryder.com/ - - falconstudios.com/en/video/ - familycreep.com/ - fistingcentral.com/ - gapingangels.com/ - girlsandstuds.com/ - grannyghetto.com/ - - hothouse.com/en/video/ - immorallive.com/en/video/ - jaysinxxx.com/ - jonnidarkkoxxx.com/ @@ -36,34 +32,22 @@ sceneByURL: - mommyblowsbest.com/en/scene/ - motherfuckerxxx.com/ - myteenoasis.com/ - - nextdoorbuddies.com/en/video/ - - nextdoorcasting.com/ - - nextdoorebony.com/ - - nextdoorhookups.com/ - - nextdoormale.com/ - - nextdoorraw.com/en/video/ - - nextdoorstudios.com/en/video/ - - nextdoortaboo.com/en/video/ - - nextdoortwink.com/en/video/ - onlyteenblowjobs.com/en/scene/ - openlife.com/ - pantypops.com/ - povblowjobs.com/ - povthis.com/ - - ragingstallion.com/en/scene/ - silverstonedvd.com/ - silviasaint.com/ - squirtalicious.com/ - squirtinglesbian.com/en/video/ - squirtingorgies.com/en/scene/ - - stagcollective.com/ - strapattackers.com/ - sunnyleone.com/ - throated.com/en/video/ - tittycreampies.com/ - transsexualangel.com/en/video/ - transsexualroadtrip.com/ - - tsplayground.com/en/video/ - whiteghetto.com/ scraper: sceneScraper @@ -157,7 +141,6 @@ xPathScrapers: 
analacrobats: Anal Acrobats ashleyfires: Ashley Fires bigfatcreampie: Big Fat Cream Pie - blowpass: Blowpass bskow: BsKow bushybushy: Bushy Bushy buttman: Buttman @@ -170,13 +153,11 @@ xPathScrapers: dylanryder: Dylan Ryder eroticax: EroticaX evilangel: Evil Angel - falconstudios: Falcon Studios familycreep: Family Creep fistingcentral: Fisting Central gapingangels: Gaping Angels girlsandstuds: Girls And Studs grannyghetto: Granny Ghetto - hothouse: Hot House immorallive: Immoral Live jaysinxxx: Jay Sin XXX jonnidarkkoxxx: Jonni Darkko XXX @@ -189,35 +170,23 @@ xPathScrapers: mommyblowsbest: Mommy Blows Best motherfuckerxxx: Mother Fucker XXX myteenoasis: My Teen Oasis - nextdoorbuddies: Next Door Buddies - nextdoorcasting: Next Door Casting - nextdoorebony: Next Door Ebony - nextdoorhookups: Next Door Hookups - nextdoormale: Next Door Male - nextdoorraw: Next Door Raw - nextdoorstudios: Next Door Studios - nextdoortaboo: Next Door Taboo - nextdoortwink: Next Door Twink onlyteenblowjobs: Only Teen Blowjobs openlife: Open Life outofthefamily: Out Of The Family pantypops: Panty Pops povblowjobs: POV Blowjobs povthis: POV This - ragingstallion: Raging Stallion silverstonedvd: Silverstone DVD silviasaint: Silvia Saint squirtalicious: Squirtalicious squirtinglesbian: Squirting Lesbian squirtingorgies: Squirting Orgies - stagcollective: Stag Collective strapattackers: Strap Attackers sunnyleone: Sunny Leone throated: Throated tittycreampies: Titty Creampies transsexualangel: Transsexual Angel transsexualroadtrip: Transsexual Roadtrip - tsplayground: TS Playground whiteghetto: White Ghetto xempire: XEmpire @@ -240,4 +209,4 @@ xPathScrapers: Name: //a[contains(@class, 'GA_Id_headerLogo')]/span[@class='linkMainCaption']/text() FrontImage: //a[@class='frontCoverImg']/@href BackImage: //a[@class='backCoverImg']/@href -# Last Updated December 29, 2022 +# Last Updated October 04, 2023 diff --git a/scrapers/Algolia_GangBangCreampie.yml b/scrapers/GangBangCreampie/GangBangCreampie.yml similarity index 78% rename from scrapers/Algolia_GangBangCreampie.yml rename to scrapers/GangBangCreampie/GangBangCreampie.yml index 1eae61aa1..f49d827e5 100644 --- a/scrapers/Algolia_GangBangCreampie.yml +++ b/scrapers/GangBangCreampie/GangBangCreampie.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "GangbangCreampie" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - gangbangcreampie.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - gangbangcreampie.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Algolia_GenderXFilms.yml b/scrapers/GenderXFilms/GenderXFilms.yml similarity index 78% rename from scrapers/Algolia_GenderXFilms.yml rename to scrapers/GenderXFilms/GenderXFilms.yml index fd567c4c6..aad91c963 100644 --- a/scrapers/Algolia_GenderXFilms.yml +++ b/scrapers/GenderXFilms/GenderXFilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: GenderX Films sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - genderxfilms.com/en/video script: - python - - Algolia.py + - 
../Algolia/Algolia.py - genderxfilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - genderxfilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - gallery movieByURL: @@ -42,7 +43,7 @@ movieByURL: - genderxfilms.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - movie # Last Updated December 22, 2022 diff --git a/scrapers/GenuineSin.yml b/scrapers/GenuineSin.yml deleted file mode 100644 index e21a54d88..000000000 --- a/scrapers/GenuineSin.yml +++ /dev/null @@ -1,23 +0,0 @@ -name: GenuineSin -sceneByURL: - - action: scrapeXPath - url: - - genuinesin.com - - mixedx.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="videoDetails clear"]/h3 - Performers: - Name: //li[@class="update_models"]//a - Tags: - Name: //ul[li[contains(text(),"Tags:")]]//a - Details: - selector: //div[@class="videoDetails clear"]/p - Image: - selector: //meta[@property="og:image"]/@content - Studio: - Name: //div[@class="logo"]/a/img/@alt -# Last Updated April 28, 2021 diff --git a/scrapers/Algolia_Girlfriendsfilms.yml b/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml similarity index 79% rename from scrapers/Algolia_Girlfriendsfilms.yml rename to scrapers/Girlfriendsfilms/Girlfriendsfilms.yml index 8e02c4bce..933251fa0 100644 --- a/scrapers/Algolia_Girlfriendsfilms.yml +++ b/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Girlfriends Films" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - girlfriendsfilms.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - validName movieByURL: @@ -33,7 +34,7 @@ movieByURL: - girlfriendsfilms.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - movie galleryByURL: @@ -42,7 +43,7 @@ galleryByURL: - girlfriendsfilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - gallery # Last Updated December 22, 2022 diff --git a/scrapers/GirlsRimming.yml b/scrapers/GirlsRimming.yml index f46dc9f25..0e297c21b 100644 --- a/scrapers/GirlsRimming.yml +++ b/scrapers/GirlsRimming.yml @@ -6,20 +6,22 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $image: //div[@class='player-thumb']/img scene: Title: //div[@class='updatesBlock']/h2[@class='title']/text() Details: //meta[@name='description']/@content - Image: //div[@class='player-thumb']/img/@src0_1x + Image: $image/@src0_4x|$image/@src0_3x|$image/@src0_2x|$image/@src0_1x Studio: Name: - fixed: GirlsRimming + fixed: Girls Rimming Movies: Name: selector: //div[@class='updatesBlock']/h2[@class='title']/text() postProcess: - replace: - - regex: (.+)(?:\sEp\d).* - with: $1 + - regex: (.+)(?:\sEp\d).* + with: $1 Tags: Name: selector: 
//meta[@name='keywords']/@content @@ -28,5 +30,4 @@ xPathScrapers: - regex: "[^,]*Id\\s(\\d+)[^,]*" with: split: "," - -# Last Updated September 30, 2020 +# Last Updated August 20, 2023 diff --git a/scrapers/Algolia_Girlsway.yml b/scrapers/Girlsway/Girlsway.yml similarity index 77% rename from scrapers/Algolia_Girlsway.yml rename to scrapers/Girlsway/Girlsway.yml index d81ad7dbd..4b37bb8b3 100644 --- a/scrapers/Algolia_Girlsway.yml +++ b/scrapers/Girlsway/Girlsway.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "GirlsWay" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - girlsway.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - girlsway.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Gloryholesecrets/Gloryholesecrets.yml b/scrapers/Gloryholesecrets/Gloryholesecrets.yml new file mode 100644 index 000000000..4fbc18858 --- /dev/null +++ b/scrapers/Gloryholesecrets/Gloryholesecrets.yml @@ -0,0 +1,40 @@ +# requires: Algolia +name: GloryholeSecrets +sceneByURL: + - action: script + url: + - gloryholesecrets.com/en/video + script: + - python + - ../Algolia/Algolia.py + - gloryholesecrets +sceneByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - gloryholesecrets +sceneByName: + action: script + script: + - python + - ../Algolia/Algolia.py + - gloryholesecrets + - searchName +sceneByQueryFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - gloryholesecrets + - validName +galleryByURL: + - action: script + url: + - gloryholesecrets.com/en/photo/ + script: + - python + - ../Algolia/Algolia.py + - gloryholesecrets + - gallery +# Last Updated August 22, 2023 diff --git a/scrapers/GoddessSnow.yml b/scrapers/GoddessSnow.yml index c0027d2f4..0b9b7c63d 100644 --- a/scrapers/GoddessSnow.yml +++ b/scrapers/GoddessSnow.yml @@ -1,57 +1,81 @@ -name: GoddessSnow +name: GoddessSnow.com + +sceneByName: + action: scrapeXPath + scraper: sceneSearch + queryURL: "https://www.goddesssnow.com/vod/search.php?query={}" + +# We don't want the /updates URL here because its release date is off by a year (scenes get released a year early on /scenes) +# The description is also often truncated on /updates +# /scenes also has two versions, one that ends in "_vids.html" and one that ends in ".html" +# We want to make sure we get the _vids.html version as that is the one with the images +# We take care of both issues above in the queryURLReplace section sceneByURL: - - action: scrapeXPath - url: - - goddesssnow.com/vod/scenes/ - scraper: vodScraper - action: scrapeXPath url: - goddesssnow.com/updates/ - scraper: updateScraper + - goddesssnow.com/vod/scenes + queryURL: "{url}" + queryURLReplace: + url: + # convert /updates URLs to /vod/scenes + - regex: (.+)(\/updates\/)(.+)(\.html) + with: $1/vod/scenes/$3.html + # fix up the /vod/scenes urls that people may have that do not end in _vids.html: first get rid of it for everyone, then add it back in. 
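To make the rewrite concrete (the in-file comments continue right after this sketch), here is a small Python mirror of the same strip-then-append normalization, using hypothetical example.html URLs:

```python
import re

def normalize_scene_url(url: str) -> str:
    """Sketch of the queryURLReplace pipeline: /updates and bare .html URLs converge on _vids.html."""
    url = re.sub(r"(.+)/updates/(.+)\.html", r"\1/vod/scenes/\2.html", url)
    url = re.sub(r"_vids\.html$", ".html", url)  # strip the suffix from everyone...
    url = re.sub(r"\.html$", "_vids.html", url)  # ...then add it back for everyone
    return url

for u in ("https://www.goddesssnow.com/updates/example.html",
          "https://www.goddesssnow.com/vod/scenes/example.html",
          "https://www.goddesssnow.com/vod/scenes/example_vids.html"):
    print(normalize_scene_url(u))  # all print .../vod/scenes/example_vids.html
```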
+ # This both adds it to the /updates urls from above, leaves the urls that have the correct form alone, and fixes /vod/scenes urls that are the "bad" ones + # We need this two-step process because Go regex does not support backreferences, which would have let us do this cleanly in one regex - regex: _vids\.html with: ".html" - regex: \.html with: "_vids.html" scraper: sceneScraper + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: - vodScraper: + sceneScraper: scene: - Title: - selector: //div[@class="title_bar"]/span/text() + Title: //div[@class="title_bar"]/span/text() + URL: + selector: //div/@data-redirect + postProcess: + - replace: + - regex: \.html + with: _vids.html Date: - selector: //span[@class="release-date"]/text() + selector: //span[@class="release-date"]/text()|//div[@class="cell update_date"]/text() postProcess: - replace: - - regex: 'Release Date: (\d{2}/\d{2}/\d{4})' - with: $1 + - regex: ^Release Date:\s + with: - parseDate: 01/02/2006 - Details: &details - selector: //span[@class="update_description"]/text() - Performers: - Name: //span[@class="update_models"]/a/text() + Details: //span[@class="update_description"] Tags: - Name: //span[@class="update_tags"]/a/text() - Studio: &studio Name: - fixed: Goddess Alexandra Snow - Image: &image - selector: //meta[@name="twitter:image"]/@content + selector: //span[@class="update_tags"]/a/text() + Image: + selector: //div[@class="VOD_update"]/img/@src0_4x + postProcess: + - replace: + - regex: ^ + with: https://www.goddesssnow.com + Studio: + Name: + fixed: Alexandra Snow + Performers: + Name: //span[@class="update_models"]/a - updateScraper: + sceneSearch: scene: - Title: - selector: //h2[@class="update-title"] - Date: - selector: //span[@class="update-date"] - postProcess: - - parseDate: 01/02/2006 - Details: - selector: //div[@class="update-join"]/a[2]/@href + Title: //div[@class="update_details"]/div/@data-title + URL: //a[@class="update-details-image"]/@href + Image: + selector: //a[@class="update-details-image"]/img/@src0_1x postProcess: - replace: - regex: ^ with: https://www.goddesssnow.com - - subScraper: *details - Performers: - Name: //span[@class="tour_update_models"]/a - Tags: - Name: //div[@class="update-tags"]/a - Studio: *studio - Image: *image -# Last Updated June 07, 2021 +# Last Updated January 31, 2023 diff --git a/scrapers/GroobyClub.yml b/scrapers/GroobyClub.yml index 74439b5fe..38de096f3 100644 --- a/scrapers/GroobyClub.yml +++ b/scrapers/GroobyClub.yml @@ -6,7 +6,6 @@ sceneByURL: - asianamericantgirls.com - canada-tgirl.com - euro-tgirls.com - - grooby.club - hazel-tucker.com - krissy4u.com - russian-tgirls.com @@ -17,31 +16,63 @@ sceneByURL: - transexdomination.com - ts-castingcouch.com - uk-tgirls.com + scraper: substudioScraper + - action: scrapeXPath + url: + - grooby.club # other grooby sites which work - tgirljapan.com - tgirljapanhardcore.com - scraper: sceneScraper + scraper: clubScraper + xPathScrapers: - sceneScraper: + clubScraper: scene: - Title: //div[@class="trailer_videoinfo"]//h3/text() - Date: - selector: //div[@class="trailer_videoinfo"]//b[contains(.,"Added")]/following-sibling::text()[1] + Title: &title //div[@class="trailer_toptitle_left"] + Date: &date + selector: //b[contains(.,"Added")]/following-sibling::text()[1] postProcess: - replace: - regex: ^- with: "" - - parseDate: Jan 2, 2006 - Details: //div[@class="trailer_videoinfo"]/p[not(b)] - Performers: - Name: 
//div[@class="trailer_videoinfo"]//b[contains(.,"Featuring")]/following-sibling::a/text()[1] + - parseDate: January 2, 2006 + Details: &details + selector: //div[@class="trailerpage_info"]/p/text() + concat: " " + Performers: &performers + Name: //div[@class="setdesc"]//a[contains(@href, "models")]/text() Studio: - Name: //meta[@name="author"]/@content - Image: - selector: //base/@href|//div[@class="videohere"]/img[@class="thumbs stdimage"]/@src|//script[contains(.,'jwplayer("jwbox").setup')]/text() - concat: "|" + Name: //div[@class="sitename"]/a/text() + URL: &url //link[@rel="canonical"]/@href + Image: &image + selector: //meta[@property="og:image"]/@content postProcess: - replace: - - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" - with: $1$2 -# Last Updated June 26, 2022 + - regex: ^// + with: https:// + Tags: + Name: &tagName //div[@class="set_tags"]/ul/li//a/text() + + substudioScraper: + scene: + Title: *title + Date: *date + Details: *details + Performers: *performers + Studio: + Name: //meta[@name="author"]/@content + URL: *url + Image: *image + # Tags for these subsites only appear on grooby.club as of 2023-08-15 + # but we have to extend the subScraper functionality in Stash + # if we want to be able to scrape more than just a single field + # TODO: write a python scraper, merge with GroobyNetwork-*.yml ? + Tags: + Name: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: ^.+/tour + with: https://grooby.club/tour + - subScraper: *tagName +# Last Updated August 21, 2023 diff --git a/scrapers/GroobyNetwork-Brazilian.yml b/scrapers/GroobyNetwork-Brazilian.yml index d5af835cf..a7a8994bb 100644 --- a/scrapers/GroobyNetwork-Brazilian.yml +++ b/scrapers/GroobyNetwork-Brazilian.yml @@ -2,7 +2,6 @@ name: "GroobyNetwork-Brazilian" sceneByURL: - action: scrapeXPath url: - - brazilian-transsexuals.com - braziltgirls.xxx scraper: sceneScraper xPathScrapers: @@ -35,4 +34,4 @@ xPathScrapers: - regex: ^\/\/ with: "https://" -# Last Updated December 16, 2022 +# Last Updated July 27, 2023 diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 4e1b19662..98501c4b3 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -1,43 +1,104 @@ name: "GroobyNetwork-Partial" sceneByURL: - action: scrapeXPath - url: - # Gone through list at: grooby.com/about/websites/ + url: &urls + # Gone through list at: grooby.com/about/websites/ - asiantgirl.com - blacktgirlshardcore.com - black-tgirls.com - bobstgirls.com + - brazilian-transsexuals.com + - femout.xxx - femoutsex.xxx #Scenes on 'femout.xxx' can some times be found on this one as well - franks-tgirlworld.com - grooby-archives.com - groobygirls.com - - groobyvr.com - ladyboy-ladyboy.com - ladyboy.xxx + - realtgirls.com - tgirlsex.xxx - tgirls.porn - tgirls.xxx + - tgirlsfuck.com + - tgirlshookup.com + - tgirltops.com - transexpov.com - transgasm.com + - transnificent.com scraper: sceneScraper + - action: scrapeXPath + url: + - groobyvr.com + scraper: sceneScraperGroobyVR +galleryByURL: + - action: scrapeXPath + url: *urls + scraper: galleryScraper xPathScrapers: sceneScraper: scene: - Title: //p[@class="trailertitle"]/text()|//div[@class="trailer_toptitle_left"]/text() - Date: + Title: &title //p[@class="trailertitle"]/text()|//div[@class="trailer_toptitle_left"]/text() + Date: &date selector: //div[@class="setdesc"]//b[contains(.,"Added")]/following-sibling::text()[1] postProcess: - parseDate: January 2, 2006 - Details: 
//div[@class="trailerpage_info"]/p[not(@class)]/text() - Performers: + Details: &details + selector: //div[@class="trailerpage_info"]/p[not(@class)]/descendant-or-self::*/text() + concat: "\n\n" + Performers: &performers Name: //div[@class="setdesc"]//a/text() - Studio: + Studio: &studio Name: //meta[@name="author"]/@content + URL: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: (https://[^/]*)/.* + with: $1 Image: - selector: //meta[@property="og:image"]/@content + selector: //link[@rel="canonical"]/@href|//img[contains(@class, "update_thumb thumbs stdimage")]/@src|//img[contains(@class, "update_thumb thumbs stdimage")]/@src0_1x|//div[@class="trailerposter"]/img/@src0 + concat: "__SEPARATOR__" postProcess: - replace: - - regex: ^// # bobstgirls + - regex: ^.*__SEPARATOR__// # bobstgirls with: "https://" - -# Last Updated July 20, 2021 + - regex: ^(https://[^/]*)/.*(__SEPARATOR__.*)$ + with: $1$2 + - regex: content// + with: content/ + - regex: __SEPARATOR__ + with: '' + Tags: &tags + Name: //div[@class="set_tags"]/ul/li//a/text() + galleryScraper: + gallery: + Title: *title + Date: *date + Details: *details + Performers: *performers + Studio: *studio + Tags: *tags + sceneScraperGroobyVR: + scene: + Title: *title + Date: + selector: //div[@class="set_meta"]//b[contains(.,"Added")]/following-sibling::text()[1] + postProcess: + - parseDate: January 2, 2006 + Details: + selector: //div[@class="trailerblock"]/p[not(@class)]/text() + concat: "\n\n" + Performers: + Name: //div[@class="trailer_toptitle_left"]//a/text() + Studio: *studio + Image: + selector: //dl8-video/@poster + postProcess: + - replace: + - regex: content// # errant double slash + with: content/ + - replace: + - regex: ^/ + with: https://www.groobyvr.com/ + Tags: *tags +# Last Updated July 27, 2023 diff --git a/scrapers/GuysInSweatpants.yml b/scrapers/GuysInSweatpants.yml new file mode 100644 index 000000000..3b09f69f0 --- /dev/null +++ b/scrapers/GuysInSweatpants.yml @@ -0,0 +1,97 @@ +name: Guys In Sweatpants +sceneByURL: + - action: scrapeXPath + url: + - guysinsweatpants.com/scenes/ + - guysinsweatpants.com/index.php/scenes/ + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - guysinsweatpants.com/models/ + - guysinsweatpants.com/index.php/models/ + scraper: performerScraper +xPathScrapers: + sceneScraper: + common: + $content: //div[@id="content"] + scene: + Title: $content//h1 + Details: + selector: $content//p/text() + concat: "\n\n" + Date: + selector: $content//h1/following-sibling::div/span/text() + postProcess: + - replace: + # https://regex101.com/r/QVvQyH/1 + - regex: \s*(.*?)\s*\|.* + with: $1 + - parseDate: Jan 2, 2006 + Image: + selector: //div[@id="banner"]/img/@src + postProcess: &ppPrependOrigin + - replace: + - regex: ^ + with: https://guysinsweatpants.com + Studio: + Name: + fixed: Guys In Sweatpants + Performers: + Name: $content/div[@class="meta"]//a/text() + URL: + selector: $content/div[@class="meta"]//a/@href + postProcess: *ppPrependOrigin + performerScraper: + common: + $content: //div[@class="inner"] + performer: + Name: $content//h1 + Image: + selector: //div[@class="model"]/img/@src + postProcess: *ppPrependOrigin + Details: + selector: $content//p/text() + concat: "\n\n" + postProcess: + - replace: + - regex: (?s)(.*).+Videos:.* + with: $1 + Height: + selector: ($content//p/text())[1] + postProcess: + - replace: + - regex: \s*(.*?)\s*\|.* + with: $1 + - feetToCm: true + Weight: + selector: ($content//p/text())[1] + postProcess: + - replace: + 
- regex: .*\|\s*(\d*)\D*\|.* + with: $1 + - lbToKg: true + Circumcised: + selector: ($content//p/text())[1] + postProcess: + - replace: + - regex: .*,\s*(\S*)\s* + with: $1 + PenisLength: + selector: ($content//p/text())[1] + postProcess: + - replace: + - regex: .*\|\s+(\d*)\D*,.* + with: "0 $1" + # We do not have an inchToCm so we lose the fractional part + - feetToCm: true + +driver: + cookies: + - CookieURL: https://guysinsweatpants.com + Cookies: + - Name: pp-accepted + Domain: .guysinsweatpants.com + Value: "true" + Path: "/" +# Last Updated September 28, 2023 diff --git a/scrapers/HeavyOnHotties.yml b/scrapers/HeavyOnHotties.yml index b22682b64..948609c10 100644 --- a/scrapers/HeavyOnHotties.yml +++ b/scrapers/HeavyOnHotties.yml @@ -9,6 +9,10 @@ xPathScrapers: scene: Title: selector: //div[@class="vid-title clearfix text-center-mobile"]/h2 + postProcess: + - replace: + - regex: .+\"([^\"]+)\" + with: $1 Date: selector: //span[contains(@class, "released title")]/strong/text() postProcess: @@ -25,4 +29,4 @@ xPathScrapers: Studio: Name: fixed: HeavyOnHotties -# Last Updated July 15, 2021 +# Last Updated November 30, 2023 diff --git a/scrapers/HelixStudios.yml b/scrapers/HelixStudios.yml index 982f77cdf..45f1fc7c4 100644 --- a/scrapers/HelixStudios.yml +++ b/scrapers/HelixStudios.yml @@ -56,15 +56,7 @@ xPathScrapers: Date: selector: //div[@class="info-items"]/span[@class="info-item date"]/text() postProcess: - - replace: - - regex: "th|st|nd|rd" - with: - - regex: '(\d+)\sdays\sago' - with: $1 - - regex: '^([a-zA-Z]+)\s(\d+)$' - with: "$1 $2, 2021" - - subtractDays: true - - parseDate: Jan 2, 2006 + - parseDate: January 2, 2006 Details: selector: //div[contains(@class, "description-content")]/p/text() concat: "#LINEBREAK#" @@ -82,6 +74,21 @@ xPathScrapers: URL: //link[@rel="canonical"][1]/@href Image: selector: //video/@poster + postProcess: + - replace: + - regex: 960w + with: 1500w + Director: + selector: //span[contains(@class, "info-item director")]/text() + Code: + selector: //*[@id="titleImage"]/@src + postProcess: + - replace: + - regex: ^.*\/\s* + with: + - replace: + - regex: \_1600.*$ + with: Studio: Name: fixed: Helix @@ -137,4 +144,4 @@ xPathScrapers: - replace: - regex: $ with: " " -# Last Updated December 29, 2021 +# Last Updated February 20, 2023 diff --git a/scrapers/Hentaied.yml b/scrapers/Hentaied.yml index 7ac946183..508d0533a 100644 --- a/scrapers/Hentaied.yml +++ b/scrapers/Hentaied.yml @@ -2,8 +2,10 @@ name: Hentaied sceneByURL: - action: scrapeXPath url: + - freeze.xxx - futanari.xxx - hentaied.com + - parasited.com scraper: sceneScraper xPathScrapers: sceneScraper: @@ -11,13 +13,13 @@ xPathScrapers: $desc: "*[local-name()='p' or local-name()='h3' or local-name()='ol' or local-name()='ul' or (local-name()='div' and @class='excerpt')]" scene: Title: - selector: //div[@class="innerflex"]//h1 + selector: //div[@class="left-top-part"]//h1 Date: selector: //div[contains(@class, "datesingle")] postProcess: - parseDate: January 2, 2006 Performers: - Name: //div[@class="innerflex"]//div[contains(@class,"tagsmodels")]/a + Name: //div[@class="left-top-part"]//div[contains(@class,"tagsmodels")]//a Tags: Name: selector: //ul[@class="post-categories"]//a @@ -26,7 +28,7 @@ xPathScrapers: - regex: "Exclude" with: "" Details: - selector: //div[@class="cont" or @class="column"]/$desc + selector: //div[@class="fullss"]/p concat: "\n\n" postProcess: - replace: @@ -40,4 +42,5 @@ xPathScrapers: postProcess: - map: Real Life Hentai: Hentaied -# Last Updated June 29, 2022 + +# Last 
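The GuysInSweatpants performer scraper above slices a single stats paragraph four ways with different regexes, then leans on Stash's feetToCm/lbToKg post-processors. A worked check of those captures in Python, against a hypothetical stats line of the shape the patterns imply (height | weight | length, circumcision):

    import re

    stats = '5\'10" | 170 lbs | 7", Cut'  # hypothetical first-paragraph text

    height = re.sub(r"\s*(.*?)\s*\|.*", r"\1", stats)         # 5'10", fed to feetToCm
    weight_lb = re.sub(r".*\|\s*(\d*)\D*\|.*", r"\1", stats)  # 170, fed to lbToKg
    length_in = re.sub(r".*\|\s+(\d*)\D*,.*", r"\1", stats)   # 7
    cut = re.sub(r".*,\s*(\S*)\s*", r"\1", stats)             # Cut

    # prefixing "0 " turns 7 inches into "0 ft 7 in" so feetToCm can convert it,
    # which is why the fractional part of the length is lost
    length_cm = round(int(length_in) * 2.54)  # ~18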
Updated December 2, 2023 \ No newline at end of file diff --git a/scrapers/HimerosTV.yml b/scrapers/HimerosTV.yml index cc96388f2..351afe8c5 100644 --- a/scrapers/HimerosTV.yml +++ b/scrapers/HimerosTV.yml @@ -1,35 +1,38 @@ -name: himerostv -sceneByURL: - - action: scrapeXPath - url: - - himeros.tv - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //h1[@class='highlight']/text() - Date: - selector: //div[@class='update-info-row text-gray']/text()[1] - postProcess: - - replace: - - regex: \s*\|.* - with: - - parseDate: January 2, 2006 - Details: - selector: //div[@class='update-info-block']/div[@class='update-info-block'][1]//text() - concat: "\n\n" - Performers: - Name: - selector: //div[@class='item-title']/a[contains(@href,"/models/")] - Tags: - Name: - selector: //ul[@class='tags']/li/a/text() - Image: - selector: //script[contains(text(),'hidden_fake_trailer')]/text() - postProcess: - - replace: - - regex: .+(?:poster=")([^"]*).+ - with: https://himeros.tv$1 - - regex: "-1x.jpg" - with: "-3x.jpg" -# Last Updated January 09, 2022 +name: HimerosTV +sceneByURL: + - action: scrapeXPath + url: + - himeros.tv + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class='highlight']/text() + Date: + selector: //div[@class='update-info-row text-gray']/text()[1] + postProcess: + - replace: + - regex: \s*\|.* + with: + - parseDate: January 2, 2006 + Details: + selector: //div[@class='update-info-block']/div[@class='update-info-block'][1]//text() + concat: "\n\n" + Performers: + Name: + selector: //div[@class='item-title']/a[contains(@href,"/models/")] + Tags: + Name: + selector: //ul[@class='tags']/li/a/text() + Image: + selector: //script[contains(text(),'hidden_fake_trailer')]/text() + postProcess: + - replace: + - regex: .+(?:poster=")([^"]*).+ + with: https://himeros.tv$1 + - regex: "-1x.jpg" + with: "-3x.jpg" + Studio: + Name: + fixed: HimerosTV +# Last Updated November 06, 2023 \ No newline at end of file diff --git a/scrapers/Hustler.yml b/scrapers/Hustler.yml index 5e4caf352..041305adc 100644 --- a/scrapers/Hustler.yml +++ b/scrapers/Hustler.yml @@ -39,18 +39,25 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $content: //div[@class="panel-content"] scene: - Title: //h3/a/text() + Title: //h3/a/text()|$content//h3[contains(@class, "headline")]/a/text() URL: //link[@rel='canonical']/@href Performers: Name: - selector: //span[@class="attr-key" and contains(text(),"Cast")]/following-sibling::span[@class="attr-value"]/a[not(text()="Hustler Models") and not(text()="Barely Legal Models")]/text() + selector: //span[@class="attr-key" and contains(text(),"Cast")]/following-sibling::span[@class="attr-value"]/a[not(text()="Hustler Models") and not(text()="Barely Legal Models")]/text()|$content//span[contains(text(), "Cast")]//following-sibling::span/a/text() Date: - selector: //span[@class="attr-key" and contains(text(),"Released")]/following-sibling::span[@class="attr-value"]/text() + selector: //span[@class="attr-key" and contains(text(),"Released")]/following-sibling::span[@class="attr-value"]/text()|$content//span[contains(text(), "Released")]//following-sibling::span/text() postProcess: - parseDate: Jan 02, 2006 - Details: //meta[@property="og:description"]/@content|//div[@class="description"]/p - Image: //div[@class="img-container"]/img/@src + Details: //p[following-sibling::a[@class="clickable"]]|//meta[@property="og:description"]/@content|//div[@class="description"]/p + Image: + selector: 
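HimerosTV (above) never exposes the poster as a plain attribute; it lives inside an inline player script, so the Image selector regexes it out of the script text and then swaps the -1x thumbnail rendition for the larger -3x one. Roughly, in Python, with a made-up script snippet of the matching shape:

    import re

    script_text = '... hidden_fake_trailer ... poster="/content/1234/cover-1x.jpg" ...'
    m = re.search(r'poster="([^"]*)"', script_text)
    if m:
        poster = "https://himeros.tv" + m.group(1).replace("-1x.jpg", "-3x.jpg")
        # -> https://himeros.tv/content/1234/cover-3x.jpg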
//div[@class="img-container"]/img/@src|//div[contains(@class, "jw-preview")]/@style|$content/img/@src + postProcess: + - replace: + - regex: (?:background-image:\s*url\(")(.+)(?:"\).*);? + with: $1 Tags: Name: //div[@class="tag-list"]/a/text() Studio: @@ -61,4 +68,4 @@ driver: clicks: - xpath: //a[@class="clickable"] sleep: 2 -# Last Updated February 01, 2020 +# Last Updated June 30, 2023 diff --git a/scrapers/Hypnotube.yml b/scrapers/Hypnotube.yml index aef5ee1f3..6e0def19b 100644 --- a/scrapers/Hypnotube.yml +++ b/scrapers/Hypnotube.yml @@ -4,19 +4,36 @@ sceneByURL: url: - hypnotube.com scraper: sceneScraper + +sceneByName: + action: scrapeXPath + queryURL: https://hypnotube.com/search/{}/ + scraper: hypnotubeSearch + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + xPathScrapers: + hypnotubeSearch: + common: + $card: //div[@class='item-inner-col inner-col'] + scene: + Title: + selector: $card//a[contains(@href, '/video')]/@title + URL: + selector: $card//a[contains(@href, '/video')]/@href + Image: + selector: $card//img[contains(@data-mb, "shuffle-thumbs")]/@src + sceneScraper: common: $studio: //a[starts-with(@href,'https://hypnotube.com/user/')] scene: Title: //div[@class='item-tr-inner-col inner-col']/h1/text() Details: //div[@class='main-description']/text() - Image: - selector: //script[@type="text/javascript" and contains(text(),"og:image")]/text() - postProcess: - - replace: - - regex: '.+"og:image" content="([^"]+)".+' - with: $1 + Image: //meta[@property='og:image']/@content Studio: Name: $studio/@title URL: $studio/@href @@ -28,6 +45,5 @@ xPathScrapers: with: "" Tags: Name: //div[@class='tags-block']/a/text() - URL: //link[rel='canonical']/@href - -# Last Updated July 16, 2022 \ No newline at end of file + URL: //link[@rel='canonical']/@href +# Last Updated April 29, 2023 diff --git a/scrapers/IAFD.py b/scrapers/IAFD.py deleted file mode 100644 index 16edc4d99..000000000 --- a/scrapers/IAFD.py +++ /dev/null @@ -1,653 +0,0 @@ -import base64 -import datetime -import json -import string -import sys -import time -import re -import random -import requests -from urllib.parse import urlparse -# extra modules below need to be installed -try: - import py_common.log as log -except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) - sys.exit() - -try: - import cloudscraper -except ModuleNotFoundError: - print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr) - sys.exit() - -try: - from lxml import html -except ModuleNotFoundError: - print("You need to install the lxml module. 
(https://lxml.de/installation.html#installation)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr) - sys.exit() - -class Scraper: - def set_value(self,value): - if value: - if not re.match(r'(?i)no data', value[0]): - return value[0] - return None - - def set_stripped_value(self,value): - if value: - return value[0].strip("\n ") - return None - - def set_concat_value(self,sep,values): - if values: - return sep.join(values) - return None - - def set_named_value(self, name, value): - if value: - attr = { name: value[0] } - return attr - return None - - def set_named_values(self, name, values): - res = [] - for v in values: - r = { name: v } - res.append(r) - return res - - def print(self): - for a in dir(self): - if not a.startswith('__') and not callable(getattr(self, a)) : - if vars(self)[a]: - print("%s: %s" % (a , vars(self)[a] ) ) - - def to_json(self): - for a in dir(self): - if not a.startswith('__') and not callable(getattr(self, a)) : - if not vars(self)[a]: - del vars(self)[a] - return json.dumps(self.__dict__) - - def map_ethnicity(self, value): - ethnicity = { - 'Asian': 'asian', - 'Caucasian': 'white', - 'Black': 'black', - 'Latin': 'hispanic', - } - return ethnicity.get(value, value) - - def map_gender(self, value): - gender = { - 'f': 'Female', - 'm': 'Male', - } - return gender.get(value, value) - - def map_country(self, value): - country = { - # https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations - "Abkhaz": "Abkhazia", - "Abkhazian": "Abkhazia", - "Afghan": "Afghanistan", - "Albanian": "Albania", - "Algerian": "Algeria", - "American Samoan": "American Samoa", - "American": "United States of America", - "Andorran": "Andorra", - "Angolan": "Angola", - "Anguillan": "Anguilla", - "Antarctic": "Antarctica", - "Antiguan": "Antigua and Barbuda", - "Argentine": "Argentina", - "Argentinian": "Argentina", - "Armenian": "Armenia", - "Aruban": "Aruba", - "Australian": "Australia", - "Austrian": "Austria", - "Azerbaijani": "Azerbaijan", - "Azeri": "Azerbaijan", - "Bahamian": "Bahamas", - "Bahraini": "Bahrain", - "Bangladeshi": "Bangladesh", - "Barbadian": "Barbados", - "Barbudan": "Antigua and Barbuda", - "Basotho": "Lesotho", - "Belarusian": "Belarus", - "Belgian": "Belgium", - "Belizean": "Belize", - "Beninese": "Benin", - "Beninois": "Benin", - "Bermudan": "Bermuda", - "Bermudian": "Bermuda", - "Bhutanese": "Bhutan", - "BIOT": "British Indian Ocean Territory", - "Bissau-Guinean": "Guinea-Bissau", - "Bolivian": "Bolivia", - "Bonaire": "Bonaire", - "Bonairean": "Bonaire", - "Bosnian": "Bosnia and Herzegovina", - "Botswanan": "Botswana", - "Bouvet Island": "Bouvet Island", - "Brazilian": "Brazil", - "British Virgin Island": "Virgin Islands, British", - "British": "United Kingdom", - "Bruneian": "Brunei", - "Bulgarian": "Bulgaria", - "Burkinabé": "Burkina Faso", - "Burmese": "Burma", - "Burundian": "Burundi", - "Cabo Verdean": "Cabo Verde", - "Cambodian": "Cambodia", - "Cameroonian": "Cameroon", - "Canadian": "Canada", - "Cantonese": "Hong Kong", - "Caymanian": "Cayman Islands", - "Central African": "Central African Republic", - "Chadian": "Chad", - "Channel Island": "Guernsey", - #Channel Island: "Jersey" - "Chilean": "Chile", - "Chinese": "China", - "Christmas Island": "Christmas Island", - "Cocos Island": "Cocos (Keeling) Islands", - "Colombian": "Colombia", - "Comoran": "Comoros", - "Comorian": "Comoros", - "Congolese": "Congo", - "Cook 
Island": "Cook Islands", - "Costa Rican": "Costa Rica", - "Croatian": "Croatia", - "Cuban": "Cuba", - "Curaçaoan": "Curaçao", - "Cypriot": "Cyprus", - "Czech": "Czech Republic", - "Danish": "Denmark", - "Djiboutian": "Djibouti", - "Dominican": "Dominica", - "Dutch": "Netherlands", - "Ecuadorian": "Ecuador", - "Egyptian": "Egypt", - "Emirati": "United Arab Emirates", - "Emiri": "United Arab Emirates", - "Emirian": "United Arab Emirates", - "English people": "England", - "English": "England", - "Equatoguinean": "Equatorial Guinea", - "Equatorial Guinean": "Equatorial Guinea", - "Eritrean": "Eritrea", - "Estonian": "Estonia", - "Ethiopian": "Ethiopia", - "European": "European Union", - "Falkland Island": "Falkland Islands", - "Faroese": "Faroe Islands", - "Fijian": "Fiji", - "Filipino": "Philippines", - "Finnish": "Finland", - "Formosan": "Taiwan", - "French Guianese": "French Guiana", - "French Polynesian": "French Polynesia", - "French Southern Territories": "French Southern Territories", - "French": "France", - "Futunan": "Wallis and Futuna", - "Gabonese": "Gabon", - "Gambian": "Gambia", - "Georgian": "Georgia", - "German": "Germany", - "Ghanaian": "Ghana", - "Gibraltar": "Gibraltar", - "Greek": "Greece", - "Greenlandic": "Greenland", - "Grenadian": "Grenada", - "Guadeloupe": "Guadeloupe", - "Guamanian": "Guam", - "Guatemalan": "Guatemala", - "Guinean": "Guinea", - "Guyanese": "Guyana", - "Haitian": "Haiti", - "Heard Island": "Heard Island and McDonald Islands", - "Hellenic": "Greece", - "Herzegovinian": "Bosnia and Herzegovina", - "Honduran": "Honduras", - "Hong Kong": "Hong Kong", - "Hong Konger": "Hong Kong", - "Hungarian": "Hungary", - "Icelandic": "Iceland", - "Indian": "India", - "Indonesian": "Indonesia", - "Iranian": "Iran", - "Iraqi": "Iraq", - "Irish": "Ireland", - "Israeli": "Israel", - "Israelite": "Israel", - "Italian": "Italy", - "Ivorian": "Ivory Coast", - "Jamaican": "Jamaica", - "Jan Mayen": "Jan Mayen", - "Japanese": "Japan", - "Jordanian": "Jordan", - "Kazakh": "Kazakhstan", - "Kazakhstani": "Kazakhstan", - "Kenyan": "Kenya", - "Kirghiz": "Kyrgyzstan", - "Kirgiz": "Kyrgyzstan", - "Kiribati": "Kiribati", - "Korean": "South Korea", - "Kosovan": "Kosovo", - "Kosovar": "Kosovo", - "Kuwaiti": "Kuwait", - "Kyrgyz": "Kyrgyzstan", - "Kyrgyzstani": "Kyrgyzstan", - "Lao": "Lao People's Democratic Republic", - "Laotian": "Lao People's Democratic Republic", - "Latvian": "Latvia", - "Lebanese": "Lebanon", - "Lettish": "Latvia", - "Liberian": "Liberia", - "Libyan": "Libya", - "Liechtensteiner": "Liechtenstein", - "Lithuanian": "Lithuania", - "Luxembourg": "Luxembourg", - "Luxembourgish": "Luxembourg", - "Macanese": "Macau", - "Macedonian": "North Macedonia", - "Magyar": "Hungary", - "Mahoran": "Mayotte", - "Malagasy": "Madagascar", - "Malawian": "Malawi", - "Malaysian": "Malaysia", - "Maldivian": "Maldives", - "Malian": "Mali", - "Malinese": "Mali", - "Maltese": "Malta", - "Manx": "Isle of Man", - "Marshallese": "Marshall Islands", - "Martinican": "Martinique", - "Martiniquais": "Martinique", - "Mauritanian": "Mauritania", - "Mauritian": "Mauritius", - "McDonald Islands": "Heard Island and McDonald Islands", - "Mexican": "Mexico", - "Moldovan": "Moldova", - "Monacan": "Monaco", - "Mongolian": "Mongolia", - "Montenegrin": "Montenegro", - "Montserratian": "Montserrat", - "Monégasque": "Monaco", - "Moroccan": "Morocco", - "Motswana": "Botswana", - "Mozambican": "Mozambique", - "Myanma": "Myanmar", - "Namibian": "Namibia", - "Nauruan": "Nauru", - "Nepalese": "Nepal", - "Nepali": "Nepal", 
- "Netherlandic": "Netherlands", - "New Caledonian": "New Caledonia", - "New Zealand": "New Zealand", - "Ni-Vanuatu": "Vanuatu", - "Nicaraguan": "Nicaragua", - "Nigerian": "Nigeria", - "Nigerien": "Niger", - "Niuean": "Niue", - "Norfolk Island": "Norfolk Island", - "Northern Irish": "Northern Ireland", - "Northern Marianan": "Northern Mariana Islands", - "Norwegian": "Norway", - "Omani": "Oman", - "Pakistani": "Pakistan", - "Palauan": "Palau", - "Palestinian": "Palestine", - "Panamanian": "Panama", - "Papua New Guinean": "Papua New Guinea", - "Papuan": "Papua New Guinea", - "Paraguayan": "Paraguay", - "Persian": "Iran", - "Peruvian": "Peru", - "Philippine": "Philippines", - "Pitcairn Island": "Pitcairn Islands", - "Polish": "Poland", - "Portuguese": "Portugal", - "Puerto Rican": "Puerto Rico", - "Qatari": "Qatar", - "Romanian": "Romania", - "Russian": "Russia", - "Rwandan": "Rwanda", - "Saba": "Saba", - "Saban": "Saba", - "Sahraouian": "Western Sahara", - "Sahrawi": "Western Sahara", - "Sahrawian": "Western Sahara", - "Salvadoran": "El Salvador", - "Sammarinese": "San Marino", - "Samoan": "Samoa", - "Saudi Arabian": "Saudi Arabia", - "Saudi": "Saudi Arabia", - "Scottish": "Scotland", - "Senegalese": "Senegal", - "Serbian": "Serbia", - "Seychellois": "Seychelles", - "Sierra Leonean": "Sierra Leone", - "Singapore": "Singapore", - "Singaporean": "Singapore", - "Slovak": "Slovakia", - "Slovene": "Slovenia", - "Slovenian": "Slovenia", - "Solomon Island": "Solomon Islands", - "Somali": "Somalia", - "Somalilander": "Somaliland", - "South African": "South Africa", - "South Georgia Island": "South Georgia and the South Sandwich Islands", - "South Ossetian": "South Ossetia", - "South Sandwich Island": "South Georgia and the South Sandwich Islands", - "South Sudanese": "South Sudan", - "Spanish": "Spain", - "Sri Lankan": "Sri Lanka", - "Sudanese": "Sudan", - "Surinamese": "Suriname", - "Svalbard resident": "Svalbard", - "Swati": "Eswatini", - "Swazi": "Eswatini", - "Swedish": "Sweden", - "Swiss": "Switzerland", - "Syrian": "Syrian Arab Republic", - "Taiwanese": "Taiwan", - "Tajikistani": "Tajikistan", - "Tanzanian": "Tanzania", - "Thai": "Thailand", - "Timorese": "Timor-Leste", - "Tobagonian": "Trinidad and Tobago", - "Togolese": "Togo", - "Tokelauan": "Tokelau", - "Tongan": "Tonga", - "Trinidadian": "Trinidad and Tobago", - "Tunisian": "Tunisia", - "Turkish": "Turkey", - "Turkmen": "Turkmenistan", - "Turks and Caicos Island": "Turks and Caicos Islands", - "Tuvaluan": "Tuvalu", - "Ugandan": "Uganda", - "Ukrainian": "Ukraine", - "Uruguayan": "Uruguay", - "Uzbek": "Uzbekistan", - "Uzbekistani": "Uzbekistan", - "Vanuatuan": "Vanuatu", - "Vatican": "Vatican City State", - "Venezuelan": "Venezuela", - "Vietnamese": "Vietnam", - "Wallis and Futuna": "Wallis and Futuna", - "Wallisian": "Wallis and Futuna", - "Welsh": "Wales", - "Yemeni": "Yemen", - "Zambian": "Zambia", - "Zimbabwean": "Zimbabwe", - "Åland Island": "Åland Islands", - } - return country.get(value,value) - -stash_date = '%Y-%m-%d' -iafd_date = '%B %d, %Y' -iafd_date_scene = '%b %d, %Y' - - -def strip_end(text, suffix): - if suffix and text.endswith(suffix): - return text[:-len(suffix)] - return text - -def performer_query(query): - tree = scrape(f"https://www.iafd.com/results.asp?searchtype=comprehensive&searchstring={query}") - performer_names = tree.xpath('//table[@id="tblFem" or @id="tblMal"]//td[a[img]]/following-sibling::td[1]/a/text()') - performer_urls = tree.xpath('//table[@id="tblFem" or 
@id="tblMal"]//td[a[img]]/following-sibling::td[1]/a/@href') - performers = [] - for i, name in enumerate(performer_names): - p = { - 'Name': name, - 'URL': "https://www.iafd.com/" + performer_urls[i], - } - performers.append(p) - print(json.dumps(performers)) - if not performers: - log.warning(" found") - sys.exit(0) - - -def scrape(url, retries=0): - scraper = cloudscraper.create_scraper() - try: - scraped = scraper.get(url, timeout=(3,7)) - except requests.exceptions.Timeout as exc_time: - log.debug(f"Timeout: {exc_time}") - return scrape(url, retries+1) - except Exception as e: - log.error(f"scrape error {e}") - sys.exit(1) - if scraped.status_code >= 400: - if retries < 10: - log.debug(f'HTTP Error: {scraped.status_code}') - time.sleep(random.randint(1, 4)) - return scrape(url, retries+1) - else: - log.error(f'HTTP Error: {scraped.status_code}') - sys.exit(1) - return html.fromstring(scraped.content) - -def scrape_image(url, retries=0): - scraper = cloudscraper.create_scraper() - try: - scraped = scraper.get(url, timeout=(3,5)) - except requests.exceptions.Timeout as exc_time: - log.debug(f"Timeout: {exc_time}") - return scrape_image(url, retries+1) - except Exception as e: - log.debug(f"scrape error {e}") - return None - if scraped.status_code >= 400: - log.debug(f'HTTP Error: {scraped.status_code}') - if retries < 10: - time.sleep(random.randint(1, 4)) - return scrape_image(url, retries+1) - return None - b64img = base64.b64encode(scraped.content) - return "data:image/jpeg;base64," + b64img.decode('utf-8') - -def performer_from_tree(tree): - p = Scraper() - - performer_name = tree.xpath("//h1/text()") - p.name = p.set_stripped_value(performer_name) - - performer_gender = tree.xpath('//form[@id="correct"]/input[@name="Gender"]/@value') - p.gender = p.set_value(performer_gender) - p.gender = p.map_gender(p.gender) - - performer_url = tree.xpath('//div[@id="perfwith"]//*[contains(@href,"person.rme")]/@href') - if performer_url: - p.url = "https://www.iafd.com" + performer_url[0] - performer_twitter = tree.xpath('//p[@class="biodata"]/a[contains(text(),"http://twitter.com/")]/@href') - p.twitter = p.set_value(performer_twitter) - - performer_instagram = tree.xpath('//p[@class="biodata"]/a[contains(text(),"http://instagram.com/")]/@href') - p.instagram = p.set_value(performer_instagram) - - performer_birthdate = tree.xpath('(//p[@class="bioheading"][text()="Birthday"]/following-sibling::p)[1]//text()') - p.birthdate = p.set_value(performer_birthdate) - if p.birthdate: - p.birthdate = re.sub(r'(\S+\s+\d+,\s+\d+).*', r'\1', p.birthdate) - try: - p.birthdate = datetime.datetime.strptime(p.birthdate, iafd_date).strftime(stash_date) - except: - p.birthdate = None - if performer_birthdate[0].lower() != "no data": - dob = f'D.O.B. : {performer_birthdate[0]}\n' - try: - p.details = p.details + dob - except: - p.details = dob - pass - - performer_deathdate = tree.xpath('(//p[@class="bioheading"][text()="Date of Death"]/following-sibling::p)[1]//text()') - p.death_date = p.set_value(performer_deathdate) - if p.death_date: - p.death_date = re.sub(r'(\S+\s+\d+,\s+\d+).*', r'\1', p.death_date) - try: - p.death_date = datetime.datetime.strptime(p.death_date, iafd_date).strftime(stash_date) - except: - p.death_date = None - if performer_deathdate[0].lower() != "no data": - dod = f'D.O.D. 
: {performer_deathdate[0]}\n' - try: - p.details = p.details + dod - except: - p.details = dod - pass - - performer_ethnicity = tree.xpath('//div[p[text()="Ethnicity"]]/p[@class="biodata"][1]//text()') - p.ethnicity = p.set_value(performer_ethnicity) - p.ethnicity = p.map_ethnicity(p.ethnicity) - - performer_country = tree.xpath('//div/p[text()="Nationality"]/following-sibling::p[1]//text()') - p.country = p.set_value(performer_country) - if p.country: - p.country = re.sub(r'^American,.+','American',p.country) - p.country = p.map_country(p.country) - - performer_height = tree.xpath('//div/p[text()="Height"]/following-sibling::p[1]//text()') - p.height = p.set_value(performer_height) - if p.height: - p.height = re.sub(r'.*\((\d+)\s+cm.*', r'\1', p.height) - - performer_weight = tree.xpath('//div/p[text()="Weight"]/following-sibling::p[1]//text()') - p.weight = p.set_value(performer_weight) - if p.weight: - p.weight = re.sub(r'.*\((\d+)\s+kg.*', r'\1', p.weight) - - performer_haircolor = tree.xpath('//div/p[starts-with(.,"Hair Color")]/following-sibling::p[1]//text()') - p.hair_color = p.set_value(performer_haircolor) - - performer_measurements = tree.xpath('//div/p[text()="Measurements"]/following-sibling::p[1]//text()') - p.measurements = p.set_value(performer_measurements) - - performer_careerlength = tree.xpath('//div/p[@class="biodata"][contains(text(),"Started around")]/text()') - p.career_length = p.set_value(performer_careerlength) - if p.career_length: - p.career_length = re.sub(r'(\D+\d\d\D+)$', "", p.career_length) - - performer_aliases = tree.xpath('//div[p[@class="bioheading" and contains(normalize-space(text()),"Performer AKA")]]//div[@class="biodata" and not(text()="No known aliases")]/text()') - p.aliases = p.set_value(performer_aliases) - - performer_tattoos = tree.xpath('//div/p[text()="Tattoos"]/following-sibling::p[1]//text()') - p.tattoos = p.set_value(performer_tattoos) - - performer_piercings = tree.xpath('//div/p[text()="Piercings"]/following-sibling::p[1]//text()') - p.piercings = p.set_value(performer_piercings) - - performer_image_url = tree.xpath('//div[@id="headshot"]//img/@src') - if performer_image_url: - try: - log.debug(f"downloading image from {performer_image_url[0]}") - p.images = [scrape_image(performer_image_url[0])] - except Exception as e: - log.debug(f"error downloading image {e}") - - res = p.to_json() - #log.debug(res) - print(res) - sys.exit(0) - -def scene_from_tree(tree): - s = Scraper() - - scene_title = tree.xpath('//h1/text()') - s.title = s.set_stripped_value(scene_title) - - scene_date = tree.xpath('//div[@class="col-xs-12 col-sm-3"]//p[text() = "Release Date"]/following-sibling::p[1]//text()') - s.date = s.set_stripped_value(scene_date) - if s.date: - try: - s.date = datetime.datetime.strptime(s.date, iafd_date_scene).strftime(stash_date) - except: - s.date = None - - scene_details = tree.xpath('//div[@id="synopsis"]/div[@class="padded-panel"]//text()') - s.details = s.set_value(scene_details) - - scene_studio = tree.xpath('//div[@class="col-xs-12 col-sm-3"]//p[text() = "Studio"]/following-sibling::p[1]//text()') - s.studio = s.set_named_value("name",scene_studio) - - scene_performers = tree.xpath('//div[@class="castbox"]/p/a/text()') - s.performers = s.set_named_values("name", scene_performers) - - res = s.to_json() - print(res) - sys.exit(0) - -def movie_from_tree(tree): - m = Scraper() - movie_name = tree.xpath("//h1/text()") - m.name = m.set_stripped_value(movie_name) - if m.name: - m.name = re.sub(r'\s*\([0-9]+\)$', "", m.name) - - 
movie_directors = tree.xpath('//p[@class="bioheading"][contains(text(), "Directors")]/following-sibling::p[@class="biodata"][1]/a/text()') - m.direcors = m.set_stripped_value(movie_directors) - - movie_synopsis = tree.xpath('//div[@id="synopsis"]/div[@class="padded-panel"]//text()') - m.synopsis = m.set_value(movie_synopsis) - - movie_duration = tree.xpath('//p[@class="bioheading"][contains(text(), "Minutes")]/following-sibling::p[@class="biodata"][1]/text()') - m.duration = m.set_stripped_value(movie_duration) - - movie_date = tree.xpath('//p[@class="bioheading"][contains(text(), "Release Date")]/following-sibling::p[@class="biodata"][1]/text()') - m.date = m.set_stripped_value(movie_date) - if m.date: - try: - m.date = datetime.datetime.strptime(m.date, iafd_date_scene).strftime(stash_date) - except: - m.date = None - - movie_aliases = tree.xpath('//div[@class="col-sm-12"]/dl/dd//text()') - m.aliases = m.set_concat_value(", ", movie_aliases) - - movie_studio = tree.xpath('//p[@class="bioheading"][contains(text(),"Studio")]/following-sibling::p[@class="biodata"][1]//text()|//p[@class="bioheading"][contains(text(),"Distributor")]/following-sibling::p[@class="biodata"][1]//text()') - m.studio = m.set_named_value("name",movie_studio) - - res = m.to_json() - print(res) - #log.debug(res) - sys.exit(0) - -frag = json.loads(sys.stdin.read()) -#log.debug(json.dumps(frag)) -mode = "performer" - -if len(sys.argv)>1: - if sys.argv[1] == "query": - log.debug(f"searching for <{frag['name']}>") - performer_query(frag['name']) - if sys.argv[1] == "movie": - mode = "movie" - if sys.argv[1] == "scene": - mode = "scene" - -if not frag['url']: - log.error('No URL entered.') - sys.exit(1) - -url = frag["url"] -log.debug(f"scraping {url}") -random.seed() -tree = scrape(url) - -if mode == "movie": - movie_from_tree(tree) - -if mode == "scene": - scene_from_tree(tree) - -#by default performer scraper -performer_from_tree(tree) diff --git a/scrapers/IAFD/IAFD.py b/scrapers/IAFD/IAFD.py new file mode 100644 index 000000000..1d0716aea --- /dev/null +++ b/scrapers/IAFD/IAFD.py @@ -0,0 +1,801 @@ +import argparse +import json +import os +import random +import re +import requests +import sys +import time +from typing import Iterable, Callable, TypeVar +from datetime import datetime + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + +# extra modules below need to be installed +try: + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + import cloudscraper +except ModuleNotFoundError: + print( + "You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", + file=sys.stderr, + ) + sys.exit() + +try: + from lxml import html +except ModuleNotFoundError: + print( + "You need to install the lxml module. 
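One structural consequence of moving the script into scrapers/IAFD/ shows up right at the top of the new file: py_common still lives one level up, so the script pushes its parent directory onto sys.path before importing. Condensed to its effect:

    import os
    import sys

    # .../scrapers/IAFD/IAFD.py -> put .../scrapers on the import path
    sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
    import py_common.log as log  # now resolvable from the parent directory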
(https://lxml.de/installation.html#installation)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", + file=sys.stderr, + ) + sys.exit() + +stash_date = "%Y-%m-%d" +iafd_date = "%B %d, %Y" +iafd_date_scene = "%b %d, %Y" + +T = TypeVar("T") + + +def maybe( + values: Iterable[str], f: Callable[[str], (T | None)] = lambda x: x +) -> T | None: + """ + Returns the first value in values that is not "No data" after applying f to it + """ + return next( + (f(x) for x in values if not re.search(r"(?i)no data|director", x)), None + ) + + +def cleandict(d: dict): + return {k: v for k, v in d.items() if v} + + +def map_ethnicity(ethnicity: str): + ethnicities = { + "Asian": "asian", + "Caucasian": "white", + "Black": "black", + "Latin": "hispanic", + } + return ethnicities.get(ethnicity, ethnicity) + + +def map_gender(gender: str): + genders = { + "f": "Female", + "m": "Male", + } + return genders.get(gender, gender) + + +def map_country(country: str): + countries = { + # https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations + "Abkhaz": "Abkhazia", + "Abkhazian": "Abkhazia", + "Afghan": "Afghanistan", + "Albanian": "Albania", + "Algerian": "Algeria", + "American Samoan": "American Samoa", + "American": "United States of America", + "Andorran": "Andorra", + "Angolan": "Angola", + "Anguillan": "Anguilla", + "Antarctic": "Antarctica", + "Antiguan": "Antigua and Barbuda", + "Argentine": "Argentina", + "Argentinian": "Argentina", + "Armenian": "Armenia", + "Aruban": "Aruba", + "Australian": "Australia", + "Austrian": "Austria", + "Azerbaijani": "Azerbaijan", + "Azeri": "Azerbaijan", + "Bahamian": "Bahamas", + "Bahraini": "Bahrain", + "Bangladeshi": "Bangladesh", + "Barbadian": "Barbados", + "Barbudan": "Antigua and Barbuda", + "Basotho": "Lesotho", + "Belarusian": "Belarus", + "Belgian": "Belgium", + "Belizean": "Belize", + "Beninese": "Benin", + "Beninois": "Benin", + "Bermudan": "Bermuda", + "Bermudian": "Bermuda", + "Bhutanese": "Bhutan", + "BIOT": "British Indian Ocean Territory", + "Bissau-Guinean": "Guinea-Bissau", + "Bolivian": "Bolivia", + "Bonaire": "Bonaire", + "Bonairean": "Bonaire", + "Bosnian": "Bosnia and Herzegovina", + "Botswanan": "Botswana", + "Bouvet Island": "Bouvet Island", + "Brazilian": "Brazil", + "British Virgin Island": "Virgin Islands, British", + "British": "United Kingdom", + "Bruneian": "Brunei", + "Bulgarian": "Bulgaria", + "Burkinabé": "Burkina Faso", + "Burmese": "Burma", + "Burundian": "Burundi", + "Cabo Verdean": "Cabo Verde", + "Cambodian": "Cambodia", + "Cameroonian": "Cameroon", + "Canadian": "Canada", + "Cantonese": "Hong Kong", + "Caymanian": "Cayman Islands", + "Central African": "Central African Republic", + "Chadian": "Chad", + "Channel Island": "Guernsey", + "Chilean": "Chile", + "Chinese": "China", + "Christmas Island": "Christmas Island", + "Cocos Island": "Cocos (Keeling) Islands", + "Colombian": "Colombia", + "Comoran": "Comoros", + "Comorian": "Comoros", + "Congolese": "Congo", + "Cook Island": "Cook Islands", + "Costa Rican": "Costa Rica", + "Croatian": "Croatia", + "Cuban": "Cuba", + "Curaçaoan": "Curaçao", + "Cypriot": "Cyprus", + "Czech": "Czech Republic", + "Danish": "Denmark", + "Djiboutian": "Djibouti", + "Dominican": "Dominica", + "Dutch": "Netherlands", + "Ecuadorian": "Ecuador", + "Egyptian": "Egypt", + "Emirati": "United Arab Emirates", + "Emiri": "United Arab Emirates", + "Emirian": "United Arab Emirates", + 
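The maybe() helper above is the heart of this rewrite: it replaces the old Scraper class's set_value/set_stripped_value plumbing with one combinator that returns the first XPath hit not matching IAFD's "No data" placeholder, optionally transformed. A small self-check, assuming maybe() as defined above:

    from lxml import html

    tree = html.fromstring("<html><h1> Jane Doe </h1><p>No data</p></html>")
    assert maybe(tree.xpath("//h1/text()"), str.strip) == "Jane Doe"
    assert maybe(tree.xpath("//p/text()")) is None  # placeholder filtered out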
"English people": "England", + "English": "England", + "Equatoguinean": "Equatorial Guinea", + "Equatorial Guinean": "Equatorial Guinea", + "Eritrean": "Eritrea", + "Estonian": "Estonia", + "Ethiopian": "Ethiopia", + "European": "European Union", + "Falkland Island": "Falkland Islands", + "Faroese": "Faroe Islands", + "Fijian": "Fiji", + "Filipino": "Philippines", + "Finnish": "Finland", + "Formosan": "Taiwan", + "French Guianese": "French Guiana", + "French Polynesian": "French Polynesia", + "French Southern Territories": "French Southern Territories", + "French": "France", + "Futunan": "Wallis and Futuna", + "Gabonese": "Gabon", + "Gambian": "Gambia", + "Georgian": "Georgia", + "German": "Germany", + "Ghanaian": "Ghana", + "Gibraltar": "Gibraltar", + "Greek": "Greece", + "Greenlandic": "Greenland", + "Grenadian": "Grenada", + "Guadeloupe": "Guadeloupe", + "Guamanian": "Guam", + "Guatemalan": "Guatemala", + "Guinean": "Guinea", + "Guyanese": "Guyana", + "Haitian": "Haiti", + "Heard Island": "Heard Island and McDonald Islands", + "Hellenic": "Greece", + "Herzegovinian": "Bosnia and Herzegovina", + "Honduran": "Honduras", + "Hong Kong": "Hong Kong", + "Hong Konger": "Hong Kong", + "Hungarian": "Hungary", + "Icelandic": "Iceland", + "Indian": "India", + "Indonesian": "Indonesia", + "Iranian": "Iran", + "Iraqi": "Iraq", + "Irish": "Ireland", + "Israeli": "Israel", + "Israelite": "Israel", + "Italian": "Italy", + "Ivorian": "Ivory Coast", + "Jamaican": "Jamaica", + "Jan Mayen": "Jan Mayen", + "Japanese": "Japan", + "Jordanian": "Jordan", + "Kazakh": "Kazakhstan", + "Kazakhstani": "Kazakhstan", + "Kenyan": "Kenya", + "Kirghiz": "Kyrgyzstan", + "Kirgiz": "Kyrgyzstan", + "Kiribati": "Kiribati", + "Korean": "South Korea", + "Kosovan": "Kosovo", + "Kosovar": "Kosovo", + "Kuwaiti": "Kuwait", + "Kyrgyz": "Kyrgyzstan", + "Kyrgyzstani": "Kyrgyzstan", + "Lao": "Lao People's Democratic Republic", + "Laotian": "Lao People's Democratic Republic", + "Latvian": "Latvia", + "Lebanese": "Lebanon", + "Lettish": "Latvia", + "Liberian": "Liberia", + "Libyan": "Libya", + "Liechtensteiner": "Liechtenstein", + "Lithuanian": "Lithuania", + "Luxembourg": "Luxembourg", + "Luxembourgish": "Luxembourg", + "Macanese": "Macau", + "Macedonian": "North Macedonia", + "Magyar": "Hungary", + "Mahoran": "Mayotte", + "Malagasy": "Madagascar", + "Malawian": "Malawi", + "Malaysian": "Malaysia", + "Maldivian": "Maldives", + "Malian": "Mali", + "Malinese": "Mali", + "Maltese": "Malta", + "Manx": "Isle of Man", + "Marshallese": "Marshall Islands", + "Martinican": "Martinique", + "Martiniquais": "Martinique", + "Mauritanian": "Mauritania", + "Mauritian": "Mauritius", + "McDonald Islands": "Heard Island and McDonald Islands", + "Mexican": "Mexico", + "Moldovan": "Moldova", + "Monacan": "Monaco", + "Mongolian": "Mongolia", + "Montenegrin": "Montenegro", + "Montserratian": "Montserrat", + "Monégasque": "Monaco", + "Moroccan": "Morocco", + "Motswana": "Botswana", + "Mozambican": "Mozambique", + "Myanma": "Myanmar", + "Namibian": "Namibia", + "Nauruan": "Nauru", + "Nepalese": "Nepal", + "Nepali": "Nepal", + "Netherlandic": "Netherlands", + "New Caledonian": "New Caledonia", + "New Zealand": "New Zealand", + "Ni-Vanuatu": "Vanuatu", + "Nicaraguan": "Nicaragua", + "Nigerian": "Nigeria", + "Nigerien": "Niger", + "Niuean": "Niue", + "Norfolk Island": "Norfolk Island", + "Northern Irish": "Northern Ireland", + "Northern Marianan": "Northern Mariana Islands", + "Norwegian": "Norway", + "Omani": "Oman", + "Pakistani": "Pakistan", + "Palauan": 
"Palau", + "Palestinian": "Palestine", + "Panamanian": "Panama", + "Papua New Guinean": "Papua New Guinea", + "Papuan": "Papua New Guinea", + "Paraguayan": "Paraguay", + "Persian": "Iran", + "Peruvian": "Peru", + "Philippine": "Philippines", + "Pitcairn Island": "Pitcairn Islands", + "Polish": "Poland", + "Portuguese": "Portugal", + "Puerto Rican": "Puerto Rico", + "Qatari": "Qatar", + "Romanian": "Romania", + "Russian": "Russia", + "Rwandan": "Rwanda", + "Saba": "Saba", + "Saban": "Saba", + "Sahraouian": "Western Sahara", + "Sahrawi": "Western Sahara", + "Sahrawian": "Western Sahara", + "Salvadoran": "El Salvador", + "Sammarinese": "San Marino", + "Samoan": "Samoa", + "Saudi Arabian": "Saudi Arabia", + "Saudi": "Saudi Arabia", + "Scottish": "Scotland", + "Senegalese": "Senegal", + "Serbian": "Serbia", + "Seychellois": "Seychelles", + "Sierra Leonean": "Sierra Leone", + "Singapore": "Singapore", + "Singaporean": "Singapore", + "Slovak": "Slovakia", + "Slovene": "Slovenia", + "Slovenian": "Slovenia", + "Solomon Island": "Solomon Islands", + "Somali": "Somalia", + "Somalilander": "Somaliland", + "South African": "South Africa", + "South Georgia Island": "South Georgia and the South Sandwich Islands", + "South Ossetian": "South Ossetia", + "South Sandwich Island": "South Georgia and the South Sandwich Islands", + "South Sudanese": "South Sudan", + "Spanish": "Spain", + "Sri Lankan": "Sri Lanka", + "Sudanese": "Sudan", + "Surinamese": "Suriname", + "Svalbard resident": "Svalbard", + "Swati": "Eswatini", + "Swazi": "Eswatini", + "Swedish": "Sweden", + "Swiss": "Switzerland", + "Syrian": "Syrian Arab Republic", + "Taiwanese": "Taiwan", + "Tajikistani": "Tajikistan", + "Tanzanian": "Tanzania", + "Thai": "Thailand", + "Timorese": "Timor-Leste", + "Tobagonian": "Trinidad and Tobago", + "Togolese": "Togo", + "Tokelauan": "Tokelau", + "Tongan": "Tonga", + "Trinidadian": "Trinidad and Tobago", + "Tunisian": "Tunisia", + "Turkish": "Turkey", + "Turkmen": "Turkmenistan", + "Turks and Caicos Island": "Turks and Caicos Islands", + "Tuvaluan": "Tuvalu", + "Ugandan": "Uganda", + "Ukrainian": "Ukraine", + "Uruguayan": "Uruguay", + "Uzbek": "Uzbekistan", + "Uzbekistani": "Uzbekistan", + "Vanuatuan": "Vanuatu", + "Vatican": "Vatican City State", + "Venezuelan": "Venezuela", + "Vietnamese": "Vietnam", + "Wallis and Futuna": "Wallis and Futuna", + "Wallisian": "Wallis and Futuna", + "Welsh": "Wales", + "Yemeni": "Yemen", + "Zambian": "Zambia", + "Zimbabwean": "Zimbabwe", + "Åland Island": "Åland Islands", + } + return countries.get(country, country) + + +def clean_date(date: str) -> str | None: + date = date.strip() + cleaned = re.sub(r"(\S+\s+\d+,\s+\d+).*", r"\1", date) + for date_format in [iafd_date, iafd_date_scene]: + try: + return datetime.strptime(cleaned, date_format).strftime(stash_date) + except ValueError: + pass + log.warning(f"Unable to parse '{date}' as a date") + + +def clean_alias(alias: str) -> str | None: + # Aliases like "X or Y or Z" are indeterminate + # and should not be included + if " or " in alias: + return None + # We do not want studio disambiguation: "X (studio.com)" -> "X" + return re.sub(r"\s*\(.*$", "", alias) + + +def performer_haircolor(tree): + return maybe( + tree.xpath( + '//div/p[starts-with(.,"Hair Color")]/following-sibling::p[1]//text()' + ) + ) + + +def performer_weight(tree): + return maybe( + tree.xpath('//div/p[text()="Weight"]/following-sibling::p[1]//text()'), + lambda w: re.sub(r".*\((\d+)\s+kg.*", r"\1", w), + ) + + +def performer_height(tree): + return maybe( + 
tree.xpath('//div/p[text()="Height"]/following-sibling::p[1]//text()'), + lambda h: re.sub(r".*\((\d+)\s+cm.*", r"\1", h), + ) + + +def performer_country(tree): + return maybe( + tree.xpath('//div/p[text()="Nationality"]/following-sibling::p[1]//text()'), + lambda c: map_country(re.sub(r"^American,.+", "American", c)), + ) + + +def performer_ethnicity(tree): + return maybe( + tree.xpath('//div[p[text()="Ethnicity"]]/p[@class="biodata"][1]//text()'), + map_ethnicity, + ) + + +def performer_deathdate(tree): + return maybe( + tree.xpath( + '(//p[@class="bioheading"][text()="Date of Death"]/following-sibling::p)[1]//text()' + ), + clean_date, + ) + + +def performer_birthdate(tree): + return maybe( + tree.xpath( + '(//p[@class="bioheading"][text()="Birthday"]/following-sibling::p)[1]//text()' + ), + clean_date, + ) + + +def performer_instagram(tree): + return maybe( + tree.xpath( + '//p[@class="biodata"]/a[contains(text(),"http://instagram.com/")]/@href' + ) + ) + + +def performer_twitter(tree): + return maybe( + tree.xpath( + '//p[@class="biodata"]/a[contains(text(),"http://twitter.com/")]/@href' + ) + ) + + +def performer_url(tree): + return maybe( + tree.xpath('//div[@id="perfwith"]//*[contains(@href,"person.rme")]/@href'), + lambda u: f"https://www.iafd.com{u}", + ) + + +def performer_gender(tree): + def prepend_transgender(gender: str): + perf_id = next( + iter(tree.xpath('//form[@id="correct"]/input[@name="PerfID"]/@value')), "" + ) + trans = ( + "Transgender " + # IAFD are not consistent with their + if any(mark in perf_id for mark in ("_ts", "_ftm", "_mtf")) + else "" + ) + return trans + map_gender(gender) + + return maybe( + tree.xpath('//form[@id="correct"]/input[@name="Gender"]/@value'), + prepend_transgender, + ) + + +def performer_name(tree): + return maybe(tree.xpath("//h1/text()"), lambda name: name.strip()) + + +def performer_piercings(tree): + return maybe( + tree.xpath('//div/p[text()="Piercings"]/following-sibling::p[1]//text()') + ) + + +def performer_tattoos(tree): + return maybe( + tree.xpath('//div/p[text()="Tattoos"]/following-sibling::p[1]//text()') + ) + + +def performer_aliases(tree): + return maybe( + tree.xpath( + '//div[p[@class="bioheading" and contains(normalize-space(text()),"Performer AKA")]]//div[@class="biodata" and not(text()="No known aliases")]/text()' + ), + lambda aliases: ", ".join( + filter(None, (clean_alias(alias) for alias in aliases.split(", "))) + ), + ) + + +def performer_careerlength(tree): + return maybe( + tree.xpath( + '//div/p[@class="biodata"][contains(text(),"Started around")]/text()' + ), + lambda c: re.sub(r"(\D+\d\d\D+)$", "", c), + ) + + +def performer_measurements(tree): + return maybe( + tree.xpath('//div/p[text()="Measurements"]/following-sibling::p[1]//text()') + ) + + +def scene_director(tree): + return maybe( + tree.xpath( + '//p[@class="bioheading"][text()="Director" or text()="Directors"]/following-sibling::p[1]//text()' + ), + lambda d: d.strip(), + ) + + +def scene_studio(tree): + return maybe( + tree.xpath( + '//div[@class="col-xs-12 col-sm-3"]//p[text() = "Studio"]/following-sibling::p[1]//text()' + ), + lambda s: {"name": s}, + ) + + +def scene_details(tree): + return maybe(tree.xpath('//div[@id="synopsis"]/div[@class="padded-panel"]//text()')) + + +def scene_date(tree): + return maybe( + tree.xpath( + '//div[@class="col-xs-12 col-sm-3"]//p[text() = "Release Date"]/following-sibling::p[1]//text()' + ), + clean_date, + ) + + +def scene_title(tree): + return maybe( + tree.xpath("//h1/text()"), lambda t: 
re.sub(r"\s*\(\d{4}\)$", "", t.strip()) + ) + + +def movie_studio(tree): + return maybe( + tree.xpath( + '//p[@class="bioheading"][contains(text(),"Studio" or contains(text(),"Distributor"))]/following-sibling::p[@class="biodata"][1]//text()' + ), + lambda s: {"name": s}, + ) + + +def movie_date(tree): + # If there's no release date we will use the year from the title for an approximate date + return maybe( + tree.xpath( + '//p[@class="bioheading"][contains(text(), "Release Date")]/following-sibling::p[@class="biodata"][1]/text()' + ), + lambda d: clean_date(d.strip()), + ) or maybe( + tree.xpath("//h1/text()"), + lambda t: re.sub(r".*\(([0-9]+)\).*$", r"\1-01-01", t), + ) + + +def movie_duration(tree): + # Convert duration from minutes to seconds, but keep it a string because that's what stash expects + return maybe( + tree.xpath( + '//p[@class="bioheading"][contains(text(), "Minutes")]/following-sibling::p[@class="biodata"][1]/text()' + ), + lambda d: str(int(d) * 60), + ) + + +def movie_synopsis(tree): + return maybe(tree.xpath('//div[@id="synopsis"]/div[@class="padded-panel"]//text()')) + + +def movie_director(tree): + return maybe( + tree.xpath( + '//p[@class="bioheading"][contains(text(), "Directors")]/following-sibling::p[@class="biodata"][1]/a/text()' + ), + lambda d: d.strip(), + ) + + +def movie_title(tree): + return maybe( + tree.xpath("//h1/text()"), lambda t: re.sub(r"\s*\(\d+\)$", "", t.strip()) + ) + + +# Only create a single scraper: this saves time when scraping multiple pages +# because it doesn't need to get past Cloudflare each time +scraper = cloudscraper.create_scraper() + + +def scrape(url: str, retries=0): + try: + scraped = scraper.get(url, timeout=(3, 7)) + except requests.exceptions.Timeout as exc_time: + log.debug(f"Timeout: {exc_time}") + return scrape(url, retries + 1) + except Exception as e: + log.error(f"scrape error {e}") + sys.exit(1) + if scraped.status_code >= 400: + if retries < 10: + wait_time = random.randint(1, 4) + log.debug(f"HTTP Error: {scraped.status_code}, waiting {wait_time} seconds") + time.sleep(wait_time) + return scrape(url, retries + 1) + log.error(f"HTTP Error: {scraped.status_code}, giving up") + sys.exit(1) + return html.fromstring(scraped.content) + + +def performer_query(query): + tree = scrape( + f"https://www.iafd.com/results.asp?searchtype=comprehensive&searchstring={query}" + ) + performer_names = tree.xpath( + '//table[@id="tblFem" or @id="tblMal"]//td[a[img]]/following-sibling::td[1]/a/text()' + ) + performer_urls = tree.xpath( + '//table[@id="tblFem" or @id="tblMal"]//td[a[img]]/following-sibling::td[1]/a/@href' + ) + performers = [ + { + "Name": name, + "URL": f"https://www.iafd.com{url}", + } + for name, url in zip(performer_names, performer_urls) + ] + if not performers: + log.warning(f"No performers found for '{query}'") + return performers + + +def performer_from_tree(tree): + return { + "name": performer_name(tree), + "gender": performer_gender(tree), + "url": performer_url(tree), + "twitter": performer_twitter(tree), + "instagram": performer_instagram(tree), + "birthdate": performer_birthdate(tree), + "death_date": performer_deathdate(tree), + "ethnicity": performer_ethnicity(tree), + "country": performer_country(tree), + "height": performer_height(tree), + "weight": performer_weight(tree), + "hair_color": performer_haircolor(tree), + "measurements": performer_measurements(tree), + "career_length": performer_careerlength(tree), + "aliases": performer_aliases(tree), + "tattoos": performer_tattoos(tree), + "piercings": 
performer_piercings(tree), + "images": tree.xpath('//div[@id="headshot"]//img/@src'), + } + + +def scene_from_tree(tree): + return { + "title": scene_title(tree), + "date": scene_date(tree), + "details": scene_details(tree), + "director": scene_director(tree), + "studio": scene_studio(tree), + "performers": [ + {"name": name} for name in tree.xpath('//div[@class="castbox"]/p/a/text()') + ], + } + + +def movie_from_tree(tree): + return { + "name": movie_title(tree), + "director": movie_director(tree), + "synopsis": movie_synopsis(tree), + "duration": movie_duration(tree), + "date": movie_date(tree), + "aliases": ", ".join(tree.xpath('//div[@class="col-sm-12"]/dl/dd//text()')), + "studio": movie_studio(tree), + } + + +def main(): + parser = argparse.ArgumentParser("IAFD Scraper", argument_default="") + subparsers = parser.add_subparsers( + dest="operation", help="Operation to perform", required=True + ) + + subparsers.add_parser("search", help="Search for performers").add_argument( + "name", nargs="?", help="Name to search for" + ) + subparsers.add_parser("performer", help="Scrape a performer").add_argument( + "url", nargs="?", help="Performer URL" + ) + subparsers.add_parser("movie", help="Scrape a movie").add_argument( + "url", nargs="?", help="Movie URL" + ) + subparsers.add_parser("scene", help="Scrape a scene").add_argument( + "url", nargs="?", help="Scene URL" + ) + + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + args = parser.parse_args() + log.debug(f"Arguments from commandline: {args}") + # Script is being piped into, probably by Stash + if not sys.stdin.isatty(): + try: + frag = json.load(sys.stdin) + args.__dict__.update(frag) + log.debug(f"With arguments from stdin: {args}") + except json.decoder.JSONDecodeError: + log.error("Received invalid JSON from stdin") + sys.exit(1) + + if args.operation == "search": + name = args.name + if not name: + log.error("No query provided") + sys.exit(1) + log.debug(f"Searching for '{name}'") + matches = performer_query(name) + print(json.dumps(matches)) + sys.exit(0) + + url = args.url + if not url: + log.error("No URL provided") + sys.exit(1) + + log.debug(f"{args.operation} scraping '{url}'") + scraped = scrape(url) + result = {} + if args.operation == "performer": + result = performer_from_tree(scraped) + elif args.operation == "movie": + result = movie_from_tree(scraped) + elif args.operation == "scene": + result = scene_from_tree(scraped) + + print(json.dumps(cleandict(result))) + + +if __name__ == "__main__": + main() diff --git a/scrapers/IAFD.yml b/scrapers/IAFD/IAFD.yml similarity index 56% rename from scrapers/IAFD.yml rename to scrapers/IAFD/IAFD.yml index f4466d8c3..1a3acdc34 100644 --- a/scrapers/IAFD.yml +++ b/scrapers/IAFD/IAFD.yml @@ -1,20 +1,23 @@ name: IAFD +# requires: py_common + performerByName: - action: script - script: - - python3 - - IAFD.py - - query + action: script + script: + - python3 + - IAFD.py + - search performerByURL: - url: - - iafd.com + - iafd.com/person.rme/perfid= action: script script: - python3 - IAFD.py + - performer sceneByURL: - url: - - iafd.com/title.rme + - iafd.com/title.rme/id= action: script script: - python3 @@ -22,10 +25,10 @@ sceneByURL: - scene movieByURL: - url: - - iafd.com/title.rme/title= + - iafd.com/title.rme/id= action: script script: - python3 - IAFD.py - movie -# Last Updated August 10, 2022 +# Last Updated September 25, 2023 diff --git a/scrapers/IFeelMyself.py b/scrapers/IFeelMyself/IFeelMyself.py similarity index 90% rename from scrapers/IFeelMyself.py 
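The old positional sys.argv handling is gone; main() above declares argparse subcommands (search/performer/movie/scene) and then overlays whatever JSON fragment Stash pipes in on stdin, so one entry point serves both Stash and manual testing. Hypothetical invocations matching the IAFD.yml wiring above (the perfid value is elided):

    echo '{"url": "https://www.iafd.com/person.rme/perfid=..."}' | python3 IAFD.py performer
    python3 IAFD.py search "Jane Doe"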
rename to scrapers/IFeelMyself/IFeelMyself.py index dbafa1489..69dbb8105 100644 --- a/scrapers/IFeelMyself.py +++ b/scrapers/IFeelMyself/IFeelMyself.py @@ -33,7 +33,7 @@ def readJSONInput(): def extract_SceneInfo(table,cover_url=None): description = None if table.find(class_= ["blog_wide_new_text","entryBlurb"]): - description=table.find(class_= ["blog_wide_new_text","entryBlurb"]).get_text(strip=True) + description=table.find(class_= ["blog_wide_new_text","entryBlurb"]).get_text(" ", strip=True) description=unicodedata.normalize('NFKC', description).encode('ascii','ignore').decode('ascii') date = table.find(class_="blog-title-right").get_text(strip=True) #This is a BeautifulSoup element performer = table.find(class_= ["entryHeadingFlash","entryHeading"]).find_all("a")[1].get_text().replace("_"," ") @@ -41,7 +41,10 @@ def extract_SceneInfo(table,cover_url=None): debugPrint(f"performer:{performer}") date = datetime.strptime(date, '%d %b %Y').date().strftime('%Y-%m-%d') #Convert date to ISO format if cover_url == None: - cover_url=str(table.find("img")['src']) + if table.find("img"): + cover_url=str(table.find("img")['src']) + else: + cover_url=str(table.find("video")['poster']) title = table.find(class_= ["entryHeadingFlash","entryHeading"]).find('a').get_text().replace("\x92","'") media_id = re.search(r"\/(\d{3,5})\/",cover_url,re.I).group(1) artist_id = re.search(r"\/(f\d{4,5})",cover_url,re.I).group(1) @@ -86,7 +89,7 @@ def scrapeScene(filename,date,url): video_id = re.search(r"-(\d+)",filename,re.I).group(1) cookie_obj = create_cookie(name='ifm_search_keyword', value=artist_id, domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) - cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2210%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%222022%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') + cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) cookie_obj = 
create_cookie(name='ifeel_auth', value=ifeelauth, domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) @@ -96,13 +99,14 @@ def scrapeScene(filename,date,url): debugPrint(artist_id+"-"+video_id) tables = response.find_all(class_= ["blog_wide_news_tbl entry ppss-scene","entry ppss-scene"]) for table in tables: - img=str(table.find("img")['src']) - debugPrint(f"Image:{str(img)}") - if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): - debugPrint("Found a single match video!") - # Extract data from this single result - ret = extract_SceneInfo(table) - break + if table.find('video'): + img=str(table.find("video")['poster']) + debugPrint(f"Image:{str(img)}") + if (f"/{artist_id}-{video_id}vg.jpg" in img) or (f"/{artist_id}-{video_id}hs.jpg" in img): + debugPrint("Found a single match video!") + # Extract data from this single result + ret = extract_SceneInfo(table) + break else: sys.stderr.write("0 matches found! Checking offset") pages=int(response.find_all("a", class_="pagging_nonsel")[-1].get_text()) @@ -113,12 +117,13 @@ def scrapeScene(filename,date,url): response = browser.page tables = response.find_all(class_= ["blog_wide_news_tbl entry ppss-scene","entry ppss-scene"]) for table in tables: - img=str(table.find("img")["src"]) - debugPrint(f"Image:{img}") - if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): - sys.stderr.write("FOUND") - ret = extract_SceneInfo(table) - break + if table.find('video'): + img=str(table.find("video")["poster"]) + debugPrint(f"Image:{img}") + if (f"/{artist_id}-{video_id}vg.jpg" in img) or (f"/{artist_id}-{video_id}hs.jpg" in img): + sys.stderr.write("FOUND") + ret = extract_SceneInfo(table) + break else: sys.stderr.write("0 matches found!, check your filename") @@ -135,7 +140,7 @@ def scrapeScene(filename,date,url): debugPrint(f"Title: {title}") cookie_obj = create_cookie(name='ifm_search_keyword', value=title, domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) - cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2210%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%222022%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') + cookie_obj = create_cookie(name='ifm_prefs', 
value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) cookie_obj = create_cookie(name='ifeel_auth', value=ifeelauth, domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) @@ -190,7 +195,7 @@ def queryPerformer(perfname): browser.open("https://ifeelmyself.com/public/main.php") cookie_obj = create_cookie(name='tags_popup_shown', value='true', domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) - cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2210%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%222022%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') + cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) cookie_obj = create_cookie(name='ifm_search_keyword', value=perfname, domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) diff --git a/scrapers/IFeelMyself.yml b/scrapers/IFeelMyself/IFeelMyself.yml similarity index 93% rename from scrapers/IFeelMyself.yml rename to scrapers/IFeelMyself/IFeelMyself.yml index 60a3c5289..9c8c2c5e4 100644 --- a/scrapers/IFeelMyself.yml +++ b/scrapers/IFeelMyself/IFeelMyself.yml @@ -27,4 
+27,4 @@ sceneByURL: - python3 - IFeelMyself.py - url -# Last Updated October 29, 2022 +# Last Updated February 04, 2023 diff --git a/scrapers/IWantClips.yml b/scrapers/IWantClips.yml index 84e51e06a..a45dfbf14 100644 --- a/scrapers/IWantClips.yml +++ b/scrapers/IWantClips.yml @@ -28,14 +28,11 @@ xPathScrapers: URL: //span[@class='clip-title']/a/@href Image: //div[@class='clip-thumb-16-9']/a/img/@src sceneScraper: + common: + $model: //a[@class="modelLink"] scene: - Title: //span[@class="headline hidden-xs"]/text() - URL: - selector: //button[@class='btn btn-default cart addToCart']/@data-id - postProcess: - - replace: - - regex: ^ - with: https://iwantclips.com/store/item/ + Title: //h1[@class="headline hidden-xs"]/text() + URL: //link[@rel="canonical"]/@href Date: selector: //div[@class="col-xs-12 date fix"]/span/em/text() postProcess: @@ -43,23 +40,34 @@ sceneByURL: - regex: Published\s(.+) with: $1 - parseDate: Jan 2, 2006 - Details: //div[@class="col-xs-12 description fix"]/span + Details: + selector: //div[@class="col-xs-12 description fix"][last()]/span/text() + concat: "\n\n" Tags: Name: - selector: //div[@class="col-xs-12 hashtags fix"]/span/em | //div[@class="col-xs-12 category fix"]/span + selector: //div[@class="col-xs-12 hashtags hashtags-grey fix"]/span/em | //div[@class="col-xs-12 category fix"]/a concat: "," postProcess: + - replace: + - regex: 'Keywords:' + with: "" - replace: - regex: ',\s+' with: "," split: "," Image: - selector: //img[@class="videoPlayer show-flexible-picture"]/@src | //video[@class="video-js embed-responsive-item"]/@poster + selector: //div[contains(@class,'vidStuff')]//video[contains(@id,'html5_api')]/@poster | //div[contains(@class,'vidStuff')]//img/@src + postProcess: + - replace: + - regex: (\d*_.*((\.gif)|(\.mp4))) + with: t_$1 + - replace: + - regex: '(\.gif)|(\.mp4)' + with: ".jpg" Studio: - Name: - fixed: IWantClips + Name: $model Performers: - Name: //a[@class="modelLink"] + Name: $model performerScraper: common: $author: //li[@class="ais-Hits-item"][1]//span[contains(@class,'clip-author')] @@ -83,4 +91,4 @@ xPathScrapers: driver: useCDP: true -# Last Updated July 22, 2022 +# Last Updated December 14, 2023 diff --git a/scrapers/IsThisReal.yml b/scrapers/IsThisReal.yml deleted file mode 100644 index 6041926f3..000000000 --- a/scrapers/IsThisReal.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: "IsThisReal" -sceneByURL: - - action: scrapeXPath - url: - - isthisreal.com/en/video/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - common: - $videoscript: //script[contains(text(), 'ScenePlayerId = "player"')]/text() - $datascript: //script[contains(text(), 'sceneDetails')]/text() - $imagescript: //script[contains(text(), 'picPreview')]/text() - scene: - Title: - selector: $videoscript - postProcess: - - replace: - - regex: .+(?:"sceneTitle":")([^"]+).+ - with: $1 - - regex: .+(?:"sceneTitle":"").+ - with: - Date: - selector: $videoscript - postProcess: - - replace: - - regex: .+(?:"sceneReleaseDate":")([^"]+).+ - with: $1 - - parseDate: 2006-01-02 - Details: - selector: $datascript - postProcess: - - replace: - - regex: .+(?:sceneDescription":")(.+)(?:","sceneActors).+ - with: $1 - - regex: .+(?:"sceneDescription":"").+ - with: - - regex: <\\\/br>|<br>|<br\\\/>
- with: "\n" - Tags: - Name: - selector: $datascript - postProcess: - - replace: - - regex: .+(?:sceneCategories":\[)(.+)(?:\],"sceneViews").+ - with: $1 - - regex: \" - with: - split: "," - Performers: - Name: - selector: $datascript - postProcess: - - replace: - - regex: .+(?:"sceneActors":)(.+)(?:,"sceneCategories") - with: $1 - - regex: \{"actorId":"\d+","actorName":|}|\[|\]|" - with: - split: "," - Image: - selector: $imagescript - postProcess: - - replace: - - regex: .+(?:picPreview":")([\w:]+)(?:[\\\/]+)([\w-\.]+)(?:[\\\/]+)(\w+)(?:[\\\/]+)(\d+)(?:[\\\/]+)([\d_]+)(?:[\\\/]+)(\w+)(?:[\\\/]+)(\d+)(?:[\\\/]+)(\d+)(?:[\\\/]+)([\w]+)(?:[\\\/]+)([\w.]+).+ - with: $1//$2/$3/$4/$5/$6/$7/$8/$9/$10 - # if using the transport subdomain, parameters need to be passed - # otherwise a cropped square image is returned by default - - regex: (https:\/\/transform.+) - with: $1?width=960&height=543&enlarge=true - Studio: - Name: - selector: //link[@rel="canonical"]/@href - postProcess: - - replace: - - regex: .+(?:\/video\/)([^\/]+).+ - with: $1 - -# Last Updated November 08, 2020 diff --git a/scrapers/Iwara.yml b/scrapers/Iwara.yml new file mode 100644 index 000000000..80820513b --- /dev/null +++ b/scrapers/Iwara.yml @@ -0,0 +1,41 @@ +name: iwara.tv +sceneByURL: + - action: scrapeJson + url: + - iwara.tv/video/ + queryURL: "{url}" + queryURLReplace: + url: + - regex: "[^/]+$" + with: "" + - regex: "https://www.iwara.tv/video/" + with: "https://api.iwara.tv/video/" + scraper: sceneScraper + +jsonScrapers: + sceneScraper: + scene: + Title: title + Date: + selector: file.createdAt + postProcess: + - replace: + - regex: '(\d{4}-\d{2}-\d{2}).*' + with: $1 + - parseDate: "2006-01-02" + Studio: + Name: user.name + Tags: + Name: tags.#.id + Details: body + Image: + selector: "[file.id,thumbnail]" + concat: "," + postProcess: + - replace: + - regex: '\b([1-9])\b' + with: "0$1" + - replace: + - regex: (.*),(.*) + with: "https://i.iwara.tv/image/original/$1/thumbnail-$2.jpg" +# Last Updated October 05, 2023 diff --git a/scrapers/JBVideo.yml b/scrapers/JBVideo.yml new file mode 100644 index 000000000..e3d554581 --- /dev/null +++ b/scrapers/JBVideo.yml @@ -0,0 +1,25 @@ +name: JB Video +sceneByURL: + - action: scrapeXPath + url: + - jbvideo.com/videos/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@id="view_title"]/text() + Performers: + Name: //strong[text()='Starring:']/following-sibling::a[@onclick='loginPrompt()']/text() + Studio: + Name: + fixed: JB Video + Image: //div[contains(@class, 'main_vid')]/img/@src + Details: //span[@id='view_description']//text()[.!='\u00A0'] + Date: + selector: //strong[text()='Released:']/following-sibling::text()[1] + postProcess: + - replace: + - regex: " " + with: "" + - parseDate: "01/02/06" +# Last Updated December 17, 2023 diff --git a/scrapers/JacquieEtMichelElite.yml b/scrapers/JacquieEtMichelElite.yml new file mode 100644 index 000000000..9fb4d050b --- /dev/null +++ b/scrapers/JacquieEtMichelElite.yml @@ -0,0 +1,87 @@ +name: JacquieEtMichelElite + +movieByURL: + - action: scrapeXPath + url: + - jacquieetmichelelite.com/en/porn-movie + scraper: movieScraper +sceneByURL: + - action: scrapeXPath + url: + - jacquieetmichelelite.com + scraper: sceneScraper + +xPathScrapers: + movieScraper: + movie: + Name: //h1[contains(@class,"video-detail__title")] + Director: + selector: //li[contains(@class,"video-detail__info") and contains(.,'Director:')] + postProcess: + - replace: + - regex: "^Director:" + with: + Duration: + selector: 
//li[contains(@class,"video-detail__info") and contains(.,'Duration:')] + postProcess: + - replace: + - regex: "^Duration:" + with: + - regex: " min.+" + with: ":00" + Date: + selector: //script[contains(.,"datePublished")] + postProcess: + - replace: + - regex: ^.+datePublished":\s*"(\d\d\d\d-\d\d-\d\d).+$ + with: $1 + Synopsis: + selector: //div[contains(@class,"video-detail__description")]//text() + Studio: + Name: //li[contains(@class,"video-detail__info")]/strong[contains(.,'Studio:')]/following-sibling::a + FrontImage: //img[contains(@class,"video-detail__poster__img")]/@src + sceneScraper: + scene: + Title: + selector: //h1[contains(@class,"video-detail__title")] + Details: + selector: //div[contains(@class,"video-detail__description")]//text() + Date: + selector: //script[contains(.,"datePublished")] + postProcess: + - replace: + - regex: ^.+datePublished":\s*"(\d\d\d\d-\d\d-\d\d).+$ + with: $1 + Image: + selector: //div[contains(@class,"video-detail__handler")]/@style + postProcess: + - replace: + - regex: ^[^']*'([^']*).* + with: $1 + Studio: + Name: //li[contains(@class,"video-detail__info")]/strong[contains(.,'Studio:')]/following-sibling::a + Director: + selector: //li[contains(@class,"video-detail__info") and contains(.,'Director:')] + postProcess: + - replace: + - regex: "^Director:" + with: + Performers: + Name: //p[contains(@class,"actor-item__title")] + Movies: + Name: + selector: //a[contains(@class,"video-detail__poster")]/@href + postProcess: + - replace: + - regex: .+?([^/]+)\.html + with: $1 + - regex: "-" + with: " " + URL: + selector: //a[contains(@class,"video-detail__poster")]/@href + postProcess: + - replace: + - regex: ^ + with: https://www.jacquieetmichelelite.com + +# Last Updated April 15, 2023 diff --git a/scrapers/JacquieEtMichelTV.py b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.py similarity index 100% rename from scrapers/JacquieEtMichelTV.py rename to scrapers/JacquieEtMichelTV/JacquieEtMichelTV.py diff --git a/scrapers/JacquieEtMichelTV.yml b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml similarity index 94% rename from scrapers/JacquieEtMichelTV.yml rename to scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml index 30a90c91e..b92cafe2b 100644 --- a/scrapers/JacquieEtMichelTV.yml +++ b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml @@ -1,4 +1,6 @@ name: JacquieEtMicaelTV +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/JapanHDV.yml b/scrapers/JapanHDV.yml index 498da036f..ed6dd3149 100644 --- a/scrapers/JapanHDV.yml +++ b/scrapers/JapanHDV.yml @@ -15,7 +15,7 @@ xPathScrapers: $movieinfo: //div[@class="pure-u-1-5 hidden-sm hidden-xs"]/div[@class="video-info"] scene: Title: $movieinfo/p[starts-with(strong,"Title")]/text() - Details: //meta[@property="og:description"]/@content + Details: //div[contains(@class, "video-description")] Date: selector: //meta[@itemprop="datePublished"]/@content postProcess: diff --git a/scrapers/JavBus.yml b/scrapers/JavBus.yml new file mode 100644 index 000000000..c49ccf6ef --- /dev/null +++ b/scrapers/JavBus.yml @@ -0,0 +1,104 @@ +name: Javbus +sceneByFragment: + action: scrapeXPath + queryURL: https://www.javbus.com/{filename} + queryURLReplace: + filename: + - regex: -JG\d + with: "" + - regex: (.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+) + with: $2 + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - https://www.javbus.com + - https://www.seejav.bid + - https://www.cdnbus.lol + - https://www.dmmbus.lol + - https://www.seedmm.cfd + scraper: sceneScraper +sceneByName: + 
action: scrapeXPath + queryURL: https://www.javbus.com/search/{}&type=&parent=ce + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +performerByURL: + - action: scrapeXPath + url: + - https://www.javbus.com + - https://www.seejav.bid + - https://www.cdnbus.lol + - https://www.dmmbus.lol + - https://www.seedmm.cfd + scraper: performerScraper +performerByName: + action: scrapeXPath + queryURL: https://www.javbus.com/searchstar/{}&type=&parent=ce + scraper: performerSearch + +xPathScrapers: + performerSearch: + performer: + Name: //span[@class="mleft"] + URL: //*[@id="waterfall"]/div/a/@href + performerScraper: + performer: + Name: //*[@id="waterfall"]/div[1]/div/div[2]/span + Birthdate: + selector: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '生日')] + postProcess: + - replace: + - regex: ^(.*? ){1} + with: + Height: + selector: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '身高')] + postProcess: + - replace: + - regex: ^(.*? ){1} + with: + # Measurements: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '胸圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '腰圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '臀圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '罩杯')] + Image: + selector: //*[@id="waterfall"]/div[1]/div/div[1]/img/@src + postProcess: + - replace: + - regex: ^ + with: https://www.javbus.com + + sceneSearch: + scene: + Title: //div[@class="photo-info"]/span + URL: //*[@id="waterfall"]/div/a/@href + sceneScraper: + scene: + Title: + selector: //div[@class="col-md-3 info"]//span[contains(text(), '識別碼')]/../span[2]/text() + URL: + selector: /html/head/link[@hreflang="zh"]/@href + Date: + selector: //div[@class="col-md-3 info"]//span[contains(text(), '發行日期')]/../text() + Details: + selector: //div[@class="container"]/h3/text() + postProcess: + - replace: + - regex: ^(.*? 
){1} + with: + Tags: + Name: //div[@class="col-md-3 info"]//span[@class="genre"]/label/a/text() + Performers: + Name: //div[@id="video_cast"]/table/tbody/tr/td[@class="text"]/span/span/a + Director: //div[@id='video_director']/table/tbody/tr/td[@class="text"]/span/a/text() + Image: + selector: //div[@class="row movie"]/div[@class="col-md-9 screencap"]/a[@class="bigImage"]/img/@src + postProcess: + - replace: + - regex: ^ + with: https://www.javbus.com + Studio: + Name: //div[@class="col-md-3 info"]//span[contains(text(), '發行商')]/../a/text() + +# Last Updated July 06, 2023 \ No newline at end of file diff --git a/scrapers/JavLibrary_python.yml b/scrapers/JavLibrary_python.yml deleted file mode 100644 index 059ec71bb..000000000 --- a/scrapers/JavLibrary_python.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: "JavLibrary Python" -sceneByFragment: - action: script - script: - - python - - JavLibrary_python.py -sceneByName: - action: script - script: - - python - - JavLibrary_python.py - - searchName -sceneByQueryFragment: - action: script - script: - - python - - JavLibrary_python.py - - validSearch -# Last Updated December 20, 2022 diff --git a/scrapers/JavLibrary_python.py b/scrapers/JavLibrary_python/JavLibrary_python.py similarity index 67% rename from scrapers/JavLibrary_python.py rename to scrapers/JavLibrary_python/JavLibrary_python.py index de5792fad..35c06fe15 100644 --- a/scrapers/JavLibrary_python.py +++ b/scrapers/JavLibrary_python/JavLibrary_python.py @@ -1,4 +1,4 @@ -"""JAVLibrary/R18 python scraper""" +"""JAVLibrary python scraper""" import base64 import json import re @@ -36,23 +36,14 @@ ################### JAV_SEARCH_HTML = None -R18_SEARCH_HTML = None JAV_MAIN_HTML = None -R18_MAIN_HTML = None PROTECTION_CLOUDFLARE = False -R18_HEADERS = { - "User-Agent": - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0', - "Referer": "https://www.r18.com/" -} JAV_HEADERS = { "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0', "Referer": "http://www.javlibrary.com/" } -# Print extra debug messages -DEBUG_MODE = False # We can't add movie image atm in the same time as Scene STASH_SUPPORTED = False # Stash doesn't support Labels yet @@ -78,8 +69,6 @@ # Tags you want to be added in every scrape FIXED_TAGS = "" -# Get both Javlibrary and R18 tags -BOTH_TAGS = False # Split tags if they contain [,·] ('Best, Omnibus' -> 'Best','Omnibus') SPLIT_TAGS = False @@ -247,18 +236,12 @@ "Tits": "Small Tits" # 微乳 in Japanese } -def debug(to_print): - """debug only prints using log.debug if DEBUG_MODE is set to True""" - if DEBUG_MODE is False: - return - log.debug(to_print) - def checking_protection(url): global PROTECTION_CLOUDFLARE url_domain = re.sub(r"www\.|\.com", "", urlparse(url).netloc) - debug("=== Checking Status of Javlib site ===") + log.debug("=== Checking Status of Javlib site ===") PROTECTION_CLOUDFLARE = False for site in SITE_JAVLIB: url_n = url.replace(url_domain, site) @@ -279,9 +262,9 @@ def checking_protection(url): log.info( f"[{site}] Using this site for scraping ({response.status_code})" ) - debug("======================================") + log.debug("======================================") return site, response - debug("======================================") + log.debug("======================================") return None, None @@ -303,7 +286,7 @@ def send_request(url, head, retries=0): if JAV_DOMAIN is None: return None url = url.replace(url_domain, JAV_DOMAIN) - debug(f"[{threading.get_ident()}] Request URL: 
{url}") + log.debug(f"[{threading.get_ident()}] Request URL: {url}") try: response = requests.get(url, headers=head, timeout=10) except requests.exceptions.Timeout as exc_timeout: @@ -312,7 +295,7 @@ def send_request(url, head, retries=0): except Exception as exc_req: log.error(f"scrape error exception {exc_req}") if response.status_code != 200: - debug(f"[Request] Error, Status Code: {response.status_code}") + log.debug(f"[Request] Error, Status Code: {response.status_code}") response = None return response @@ -341,8 +324,8 @@ def getxpath(xpath, tree): xpath_result = [val for sublist in xpath_result for val in sublist] else: xpath_result = tree.xpath(xpath) - #debug(f"xPATH: {xpath}") - #debug(f"raw xPATH result: {xpath_result}") + #log.debug(f"xPATH: {xpath}") + #log.debug(f"raw xPATH result: {xpath_result}") list_tmp = [] for x_res in xpath_result: # for xpaths that don't end with /text() @@ -359,35 +342,9 @@ def getxpath(xpath, tree): # SEARCH PAGE -def r18_search(html, xpath): - if html is None: - return None - search_tree = lxml.html.fromstring(html.content) - search_url = getxpath(xpath['url'], search_tree) - search_serie = getxpath(xpath['series'], search_tree) - search_scene = getxpath(xpath['scene'], search_tree) - # There is only 1 scene, with serie. - # Could be useful if the movie already exists in Stash because you only need the name. - if len(search_scene) == 1 and len(search_serie) == 1 and len( - search_url) == 1: - r18_result["series_name"] = search_serie - if search_url: - search_url = search_url[0] - search_id = re.match(r".+id=(.+)/.*", search_url) - if search_id: - scene_url = f"https://www.r18.com/api/v4f/contents/{search_id.group(1)}?lang=en" - log.debug(f"Using API URL: {scene_url}") - main_html = send_request(scene_url, R18_HEADERS) - return main_html - log.warning(f"Can't find the 'id=' in the URL: {search_url}") - return None - debug("[R18] There is no result in search") - return None - - def jav_search(html, xpath): if "/en/?v=" in html.url: - debug(f"Using the provided movie page ({html.url})") + log.debug(f"Using the provided movie page ({html.url})") return html jav_search_tree = lxml.html.fromstring(html.content) jav_url = getxpath(xpath['url'], jav_search_tree) # ./?v=javme5it6a @@ -469,7 +426,7 @@ def buildlist_tagperf(data, type_scrape=""): def th_request_perfpage(page_url, perf_url): # vl_star.php?s=afhvw - #debug("[DEBUG] Aliases Thread: {}".format(threading.get_ident())) + #log.debug("[DEBUG] Aliases Thread: {}".format(threading.get_ident())) javlibrary_ja_html = send_request(page_url.replace("/en/", "/ja/"), JAV_HEADERS) if javlibrary_ja_html: @@ -482,18 +439,16 @@ def th_request_perfpage(page_url, perf_url): '"]/text()')[0]) if list_tmp: jav_result['performer_aliases'] = list_tmp - debug(f"Got the aliases: {list_tmp}") + log.debug(f"Got the aliases: {list_tmp}") except: - debug("Error with the aliases") + log.debug("Error with the aliases") else: - debug("Can't get the Jap HTML") + log.debug("Can't get the Jap HTML") def th_imageto_base64(imageurl, typevar): - #debug("[DEBUG] {} thread: {}".format(typevar,threading.get_ident())) + #log.debug("[DEBUG] {} thread: {}".format(typevar,threading.get_ident())) head = JAV_HEADERS - if typevar in ("R18Series", "R18"): - head = R18_HEADERS if isinstance(imageurl,list): for image_index, image_url in enumerate(imageurl): try: @@ -513,11 +468,9 @@ def th_imageto_base64(imageurl, typevar): image_index] = "data:image/jpeg;base64," + base64image.decode( 'utf-8') except: - debug( + log.debug( f"[{typevar}] Failed 
to get the base64 of the image" ) - if typevar == "R18Series": - r18_result["series_image"] = imageurl else: try: img = requests.get(imageurl.replace("ps.jpg", "pl.jpg"), @@ -533,17 +486,13 @@ def th_imageto_base64(imageurl, typevar): jav_result[ "image"] = "data:image/jpeg;base64," + base64image.decode( 'utf-8') - if typevar == "R18": - r18_result[ - "image"] = "data:image/jpeg;base64," + base64image.decode( - 'utf-8') - debug(f"[{typevar}] Converted the image to base64!") + log.debug(f"[{typevar}] Converted the image to base64!") except: - debug(f"[{typevar}] Failed to get the base64 of the image") + log.debug(f"[{typevar}] Failed to get the base64 of the image") return -#debug(f"[DEBUG] Main Thread: {threading.get_ident()}") +#log.debug(f"[DEBUG] Main Thread: {threading.get_ident()}") FRAGMENT = json.loads(sys.stdin.read()) SEARCH_TITLE = FRAGMENT.get("name") @@ -563,7 +512,7 @@ def th_imageto_base64(imageurl, typevar): sys.exit() if "searchName" in sys.argv: - debug(f"Using search with Title: {SEARCH_TITLE}") + log.debug(f"Using search with Title: {SEARCH_TITLE}") JAV_SEARCH_HTML = send_request( f"https://www.javlibrary.com/en/vl_searchbyid.php?keyword={SEARCH_TITLE}", JAV_HEADERS) @@ -572,33 +521,17 @@ def th_imageto_base64(imageurl, typevar): scene_domain = re.sub(r"www\.|\.com", "", urlparse(SCENE_URL).netloc) # Url from Javlib if scene_domain in SITE_JAVLIB: - debug(f"Using URL: {SCENE_URL}") + log.debug(f"Using URL: {SCENE_URL}") JAV_MAIN_HTML = send_request(SCENE_URL, JAV_HEADERS) - elif "r18.com" in SCENE_URL: - r18_id = re.match(r".+id=(.+)/.*", SCENE_URL) - if r18_id: - SCENE_URL = f"https://www.r18.com/api/v4f/contents/{r18_id.group(1)}?lang=en" - debug(f"Using API URL: {SCENE_URL}") - R18_MAIN_HTML = send_request(SCENE_URL, R18_HEADERS) - else: - log.warning(f"Can't find the 'id=' in the URL: {SCENE_URL}") else: - log.warning(f"The URL is not from Javlib/R18 ({SCENE_URL})") - if JAV_MAIN_HTML is None and R18_MAIN_HTML is None and SCENE_TITLE: - debug(f"Using search with Title: {SCENE_TITLE}") + log.warning(f"The URL is not from JavLibrary ({SCENE_URL})") + if JAV_MAIN_HTML is None and SCENE_TITLE: + log.debug(f"Using search with Title: {SCENE_TITLE}") JAV_SEARCH_HTML = send_request( f"https://www.javlibrary.com/en/vl_searchbyid.php?keyword={SCENE_TITLE}", JAV_HEADERS) # XPATH -r18_xPath_search = {} -r18_xPath_search[ - 'series'] = '//p[text()="TOP SERIES"]/following-sibling::ul//a/span[@class="item01"]/text()' -r18_xPath_search[ - 'url'] = '//li[contains(@class,"item-list")]/a//img[string-length(@alt)'\ - '=string-length(preceding::div[@class="genre01"]/span/text())]/ancestor::a/@href' -r18_xPath_search['scene'] = '//li[contains(@class,"item-list")]' - jav_xPath_search = {} jav_xPath_search[ 'url'] = '//div[@class="videos"]/div/a[not(contains(@title,"(Blu-ray"))]/@href' @@ -638,15 +571,13 @@ def th_imageto_base64(imageurl, typevar): "label"] = '//td[@class="header" and text()="Label:"]'\ '/following-sibling::td/span[@class="label"]/a/text()' jav_xPath["image"] = '//div[@id="video_jacket"]/img/@src' -jav_xPath["r18"] = '//a[text()="purchasing HERE"]/@href' -r18_result = {} jav_result = {} if "searchName" in sys.argv: if JAV_SEARCH_HTML: if "/en/?v=" in JAV_SEARCH_HTML.url: - debug(f"Scraping the movie page directly ({JAV_SEARCH_HTML.url})") + log.debug(f"Scraping the movie page directly ({JAV_SEARCH_HTML.url})") jav_tree = lxml.html.fromstring(JAV_SEARCH_HTML.content) jav_result["title"] = getxpath(jav_xPath["title"], jav_tree) jav_result["details"] = 
getxpath(jav_xPath["details"], jav_tree) @@ -681,16 +612,8 @@ def th_imageto_base64(imageurl, typevar): if JAV_SEARCH_HTML: JAV_MAIN_HTML = jav_search(JAV_SEARCH_HTML, jav_xPath_search) -if JAV_MAIN_HTML is None and R18_MAIN_HTML is None and SCENE_TITLE: - # If javlibrary doesn't have it, there is no way that R18 while have it but why not try... - log.info("Javlib doesn't give any result, trying search with R18...") - R18_SEARCH_HTML = send_request( - f"https://www.r18.com/common/search/searchword={SCENE_TITLE}/?lg=en", - R18_HEADERS) - R18_MAIN_HTML = r18_search(R18_SEARCH_HTML, r18_xPath_search) - if JAV_MAIN_HTML: - #debug("[DEBUG] Javlibrary Page ({})".format(JAV_MAIN_HTML.url)) + #log.debug("[DEBUG] Javlibrary Page ({})".format(JAV_MAIN_HTML.url)) jav_tree = lxml.html.fromstring(JAV_MAIN_HTML.content) # is not None for removing the FutureWarning... if jav_tree is not None: @@ -705,7 +628,7 @@ def th_imageto_base64(imageurl, typevar): "image"] or "noimage" in jav_result["image"]: # https://pics.dmm.com/mono/movie/n/now_printing/now_printing.jpg # https://pics.dmm.co.jp/mono/noimage/movie/adult_ps.jpg - debug( + log.debug( "[Warning][Javlibrary] Image was deleted or failed to load "\ f"({jav_result['image']})" ) @@ -742,179 +665,53 @@ def th_imageto_base64(imageurl, typevar): )) javlibrary_aliases_thread.daemon = True javlibrary_aliases_thread.start() - # R18 - if jav_result.get("r18"): - r18_search_url = re.sub(r".+\/\/", "https://", - jav_result["r18"][0]) - r18_search_url += '/' - R18_SEARCH_HTML = send_request(r18_search_url, R18_HEADERS) - R18_MAIN_HTML = r18_search(R18_SEARCH_HTML, r18_xPath_search) - -# MAIN PAGE -if R18_MAIN_HTML: - r18_main_api = R18_MAIN_HTML.json() - if r18_main_api["status"] != "OK": - log.error(f"R18 API Status {r18_main_api.get('status')}") - else: - r18_main_api = r18_main_api["data"] - if r18_main_api.get("title"): - r18_result['title'] = r18_main_api["dvd_id"] - if r18_main_api.get("release_date"): - r18_result['date'] = re.sub(r"\s.+", "", - r18_main_api["release_date"]) - if r18_main_api.get("detail_url"): - r18_result['url'] = r18_main_api["detail_url"] - if r18_main_api.get("comment"): - r18_result[ - 'details'] = f"{r18_main_api['title']}\n\n{r18_main_api['comment']}" - else: - r18_result['details'] = f"{r18_main_api['title']}" - if r18_main_api.get("series"): - r18_result['series_url'] = r18_main_api["series"].get("series_url") - r18_result['series_name'] = r18_main_api["series"].get("name") - if r18_main_api.get("maker"): - r18_result['studio'] = r18_main_api["maker"]["name"] - if r18_main_api.get("actresses"): - r18_result['performers'] = [ - x["name"] for x in r18_main_api["actresses"] - ] - if r18_main_api.get("categories"): - r18_result['tags'] = [ - x["name"] for x in r18_main_api["categories"] - ] - if r18_main_api.get("images"): - # Don't know if it's possible no image ?????? 
- r18_result['image'] = r18_main_api["images"]["jacket_image"][ "large"] - imageBase64_r18_thread = threading.Thread(target=th_imageto_base64, - args=( - r18_result["image"], - "R18", - )) - imageBase64_r18_thread.start() - -if R18_MAIN_HTML is None and JAV_MAIN_HTML is None: + +if JAV_MAIN_HTML is None: log.info("No results found") - scrape = None - print(json.dumps(scrape)) + print(json.dumps({})) sys.exit() -#debug('[DEBUG][JAV] {}'.format(jav_result)) -#debug('[DEBUG][R18] {}'.format(r18_result)) +log.debug('[JAV] {}'.format(jav_result)) # Time to scrape all data scrape = {} # DVD code -if jav_result.get('code'): - scrape['code'] = jav_result['code'][0] -# Title - Javlibrary > r18 -if r18_result.get('title'): - scrape['title'] = r18_result['title'] -if jav_result.get('title'): - scrape['title'] = jav_result['title'] - -# Date - R18 > Javlibrary -if jav_result.get('date'): - scrape['date'] = jav_result['date'][0] -if r18_result.get('date'): - scrape['date'] = r18_result['date'] - -# Director -if jav_result.get('director'): - scrape['director'] = jav_result['director'] - -# URL - Javlibrary > R18 -if r18_result.get('url'): - scrape['url'] = r18_result['url'] -if jav_result.get('url'): - scrape['url'] = jav_result['url'] - -# Details - R18 > Javlibrary -if jav_result.get('details'): - scrape['details'] = regexreplace(jav_result['details']) -if r18_result.get('details'): - scrape['details'] = regexreplace(r18_result['details']) -if r18_result.get('series_name'): - if scrape.get('details'): - scrape['details'] = scrape[ 'details'] + "\n\nFrom the series: " + regexreplace( - r18_result['series_name']) - else: - scrape['details'] = "From the series: " + regexreplace( - r18_result['series_name']) - -# Studio - Javlibrary > R18 -scrape['studio'] = {} -if r18_result.get('studio'): - scrape['studio']['name'] = r18_result['studio'] -if jav_result.get('studio'): - scrape['studio']['name'] = jav_result['studio'][0] - -# Not supported by Stash yet -if jav_result.get('label') and STASH_SUPPORT_LABELS: - scrape['label']['name'] = jav_result['label'] - -# Performer - Javlibrary > R18 -if r18_result.get('performers'): - scrape['performers'] = buildlist_tagperf(r18_result['performers']) -if jav_result.get('performers'): - if WAIT_FOR_ALIASES is True and IGNORE_ALIASES is False: - try: - if javlibrary_aliases_thread.is_alive() is True: - javlibrary_aliases_thread.join() - except NameError: - debug("No Jav Aliases Thread") - scrape['performers'] = buildlist_tagperf(jav_result, "perf_jav") - -# Tags - Javlibrary > R18 -if r18_result.get('tags') and jav_result.get('tags') and BOTH_TAGS is True: - scrape['tags'] = buildlist_tagperf(r18_result['tags'], "tags") + buildlist_tagperf( - jav_result['tags'], "tags") -else: - if r18_result.get('tags'): - scrape['tags'] = buildlist_tagperf(r18_result['tags'], "tags") - if jav_result.get('tags'): - scrape['tags'] = buildlist_tagperf(jav_result['tags'], "tags") - -if scrape.get("tags") and SPLIT_TAGS: - scrape['tags'] = [ - { - "name": tag_name.strip() - } for tag_dict in scrape['tags'] - for tag_name in tag_dict["name"].replace('·', ',').split(",") - ] - -# Image - Javlibrary > R18 -try: - if imageBase64_r18_thread.is_alive() is True: - imageBase64_r18_thread.join() - if r18_result.get('image'): - scrape['image'] = r18_result['image'] -except NameError: - debug("No image R18 Thread") +scrape['code'] = next(iter(jav_result.get('code', [])), None) +scrape['title'] = jav_result.get('title') +scrape['date'] = next(iter(jav_result.get('date', [])), None) +scrape['director'] = jav_result.get('director') or None +scrape['url'] = jav_result.get('url') +scrape['details'] = regexreplace(jav_result.get('details', "")) +scrape['studio'] = { + 'name': next(iter(jav_result.get('studio', [])), None), +} +scrape['label'] = { + 'name': jav_result.get('label'), +} + +if WAIT_FOR_ALIASES and not IGNORE_ALIASES: + try: + if javlibrary_aliases_thread.is_alive(): + javlibrary_aliases_thread.join() + except NameError: + log.debug("No Jav Aliases Thread") +scrape['performers'] = buildlist_tagperf(jav_result, "perf_jav") + +scrape['tags'] = buildlist_tagperf(jav_result.get('tags', []), "tags") +scrape['tags'] = [ + { + "name": tag_name.strip() + } for tag_dict in scrape['tags'] + for tag_name in tag_dict["name"].replace('·', ',').split(",") + ] + try: if imageBase64_jav_thread.is_alive() is True: imageBase64_jav_thread.join() if jav_result.get('image'): scrape['image'] = jav_result['image'] except NameError: - debug("No image JAV Thread") - -# Movie - R18 -if r18_result.get('series_url') and r18_result.get('series_name'): - tmp = {} - tmp['name'] = regexreplace(r18_result['series_name']) - tmp['url'] = r18_result['series_url'] - if STASH_SUPPORTED is True: - # If Stash support this part - if jav_result.get('image'): - tmp['front_image'] = jav_result["image"] - if r18_result.get('image'): - tmp['front_image'] = r18_result["image"] - if scrape.get('studio'): - tmp['studio'] = {} - tmp['studio']['name'] = scrape['studio']['name'] - scrape['movies'] = [tmp] + log.debug("No image JAV Thread") + print(json.dumps(scrape)) diff --git a/scrapers/JavLibrary_python/JavLibrary_python.yml b/scrapers/JavLibrary_python/JavLibrary_python.yml new file mode 100644 index 000000000..5066baa78 --- /dev/null +++ b/scrapers/JavLibrary_python/JavLibrary_python.yml @@ -0,0 +1,21 @@ +name: "JavLibrary Python" +# requires: py_common + +sceneByFragment: + action: script + script: + - python + - JavLibrary_python.py +sceneByName: + action: script + script: + - python + - JavLibrary_python.py + - searchName +sceneByQueryFragment: + action: script + script: + - python + - JavLibrary_python.py + - validSearch +# Last Updated September 18, 2023 diff --git a/scrapers/JeffsModels.yml b/scrapers/JeffsModels.yml deleted file mode 100644 index e18a750cb..000000000 --- a/scrapers/JeffsModels.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: "JeffsModels" -sceneByURL: - - action: scrapeXPath - url: - - jeffsmodels.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="updateInfo"]/h2/text() - Details: - selector: //div[@class="updateDescription"]/p/text() - Performers: - Name: //div[@class="updateModels"]/a/text() - Image: - selector: //a[@class="hi iconPlay"]/img[@alt="Main Sample"]/@src - postProcess: - - replace: - - regex: ^ - with: "https://jeffsmodels.com" - Studio: - Name: - fixed: "Jeff's Models" - Date: - selector: //span[@class="updateDate"]/text() - postProcess: - - parseDate: Jan 2, 2006 -# Last Updated March 19, 2021 diff --git a/scrapers/Algolia_Johnnyrapid.yml b/scrapers/Johnnyrapid/Johnnyrapid.yml similarity index 77% rename from scrapers/Algolia_Johnnyrapid.yml rename to scrapers/Johnnyrapid/Johnnyrapid.yml index cb6d91c04..008401646 100644 --- a/scrapers/Algolia_Johnnyrapid.yml +++ b/scrapers/Johnnyrapid/Johnnyrapid.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Johnny Rapid" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - johnnyrapid.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid sceneByFragment: action: script
script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - johnnyrapid.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid - gallery # Last Updated December 22, 2022 diff --git a/scrapers/JulesJordan.yml b/scrapers/JulesJordan.yml index a90920fed..09fc74ed4 100644 --- a/scrapers/JulesJordan.yml +++ b/scrapers/JulesJordan.yml @@ -2,12 +2,15 @@ name: "JulesJordan" sceneByURL: - action: scrapeXPath url: - - julesjordan.com/ - girlgirl.com/ - manuelferrara.com/ - theassfactory.com/ - spermswallowers.com/ scraper: sceneScraper + - action: scrapeXPath + url: + - julesjordan.com/ + scraper: newStyleScraper galleryByURL: - action: scrapeXPath @@ -85,6 +88,27 @@ xPathScrapers: Performers: *performersAttr Studio: *studioAttr + newStyleScraper: + common: + $scene: //div[@class="movie-player"] + scene: + Title: $scene//div[@class="movie_title"] + Date: + selector: $scene//span[contains(., "Date:")]/following-sibling::text() + postProcess: + - parseDate: 01/02/2006 + Details: $scene//span[contains(., "Description:")]/following-sibling::text() + Tags: + Name: $scene//a[contains(@href, "categories")]/text() + Performers: + Name: $scene//a[contains(@href, "models")]/text() + URL: $scene//a[contains(@href, "models")]/@href + Studio: *studioAttr + Movies: + Name: $scene//span[contains(., "Movie:")]/following-sibling::a/text() + URL: $scene//span[contains(., "Movie:")]/following-sibling::a/@href + Image: $scene//video/@poster + performerScraper: performer: Name: //span[@class='title_bar_hilite']/text() @@ -115,16 +139,9 @@ xPathScrapers: movieScraper: movie: - Name: //span[@class="title_bar_hilite"]/text() - Synopsis: - selector: //div[@class="dvd_extra_fields"]/div/text()[not(contains(.,"Studio"))] - concat: " " - postProcess: - - replace: - - regex: "Description:" - with: + Name: //meta[@property="og:title"]/@content Studio: - Name: //span[@class="update_date"]/text()[contains(.,"Studio")]/following-sibling::a/text() - FrontImage: //div[@class="front"]/a/img/@src0_3x - BackImage: //div[@class="back"]/a/img/@src0_3x -# Last Updated June 09, 2022 + Name: + fixed: Jules Jordan Video + FrontImage: //img[contains(@class, "dvd_box")]/@src0_3x +# Last Updated September 19, 2023 diff --git a/scrapers/KBProductions.py b/scrapers/KBProductions/KBProductions.py similarity index 100% rename from scrapers/KBProductions.py rename to scrapers/KBProductions/KBProductions.py diff --git a/scrapers/KBProductions.yml b/scrapers/KBProductions/KBProductions.yml similarity index 83% rename from scrapers/KBProductions.yml rename to scrapers/KBProductions/KBProductions.yml index b6199c7dd..d100ae752 100644 --- a/scrapers/KBProductions.yml +++ b/scrapers/KBProductions/KBProductions.yml @@ -1,4 +1,6 @@ name: "KB Productions" +# requires: py_common + sceneByURL: - url: # Keeping this to allow for updates using old urls @@ -8,6 +10,7 @@ sceneByURL: - inserted.com/videos/ - rickysroom.com/videos/ + - sidechick.com/videos/ action: script script: @@ -19,9 +22,10 @@ performerByURL: - inserted.com/tour/models/ - inserted.com/models/ - rickysroom.com/models/ + - sidechick.com/models/ action: script script: - python3 - KBProductions.py - performer -# Last Updated December 24, 2022 +# Last Updated March 14, 2023 diff 
--git a/scrapers/Karups.yml b/scrapers/Karups.yml index 8c9a2f6ac..31a033a53 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -1,25 +1,47 @@ -name: "Karups" +name: Karups sceneByURL: - action: scrapeXPath url: - karups.com/video/ scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - karups.com/gallery/ + scraper: galleryScraper + xPathScrapers: sceneScraper: scene: - Title: //h1[@class="page-heading"]/span[@class="title"]/text() - Date: + Studio: &studio + Name: + selector: //span[@class="sup-title"]/span + postProcess: + - map: + KarupsOW: Karups Older Women + KarupsHA: Karups Hometown Amateurs + KarupsPC: Karups Private Collection + Title: &title //h1[@class="page-heading"]/span[@class="title"]/text() + Date: &date selector: //span[@class="date"]/span[@class="content"]/text() postProcess: - replace: - regex: (st|nd|rd|th)\, with: "," - parseDate: Jan 02, 2006 - Details: - selector: //div[@class="content-information-description"]/p/text() - Performers: + Performers: &performers Name: //span[@class="models"]/span[@class="content"]//a/text() - Image: + Details: &details + selector: //div[@class="content-information-description"]/p/text() + Image: &image selector: //video[@id="player"]/@poster|//div[@class="video-poster"]/img/@src -# Last Updated November 08, 2020 + galleryScraper: + gallery: + Studio: *studio + Title: *title + Date: *date + Performers: *performers + Details: *details + +# Last Updated July 11, 2023 \ No newline at end of file diff --git a/scrapers/Kink.yml b/scrapers/Kink.yml index 9bda22f0a..9684aa6bf 100644 --- a/scrapers/Kink.yml +++ b/scrapers/Kink.yml @@ -17,6 +17,15 @@ sceneByFragment: - regex: .*\((\d+)\)\.[a-zA-Z\d]+$ #support filenames in the form scene - date - performer (12345).mp4 with: $1 scraper: sceneScraper +performerByName: + action: scrapeXPath + queryURL: https://www.kink.com/search?type=performers&q={} + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - kink.com/model + scraper: performerScraper xPathScrapers: sceneScraper: @@ -54,71 +63,149 @@ xPathScrapers: - regex: /channel/ with: "" - map: - 30-minutes-of-torment: 30 Minutes of Torment - alternadudes: Alternadudes - amator: Amator + # List of sites as of 2023-07-29 from https://www.kink.com/channels + # [...new Set([...document.querySelectorAll('h3 > a')] + # .map(a => a.href.split("/").pop() + ": " + a.innerText))] + # .toSorted() + # .join("\n") + analized: Analized animated-kink: Animated Kink ashley-fires-scifi-dreamgirls: Ashley Fires SciFi Dreamgirls aziani-iron: Aziani Iron + badfam-pov: Bad Family POV ball-gaggers: Ball Gaggers banana-jacks: Banana Jacks - bifuck: BiFuck - bizarre-video: Bizarre Video + bifuck: BiFUCK bizarre-video-transsexual: Bizarre Video Transsexual + bizarre-video: Bizarre Video bleu-films: Bleu Films bondage-liberation: Bondage Liberation - bonus-hole-boys: Bonus Hole Boys - bound-and-gagged: Bound & Gagged - bound-gang-bangs: Bound Gangbangs - bound-gods: Bound Gods - bound-in-public: Bound in Public + bound-gang-bangs: Bound Gang Bangs + bound-men-wanked: Bound Men Wanked brutal-sessions: Brutal Sessions - butt-machine-boys: Butt Machine Boys - captive-male: Captive Male - chanta-s-bitches: Chanta's Bitches + carmen-rivera: Carmen Rivera + cfnmeu: CFNMEU + ddf-network: DDF Network + deviant-hardcore: Deviant Hardcore device-bondage: Device Bondage + digital-sin: Digital Sin divine-bitches: Divine Bitches - electrosluts: Electro Sluts + electrosluts: Electrosluts everything-butt: Everything Butt - families-tied: 
Familes Tied + evolved-fights-lesbian-edition: Evolved Fights Lesbian Edition + evolved-fights: Evolved Fights + families-tied: Families Tied + fembot-academy: Fembot Academy + femdum: FemDum + femme-fatale-films: Femme Fatale Films + fetishnetwork: FetishNetwork filth-syndicate: Filth Syndicate filthy-femdom: Filthy Femdom foot-worship: Foot Worship - fucked-and-bound: Fucked and Bound fucking-machines: Fucking Machines + gangbang-chief: Gangbang Chief + gloryhole-secrets: Gloryhole Secrets hardcore-gangbang: Hardcore Gangbang + hardcore-punishments: Hardcore Punishments harmony-fetish: Harmony Fetish + hogtied-up: Hogtied Up hogtied: Hogtied + hot-legs-and-feet: Hot Legs & Feet + house-of-taboo: House Of Taboo kink-classics: Kink Classics - kink-compilations: Kink Compilations kink-features: Kink Features + kink-test-shoots: Kink Test Shoots kink-university: Kink University kinklive: KinkLive - kinkmen-classics: Kink Men Classics - kinkrawtestshoots: KinkRawTestShoots - kinktestshoots: KinkTestShoots kinky-bites: Kinky Bites - kinky-bites-men: Kinky Bites Men + lakeview-entertainment: Lakeview Entertainment + machine-dom: Machine Dom mean-bitch: Mean Bitch + medical-y-sado: Medical Y Sado men-in-pain: Men In Pain - men-on-edge: Men on Edge - my-friends-feet: My Friends Feet - naked-combat: Naked Combat + pascals-sub-sluts: Pascals Sub Sluts + pegging: Pegging + peghim: PegHim + plumperd: Plumperd + pornforce: Porn Force + pornstar-platinum: Pornstar Platinum pov-pickups: POV Pickups public-disgrace: Public Disgrace - sadistic-rope: Sadistic Rope - sex-and-submission: Sex and Submission + revenge-of-the-baroness: Revenge Of The Baroness + royal-fetish-films: Royal Fetish Films + savage-gangbang: Savage Gangbang + severe-sex-films: Severe Sex Films + sex-and-submission: Sex And Submission + sexual-disgrace: Sexual Disgrace + sister-wives: Sister Wives + slutinspection: Slut Inspection + spizoo: Spizoo + strapon-squad: Strapon Squad struggling-babes: Struggling Babes submissive-x: Submissive X - the-training-of-o: The Training of O + submissived: Submissived + sweet-femdom: Sweet FemDom + the-training-of-o: The Training Of O the-upper-floor: The Upper Floor + the-venus-girls: The Venus Girls + torment-time: Torment Time + transerotica: TransErotica ts-pussy-hunters: TS Pussy Hunters ts-seduction: TS Seduction + twisted-visual: Twisted Visual ultimate-surrender: Ultimate Surrender + wasteland: Wasteland water-bondage: Water Bondage whipped-ass: Whipped Ass wired-pussy: Wired Pussy URL: //link[@rel="canonical"]/@href + performerSearch: + common: + $result: //div/a[contains(@href, "/model") and contains(concat(" ", normalize-space(@class), " "), " model-link ")] + + performer: + Name: $result/img/@alt + URL: + selector: $result/@href + postProcess: + - replace: + - regex: ^ + with: https://www.kink.com + performerScraper: + performer: + Name: + selector: //h1/text() # //div[@font-size][number(translate(@font-size,"px",""))>=35]/text() + concat: " " + postProcess: + - replace: + - regex: ^\s+ + with: "" + - regex: \s+$ + with: + Twitter: + selector: '//div/a[contains(concat(" ", normalize-space(@class), " "), " social-link ") and contains(@href, "twitter.com")]/@href' + Image: + selector: //div/img[contains(@src, "imagedb")][1]/@src + Tattoos: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/tattoo")]//text()' + postProcess: + - map: + Tattoo: "Yes" + Piercings: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/pierced")]/span' + 
concat: "\n" + Tags: + Name: '//div/span[text()=" tags: "]/following-sibling::a/span/text()' + Details: + selector: '//div/span/p[@class="bio"]/following-sibling::p' + concat: "\n" + postProcess: + - replace: + - regex: '(?i)<br[^>]*>' + with: "" + URL: //link[@rel="canonical"]/@href driver: - useCDP: true -# Last Updated December 16, 2022 + headers: + - Key: User-Agent + Value: stash-scraper/1.0.0 +# Last Updated June 25, 2023 diff --git a/scrapers/KinkMen.yml b/scrapers/KinkMen.yml new file mode 100644 index 000000000..23da43e49 --- /dev/null +++ b/scrapers/KinkMen.yml @@ -0,0 +1,153 @@ +name: Kink Men +sceneByURL: + - action: scrapeXPath + url: + - kinkmen.com + scraper: sceneScraper + +sceneByFragment: + action: scrapeXPath + queryURL: https://www.kinkmen.com/shoot/{filename} + # constructs the scene URL from the filename, provided that the filename includes the scene id + queryURLReplace: + filename: + # the id in kink.com is a 1-6 digit number + - regex: ^(\d+)[^\d].* # support filenames in the form 12345_performer_other_data.mp4 + with: $1 + - regex: .*\((\d+)\)\.[a-zA-Z\d]+$ #support filenames in the form scene - date - performer (12345).mp4 + with: $1 + scraper: sceneScraper +performerByName: + action: scrapeXPath + queryURL: https://www.kinkmen.com/search?type=performers&q={} + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - kinkmen.com/model + - kink.com/model + scraper: performerScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@name="twitter:title"]/@content + Date: + selector: //span[@class="shoot-date"]/text() + postProcess: + - parseDate: January 2, 2006 + Details: + selector: //span[@class="description-text"]/*[self::p or self::ul] + concat: "\n\n" + Code: //div[@class="shoot-page"]/@data-shootid + Director: //span[@class="director-name"] + Performers: + Name: + selector: //p[@class="starring"]/span[@class="names h5"]/a/text() + postProcess: + - replace: + - regex: \, + with: "" + Tags: + Name: + selector: //a[@class="tag"] + postProcess: + - replace: + - regex: \, + with: "" + Image: //meta[@name="twitter:image"]/@content + Studio: + Name: + selector: //div[@class="column shoot-logo"]/a/@href + postProcess: + - replace: + - regex: /channel/ + with: "" + - map: + # List of sites as of 2023-11-06 from https://www.kinkmen.com/channels + # [...new Set([...document.querySelectorAll('h3 > a')] + # .map(a => a.href.split("/").pop() + ": " + a.innerText))] + # .toSorted() + # .join("\n") + 30-minutes-of-torment: 30 Minutes Of Torment + alternadudes: Alternadudes + bondage-compound: Bondage Compound + bonus-hole-boys: Bonus Hole Boys + bound-and-gagged: Bound And Gagged + bound-gods: Bound Gods + bound-in-public: Bound In Public + boynapped: Boynapped + butt-machine-boys: Butt Machine Boys + fetishnetwork-male: FetishNetwork Male + kinkmen-classics: Kink Men Classics + kink-men-series: Kink Men Series + kinkmen-test-shoot: Kink Men Test Shoots + kinky-bites-men: Kinky Bites Men + masqulin: Masqulin + men-at-play: Men At Play + men-on-edge: Men On Edge + missionary-boyz: Missionary Boyz + my-dirtiest-fantasy: My Dirtiest Fantasy + my-friends-feet: My Friends' Feet + naked-kombat: Naked Kombat + nasty-daddy: Nasty Daddy + str8hell: Str8Hell + sw-nude: SW Nude + taken-rough: Taken Rough + therapy-dick: Therapy Dick + titanmen-rough: TitanMen Rough + wrestlingmale: WrestlingMale + yes-father: Yes Father + yesirboys: Yesirboys + # Kinkmen.com pages still contain old/broken Kink.com link; commenting out URL as it is currently inaccurate + # URL: 
//link[@rel="canonical"]/@href + performerSearch: + common: + $result: //div/a[contains(@href, "/model") and contains(concat(" ", normalize-space(@class), " "), " model-link ")] + + performer: + Name: $result/img/@alt + URL: + selector: $result/@href + postProcess: + - replace: + - regex: ^ + with: https://www.kinkmen.com + performerScraper: + performer: + Name: + selector: //h1/text() # //div[@font-size][number(translate(@font-size,"px",""))>=35]/text() + concat: " " + postProcess: + - replace: + - regex: ^\s+ + with: "" + - regex: \s+$ + with: + Twitter: + selector: '//div/a[contains(concat(" ", normalize-space(@class), " "), " social-link ") and contains(@href, "twitter.com")]/@href' + Image: + selector: //div/img[contains(@src, "imagedb")][1]/@src + Tattoos: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/tattoo")]//text()' + postProcess: + - map: + Tattoo: "Yes" + Piercings: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/pierced")]/span' + concat: "\n" + Tags: + Name: '//div/span[text()=" tags: "]/following-sibling::a/span/text()' + Details: + selector: '//div/span/p[@class="bio"]/following-sibling::p' + concat: "\n" + postProcess: + - replace: + - regex: '(?i)]*>' + with: "" + URL: //link[@rel="canonical"]/@href +driver: + headers: + - Key: User-Agent + Value: stash-scraper/1.0.0 +# Last Updated November 06, 2023 \ No newline at end of file diff --git a/scrapers/LadyboyGold.yml b/scrapers/LadyboyGold.yml index 16b8a3842..966aa6c3c 100644 --- a/scrapers/LadyboyGold.yml +++ b/scrapers/LadyboyGold.yml @@ -4,6 +4,12 @@ performerByURL: url: - ladyboygold.com/index.php scraper: performerScraper +sceneByURL: + - action: scrapeXPath + url: + - ladyboygold.com/tour + - tsraw.com + scraper: sceneScraper xPathScrapers: performerScraper: common: @@ -42,4 +48,41 @@ xPathScrapers: - regex: ^ with: https://www.ladyboygold.com Details: //div[@class="profileBio"]/text() -# Last Updated May 17, 2022 + sceneScraper: + scene: + Title: + selector: //div[contains(@class, "show_video")]//h2/text() + postProcess: + - replace: + - regex: \ 4[Kk]$ + with: "" + Details: + selector: //div[contains(@class, "setDescription")]/p[contains(@class, "d-none")]/text() + concat: "\n\n" + Tags: + Name: //div[contains(@class, "tags")]//a/text() + Performers: + Name: + selector: //div[contains(@class, "show_video")]//h3/text() + postProcess: + - replace: + - regex: .*Ladyboy (.*) + with: $1 + split: ", " + Studio: + Name: + selector: //footer//p[contains(text(), 'Copyright')]/text()[2] + postProcess: + - replace: + - regex: ^(\d+\ )?(.+)\.\s+.* + with: $2 + - map: + TSRAW.com: TSRaw + LadyboyGold.com: LadyboyGold + Image: + selector: //div[contains(@class, "show_video")]//img/@style + postProcess: + - replace: + - regex: (background:\ ?url\()(.+)(?:\).+) + with: https://ladyboygold.com/$2 +# Last Updated December 29, 2022 diff --git a/scrapers/LegalPorno.yml b/scrapers/LegalPorno.yml deleted file mode 100644 index 2a88ad65c..000000000 --- a/scrapers/LegalPorno.yml +++ /dev/null @@ -1,58 +0,0 @@ -name: "LegalPorno" -sceneByURL: - - action: scrapeXPath - url: - - analvids.com/watch/ - scraper: sceneScraper -performerByURL: - - action: scrapeXPath - url: - - https://www.analvids.com - scraper: performerScraper -sceneByFragment: - action: script - script: - - python - # use python3 instead if needed - - LegalPorno.py - - query - -xPathScrapers: - sceneScraper: - common: - $description: //dl[@class="dl-horizontal scene-description__column"] - scene: - Title: - 
selector: //h1[@class="watchpage-title"]//text() - concat: " " - Date: - selector: //span[@title="Release date"]/a/text() - postProcess: - - parseDate: 2006-01-02 - Details: $description/div[3]/dd/text() - Code: - selector: //h1[@class="watchpage-title"] - postProcess: - - replace: - - regex: .+?([A-Z]{2,3}\d+)$|(.+) - with: $1 - Performers: - Name: $description/div[1]/dd/a[contains(@href,'analvids.com/model')]/text() - Studio: - Name: //div[@class="col-md-4 col-lg-3 hide-mobile text-right"]/div[@class="studio-director"]//a/text() - Tags: - Name: $description/div[2]//a/text() - Image: - selector: //div[@id="player"]/@style - postProcess: - - replace: - - regex: .+(https[^"]+).+ - with: $1 - - performerScraper: - performer: - Name: //h2 - Country: //td[@class='text-danger']//a[contains(@href,'nationality')]/text() - Image: - selector: //div[@class='model--avatar']//img/@src -# Last Updated December 16, 2022 diff --git a/scrapers/LetsDoeIt.yml b/scrapers/LetsDoeIt.yml index 94cdf1d39..971d7d0bf 100644 --- a/scrapers/LetsDoeIt.yml +++ b/scrapers/LetsDoeIt.yml @@ -3,19 +3,28 @@ sceneByURL: - action: scrapeXPath url: - amateureuro.com - - dirtycosplay.com + - dirtycosplay.com - doegirls.com + - doe-tv.com - forbondage.com + - latinamilf.com - letsdoeit.com - mamacitaz.com - transbella.com - vipsexvault.com + queryURL: "{url}" + queryURLReplace: + url: + - regex: doe-tv\.com + with: letsdoeit.com + scraper: sceneScraper xPathScrapers: sceneScraper: common: - $actors: //div[@class="actors"] + $actors: //div[@class="actors" or @class="-mvd-grid-actors"] $details: //div[@class="row sides-xs"] + $letsdoeit: //div[@class="-mvd-grid-more"]//span/a[@class="-mvd-list-url"] scene: Title: selector: //title/text() @@ -41,10 +50,10 @@ xPathScrapers: - regex: (.*\.?\!?)(?:\s-\s\w.*-.*)$ # remove Studio name at the end of a description with: $1 Tags: - Name: $details//div[@class="col"][4]//a/text()|$details//div[@class="col"][6]//a/text() + Name: $details//div[@class="col"][4]//a/text()|$details//div[@class="col"][6]//a/text()|$letsdoeit Performers: Name: $actors//span/a[contains(@href,"/models/")] Studio: Name: $actors//a//text() Image: //source[@type="image/webp"]/@srcset -# Last Updated August 19, 2022 +# Last Updated October 05, 2023 \ No newline at end of file diff --git a/scrapers/LingerieTales.yml b/scrapers/LingerieTales.yml deleted file mode 100644 index c6fe7882b..000000000 --- a/scrapers/LingerieTales.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: LingerieTales -sceneByURL: - - action: scrapeXPath - url: - - lingerietales.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //h1[@class="title"] - Date: - selector: //time - postProcess: - - parseDate: January 2, 2006 - Performers: - Name: //ul[@class="filters_buttons"]/li/a[contains(@href,"/model-item/")] - Tags: - Name: //ul[@class="filters_buttons"]/li/a[contains(@href,"/videotag/")] - Details: - selector: //div[@class="the_content_wrapper"]/p|//pre[@id="code"] - concat: "\n\n" - Image: - selector: //meta[@property="og:image"]/@content - Studio: - Name: - fixed: LingerieTales -# Last Updated May 21, 2021 diff --git a/scrapers/LittleCapriceDreams.yml b/scrapers/LittleCapriceDreams.yml index 203c6afa1..30a52a9b4 100644 --- a/scrapers/LittleCapriceDreams.yml +++ b/scrapers/LittleCapriceDreams.yml @@ -6,18 +6,36 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $project: //div[@id="main-project-content"] scene: Title: //meta[@property="og:title"]/@content - Details: //div[@class="vid_desc"]/p 
+ Details: $project//div[contains(@class, "desc-text")] Image: //meta[@property="og:image"]/@content Studio: - Name: //meta[@property="og:site_name"]/@content + Name: + selector: $project/@class + postProcess: + - replace: + - regex: ".*(buttmuse|caprice-divas|nasstyx|porn-lifestyle|pov-dreams|streetfuck|superprivatex|virtual-reality|wecumtoyou|xpervo).*" + with: $1 + - map: + buttmuse: BUTTMuse + caprice-divas: Caprice Divas + nasstyx: NASSTYx + porn-lifestyle: Pornlifestyle + pov-dreams: POVdreams + streetfuck: STREETFUCK + superprivatex: SuperprivateX + virtual-reality: Virtual Reality + wecumtoyou: Wecumtoyou + xpervo: Xpervo Date: - selector: //div[@class="vid_date"] + selector: //meta[@property="article:published_time"]/@content postProcess: - - parseDate: January 2, 2006 + - parseDate: "2006-01-02T15:04:05+00:00" Performers: - Name: //div[@class="vid_infos"]/div[contains(div, "Models")]//a + Name: $project//div[contains(@class, "project-models")]//a Tags: - Name: //div[@class="vid_infos"]/div[contains(div, "Tags")]//a -# Last Updated March 28, 2021 + Name: $project//div[contains(@class, "project-tags")]//a +# Last Updated August 21, 2023 diff --git a/scrapers/LordAardvark.yml b/scrapers/LordAardvark.yml new file mode 100644 index 000000000..a60d16415 --- /dev/null +++ b/scrapers/LordAardvark.yml @@ -0,0 +1,58 @@ +name: "LordAardvark" +galleryByURL: + - action: scrapeXPath + url: + - lordaardvark.com/html/galleries.html + scraper: galleryScraper +sceneByURL: + - action: scrapeXPath + url: + - lordaardvark.com/films/ + scraper: sceneScraper +xPathScrapers: + galleryScraper: + common: + $content: //div[contains(@class, "viewer-content-controls")] + gallery: + Date: + selector: $content/h2 + postProcess: + - parseDate: January 2, 2006 + Title: $content/h1 + Details: + selector: $content/p + Studio: + Name: + fixed: LordAardvark + sceneScraper: + scene: + Title: //div[@class="player-overlay-title"]/h1 + Details: + selector: //section[@class="player-overlay-description"]//div[@class="row"]/div[@class="col"]/* + concat: "\n\n" + Date: + selector: //meta[@property="video:release_date"]/@content + postProcess: + - replace: + - regex: .*(\d{4}-\d{2}-\d{2}).* + with: $1 + Image: //meta[@property="og:image"]/@content + Studio: + Name: + fixed: LordAardvark + Code: + selector: //script[contains(text(), "_filmOrigin")] + postProcess: + - replace: + - regex: '.*id: (\d+).*' + with: $1 + Movies: + Name: //p[contains(text(), "Series:")]/following-sibling::a/text() + Tags: + Name: //div[contains(@class, "col")]/a[@class="player-tag"]/text() + Performers: + Name: //p[contains(text(), "Characters:")]/following-sibling::a/text() + +driver: + useCDP: true +# Last Updated February 23, 2023 diff --git a/scrapers/LoveHerFeet.yml b/scrapers/LoveHerFilms.yml similarity index 91% rename from scrapers/LoveHerFeet.yml rename to scrapers/LoveHerFilms.yml index cc023f657..a8fbe9e4e 100644 --- a/scrapers/LoveHerFeet.yml +++ b/scrapers/LoveHerFilms.yml @@ -1,5 +1,4 @@ name: LoveHerFeet - sceneByURL: - action: scrapeXPath url: @@ -24,7 +23,6 @@ performerByURL: - loveherfilms.com/tour/models/ - shelovesblack.com/tour/models/ scraper: performerScraper - xPathScrapers: sceneScraper: scene: @@ -34,6 +32,15 @@ xPathScrapers: selector: //div[@class='date']/text() postProcess: - parseDate: January 2, 2006 + Code: + selector: //*/div[@class='photos vide-section']/a[1]/img/@data-src + postProcess: + - replace: + - regex: ^https?.+/(?:[a-zA-Z]+_(\d+)_.+|(\d+)_[a-zA-Z0-9]+(?:\.jpg)?.*) + with: $1$2 + - replace: + - regex: ^\D.* 
+ with: Image: selector: //div[@class='video']//img[contains(@class,'mainThumb') or contains(@class,'update_thumb')]/@src0_3x|//video/@poster|//base/@href concat: "|" @@ -44,7 +51,11 @@ xPathScrapers: - regex: ^/tour([^|]+)\|(.+) # video/@poster urls need a domain with: $2$1 Studio: - Name: //meta[@name='author']/@content + Name: + selector: //meta[@name='author']/@content + postProcess: + - map: + LoveHerFeet.com: "Love Her Feet" Tags: Name: //div[@class='video-tags']/a/text() Performers: @@ -156,4 +167,4 @@ xPathScrapers: with: "" Gender: fixed: "Female" -# Last Updated August 28, 2022 +# Last Updated April 13, 2023 diff --git a/scrapers/Loyalfans/Loyalfans.py b/scrapers/Loyalfans/Loyalfans.py new file mode 100644 index 000000000..725c1e158 --- /dev/null +++ b/scrapers/Loyalfans/Loyalfans.py @@ -0,0 +1,179 @@ +import os +import sys +import json + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + # Import Stash logging system from py_common + from py_common import log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo. (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + # Import necessary modules. + import requests + import re + +# If one of these modules is not installed: +except ModuleNotFoundError: + log.error("You need to install the python modules mentioned in requirements.txt") + log.error( + "If you have pip (normally installed with python), run this command in a terminal from the directory the scraper is located: pip install -r requirements.txt" + ) + sys.exit() + +# Lookup table for tag replacements. The tags are in the form of hashtags, and often have multiple words mashed together. +# This is a quick and dirty way of turning these into meaningful data, and can be expanded on to taste. +TAG_REPLACEMENTS = { + "Fin Dom": "Findom", + "Fem Dom": "Femdom", + "bigtits": "Big Tits", + "titworship": "Tit Worship", + "financialdomination": "Financial Domination", + "R I P O F F": "ripoff", + "pussydenial": "pussy denial", +} + + +def output_json_url(title, tags, url, image, studio, performers, description, date): + # Create a tag dictionary from the tag list. + tag_dicts = [{"name": tag.strip(". ")} for tag in tags if tag.strip() != "N/A"] + # We're only using the value of 'performers' for our performer list + performer_dicts = [{"name": performer} for performer in performers] + # Dump all of this as JSON data. + return json.dumps( + { + "title": title, + "tags": tag_dicts, + "url": url, + "image": image, + "studio": {"name": studio}, + "performers": performer_dicts, + "details": description, + "date": date, + }, + indent=2, + ) + + +def get_cookies(scene_url: str): + # Establish a session. + session = requests.Session() + # Set headers required for a successful POST query. + headers = { + "Accept": "application/json", + "Accept-Language": "en-US,en;q=0.9", + "Content-Type": "application/json", + "Origin": "https://www.loyalfans.com", + "Referer": scene_url, + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", + } + # URL of the system status API. 
This is called when a Loyalfans page is first loaded from what I can tell. + url = "https://www.loyalfans.com/api/v2/system-status" + # Perform a POST query to capture initial cookies. + response = session.post(url, headers=headers) + # Return these cookies. + return response.cookies + + +def get_api_url(scene_url: str): + # Extract the last component of the scene URL. + end_segment = scene_url.split("/")[-1] + # Append this to the API link. As far as I can tell, post names in scene URLs are unique. I have yet to encounter any data mismatches. + return f"https://www.loyalfans.com/api/v1/social/post/{end_segment}" + + +def get_json(scene_url: str): + # Set headers required for a successful request. + headers = { + "Accept": "application/json", + "Accept-Language": "en-US,en;q=0.9", + "Content-Type": "application/json", + "Origin": "https://www.loyalfans.com", + "Referer": scene_url, + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", + } + # Set cookies using get_cookies function. + cookie_set = get_cookies(scene_url) + # Perform request using the API URL of the scene in question, adding headers and cookies. + response = requests.get(get_api_url(scene_url), headers=headers, cookies=cookie_set) + # Capture the response as JSON. + json_data = response.json() + # Return the JSON data. + return json_data + + +def scrape_scene(scene_url: str) -> dict: + # Capture JSON relating to this scene from the Loyalfans API. + json = get_json(scene_url) + # Extract title from the JSON and strip out any whitespace. + title = json["post"]["title"].strip() + # Use the video thumbnail/preview poster as the image. + image = json["post"]["video_object"].get("poster") + # Extract description, fix apostrophes and remove HTML newline tags. + description = json["post"]["content"].replace("\u2019", "'").replace("
", "") + # Sometimes hashtags are included at the bottom of the description. This line strips all that junk out, as we're utilising the hashtags for the tags. Also tidies up double-spacing and ellipses. + description = ( + re.sub(r"#\w+\b", "", description) + .strip() + .replace(" ", " ") + .replace(". . .", "...") + ) + # Extract studio name. + studio = json["post"]["owner"]["display_name"] + # Extract date. The JSON returns the date in the format '2023-06-18 12:00:00', but we only need the date, so the time is stripped out. + date = json["post"]["created_at"]["date"].split(" ")[0] + # Extract tags. + tags_list = json["post"]["hashtags"] + fixed_tags = [] + # For every tag we find: + for tag in tags_list: + # Remove the hash from the start. + tag = tag[1:] + modified_tag = tag + # Split CamelCase tags into separate words. + modified_tag = re.sub(r"(? requests.Response(): +def get_request(url: str) -> requests.Response: """ wrapper function over requests.get to set common options """ - mv_headers = { - "User-Agent": - 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0', - "Referer": "https://www.manyvids.com/" - } - return requests.get(url, headers=mv_headers, timeout=(3, 10)) + log.trace(f"GET {url}") + return requests.get(url, timeout=(3, 10)) + +def post_request(url: str, json: dict) -> requests.Response: + """ + wrapper function over requests.post to set common options + """ + with requests.Session() as session: + log.trace(f"POST {url} {json}") + poke = session.get("https://www.manyvids.com/Vids/", timeout=(3, 10)) + root = html.fromstring(poke.content) + token = root.xpath('//html/@data-mvtoken') + if not token: + log.error("Failed to get @data-mvtoken from page") + xsrf_token = session.cookies.get("XSRF-TOKEN") + if not xsrf_token: + log.error("Failed to get XSRF-TOKEN from cookies") + res = session.post( + url, + json=json | {"mvtoken": token[0]}, + headers={"X-XSRF-TOKEN": xsrf_token, "x-requested-with": "XMLHttpRequest"}, + timeout=(3, 10) + ) + return res -def get_model_name(model_id: str) -> str: +def get_model_name(model_id: str) -> str | None: """ Get model name from its id Manyvids redirects to the model profile page as long as you provide the id in the url - The url_handler ( we use x) doesnt matter if the model_id is valid + The url_handler (we use x) doesnt matter if the model_id is valid """ try: response = get_request( @@ -58,12 +77,11 @@ def get_model_name(model_id: str) -> str: name = root.xpath( '//h1[contains(@class,"mv-model-display__stage-name")]/text()[1]') return name[0].strip() - except: - log.debug(f"Failed to get name for {model_id}") - return None + except Exception as exc: + log.debug(f"Failed to get name for '{model_id}': {exc}") -def clean_text(details: str) -> dict: +def clean_text(details: str) -> str: """ remove escaped backslashes and html parse the details text """ @@ -74,9 +92,6 @@ def clean_text(details: str) -> dict: def map_ethnicity(ethnicity: str) -> str: - if ethnicity is None: - return None - ethnicities = { "Alaskan": "alaskan", "Asian": "asian", @@ -91,10 +106,7 @@ def map_ethnicity(ethnicity: str) -> str: "Other": "other" } - found = ethnicities.get(ethnicity) - if found: - return found - return ethnicity + return ethnicities.get(ethnicity, ethnicity) def get_scene(scene_id: str) -> dict: """ @@ -105,29 +117,46 @@ def get_scene(scene_id: str) -> dict: f"https://video-player-bff.estore.kiwi.manyvids.com/videos/{scene_id}" ) except requests.exceptions.RequestException as api_error: - log.error(f"Error {api_error} 
while requesting data from manyvids api") - return None + log.error(f"Error {api_error} while requesting data from API") + return {} meta = response.json() + log.debug(f"Raw response from API: {json.dumps(meta)}") scrape = {} - scrape['title'] = meta.get('title') - scrape['details'] = meta.get('description') + scrape['title'] = meta['title'] + scrape['details'] = unescape(meta['description']) + scrape['code'] = scene_id + + sceneURLPartial = meta.get('url') + if sceneURLPartial: + scrape["url"] = f'https://www.manyvids.com{sceneURLPartial}' + else: + log.debug("No scene url found") + if meta.get('modelId'): model_name = get_model_name(meta['modelId']) if model_name: - scrape['performers'] = [] - scrape['performers'].append({'name': model_name}) + scrape['performers'] = [{'name': model_name}] + scrape['studio'] = {"name": model_name} + else: + log.debug("No model name found") + image = meta.get('screenshot') - if image is None: # fallback to thumbnail + if not image: + log.debug("No screenshot found, using thumbnail") image = meta.get('thumbnail') scrape['image'] = image + date = meta.get('launchDate') if date: date = re.sub(r"T.*", "", date) scrape['date'] = date - if meta.get('tags'): - scrape['tags'] = [{"name": x} for x in meta['tags']] + else: + log.debug("No date found") + scrape['tags'] = [{"name": x} for x in meta.get('tags', [])] + + log.debug(f"Scraped data: {json.dumps(scrape)}") return scrape @@ -141,13 +170,14 @@ def get_model_bio(url_handle: str, performer_url: str) -> dict: ) except requests.exceptions.RequestException as api_error: log.error(f"Error {api_error} while requesting data from manyvids api") - return None + return {} model_meta = response.json() - log.debug(json.dumps(model_meta)) # useful to get all json entries + log.debug(f"Raw response from API: {json.dumps(model_meta)}") + scrape = {} scrape['name'] = model_meta.get('displayName') scrape['image'] = model_meta.get('portrait') - log.debug(f"image {scrape['image']}") + date = model_meta.get('dob') if date: date = re.sub(r"T.*", "", date) @@ -225,57 +255,64 @@ def get_model_bio(url_handle: str, performer_url: str) -> dict: if career_length: scrape["career_length"] = re.sub(r"^Joined\s+", "", career_length[0]) + " - today" except requests.exceptions.RequestException as url_error: - log.error(f"Error {url_error} while requesting data from profile page") + log.error(f"Error while requesting data from profile page: {url_error}") + + log.debug(f"Scraped data: {json.dumps(scrape)}") return scrape -def scrape_scene(scene_url: str) -> None: - id_match = re.search(r".+/Video/(\d+)(/.+)?", scene_url) - if id_match: - scene_id = id_match.group(1) - scraped = get_scene(scene_id) - if scraped: - print(json.dumps(scraped)) - return - print("{}") +def scrape_scene(scene_url: str) -> dict | None: + if scene_id := re.search(r".+/Video/(\d+)(/.+)?", scene_url): + return get_scene(scene_id.group(1)) + else: + log.error(f"Failed to get video ID from '{scene_url}'") -def scrape_performer(performer_url: str) -> None: - handler_match = re.search(r".+/Profile/(\d+)/([^/]+)/.+", performer_url) - if handler_match: +def scrape_performer(performer_url: str) -> dict | None: + scraped = None + if (handler_match := re.search(r".+/Profile/(\d+)/([^/]+)/.*", performer_url)): performer_id = handler_match.group(1) url_handler = handler_match.group(2).lower() performer_about_url = f"https://www.manyvids.com/Profile/{performer_id}/{url_handler}/About/" scraped = get_model_bio(url_handler, performer_about_url) - if scraped: - scraped["url"] = 
performer_url - print(json.dumps(scraped)) - return - print("{}") - - -def performer_by_name(name: str, max_results: int = 25) -> None: - performers = [] - if name: - search_url = f'https://www.manyvids.com/MVGirls/?keywords={name}&search_type=0&sort=10&page=1' - xpath_url = '//h4[contains(@class,"profile-pic-name")]/a[@title]/@href' - xpath_name = '//h4[contains(@class,"profile-pic-name")]/a[@title]/text()[1]' + scraped["url"] = performer_url + return scraped + else: + log.error(f"Failed to get performer ID from '{performer_url}'") + + +def performer_by_name(name: str, max_results: int = 25) -> list[dict] | None: + search_url = f'https://www.manyvids.com/MVGirls/?keywords={quote_plus(name)}&search_type=0&sort=10&page=1' + xpath_url = '//h4[contains(@class,"profile-pic-name")]/a[@title]' try: response = get_request(search_url) - root = html.fromstring(response.content) - names = root.xpath(xpath_name) - urls = root.xpath(xpath_url) - if len(names) != len(urls): - log.warning("Names/URL mismatch! Aborting") - else: - if max_results > len(names): - max_results = len(names) - log.debug(f"Found {max_results} performers with name {name}") - for i in range(0, max_results): - performers.append({"name": names[i].strip(), "url": urls[i]}) except Exception as search_exc: - log.error(f"Failed to search for {name}: {search_exc}") - print(json.dumps(performers)) + log.error(f"Failed to search for performer '{name}': {search_exc}") + return + root = html.fromstring(response.content) + perf_nodes = root.xpath(xpath_url)[:max_results] + performers = [{"name": perf.text.strip(), "url": perf.get('href')} for perf in perf_nodes] + return performers + +def scene_by_name(name: str, max_results: int = 10) -> list[dict] | None: + try: + response = post_request("https://www.manyvids.com/api/vids/", {"sort":10,"page":1,"type":"video","keywords":name}) + except Exception as search_exc: + log.error(f"Failed to search for scene '{name}': {search_exc}") + return + meta = response.json() + if "error" in meta: + log.error(f"Failed to search for scene '{name}': {meta['error']}") + return [] + vids = [res['video'] for res in meta['content']['items'][:max_results]] + scrapes = [] + for vid in vids: + scrape = {} + scrape['Title'] = vid['title'] + scrape['URL'] = 'https://www.manyvids.com' + vid['preview']['path'] + scrape['Image'] = vid['videoThumb'] + scrapes.append(scrape) + return scrapes def main(): @@ -283,18 +320,28 @@ def main(): url = fragment.get("url") name = fragment.get("name") - if url is None and name is None: - log.error("No URL/Name provided") - sys.exit(1) - - if url and "performer_by_url" in sys.argv: - scrape_performer(url) - elif name and "performer_by_name" in sys.argv: - search_name = quote_plus(name) - performer_by_name(search_name) - elif url: - scrape_scene(url) - + result = None + if "scene_by_url" in sys.argv or "scene_by_query_fragment" in sys.argv: + if url: + result = scrape_scene(url) + else: + log.error("Missing URL: this should not be possible when called from Stash") + elif "scene_by_name" in sys.argv: + if name: + result = scene_by_name(name) + else: + log.error("Missing search query: this should not be possible when called from Stash") + elif "performer_by_url" in sys.argv: + if url: + result = scrape_performer(url) + else: + log.error("Missing URL: this should not be possible when called from Stash") + elif "performer_by_name" in sys.argv: + if name: + result = performer_by_name(name) + else: + log.error("Missing search query: this should not be possible when called from Stash") + 
print(json.dumps(result)) if __name__ == "__main__": main() diff --git a/scrapers/ManyVids/ManyVids.yml b/scrapers/ManyVids/ManyVids.yml index 06bb0127d..0d492db42 100644 --- a/scrapers/ManyVids/ManyVids.yml +++ b/scrapers/ManyVids/ManyVids.yml @@ -1,4 +1,6 @@ name: ManyVids +# requires: py_common + sceneByURL: - url: - manyvids.com/Video @@ -6,6 +8,20 @@ sceneByURL: script: - python3 - ManyVids.py + - scene_by_url +sceneByName: + action: script + script: + - python3 + - ManyVids.py + - scene_by_name +sceneByQueryFragment: + action: script + queryURL: "{url}" + script: + - python3 + - ManyVids.py + - scene_by_query_fragment performerByURL: - url: - manyvids.com/Profile @@ -20,4 +36,4 @@ performerByName: - python3 - ManyVids.py - performer_by_name -# Last Updated December 30, 2022 \ No newline at end of file +# Last Updated October 07, 2023 \ No newline at end of file diff --git a/scrapers/Masqulin.yml b/scrapers/Masqulin.yml index 77acfaf35..982956fe1 100644 --- a/scrapers/Masqulin.yml +++ b/scrapers/Masqulin.yml @@ -8,22 +8,19 @@ xPathScrapers: sceneScraper: scene: Title: - selector: //h1[@class="titlePlayer"] + selector: //div[@class="gallery_info spacer"]/h1 Performers: Name: //span[@class="tour_update_models"]/a Tags: - Name: //p[@class="sceneInfo"]/span/a + Name: //a[@class="tagsVideoPage"] Details: - selector: //p[@class="col-lg-6 textDescription"] + selector: //p[@id="textDesc"] + Image: //meta[@property="og:image"]/@content + Date: + selector: //span[@class='availdate'][1] postProcess: - - replace: - - regex: \.\.\. - with: - - regex: \s*Read\smore\s*$ - with: - Image: - selector: //*[@class="hiddenImg stdimage"]/@src + - parseDate: Jan 02, 2006 Studio: Name: fixed: Masqulin -# Last Updated December 29, 2021 +# Last Updated January 07, 2023 diff --git a/scrapers/MatureNL.yml b/scrapers/MatureNL.yml index ea42b9fce..1abfbc89e 100644 --- a/scrapers/MatureNL.yml +++ b/scrapers/MatureNL.yml @@ -5,25 +5,57 @@ sceneByURL: &byUrl - mature.nl/ scraper: sceneScraper galleryByURL: *byUrl +sceneByFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $url: //link[@rel="canonical"]/@href scene: + URL: &url + selector: $url + postProcess: + - replace: + - regex: (^https?:\/\/(?:www\.)?mature\.nl\/..\/update\/\d+)\/.+ + with: $1 + Code: + selector: $url + postProcess: + - replace: + - regex: .+\/update\/(\d+)\/.+ + with: $1 Title: &title //div[@class="box"]/h1/text() - Details: &details //div/span[text()="Synopsis:"]/following-sibling::text() | //meta[@name="description"]/@content + Details: &details + selector: //div/span[text()="Synopsis:"]/following-sibling::text() | //meta[@name="description"]/@content + postProcess: + - replace: + - regex: " Watch this amazing porn video on mature.nl as a member today!" 
+ with: Tags: &tags - Name: //div[@class="box-cnt"]/div[@class="mar-t"]/a[not(@class)]/text() + Name: //div[@id="divPageUpdateNiches"]/a[contains(@class, "tag")]/text() Performers: &performers Name: - selector: //div[@class="name"]/span[@class="col-accent"]/text() + # This method is easier, but the names here are in uppercase + # selector: //div[contains(@class,"card-label")]/a/text() + selector: //div[@class="box-cnt"]//span[@class="col-accent"]/following-sibling::text() postProcess: - replace: - - regex: ( \(EU\))$ + - regex: \s\(\w+\) + with: + - regex: \s\(EU\) with: + - regex: " & " + with: ", " + - regex: (\w)\., + with: $1, + split: ", " Image: - selector: //span[@id="spnPageUpdateTrailer"]/a/img/@data-src + selector: (//video/@poster | //span[@id="spnPageUpdateTrailer"]/a/img/@data-src | //img[@class="img-responsive lazy"][1]/@data-src)[1] Date: &date - selector: //div[@class="box-cnt"]/div[@class="mar-t"][not (contains(a, "the full"))]/text()[1] + selector: //span[@class="val-m"][1] postProcess: - replace: - regex: ^(\d{1,2}-\d{1,2}-\d{4}).*$ @@ -33,10 +65,11 @@ xPathScrapers: Name: fixed: "Mature.nl" gallery: + URL: *url Title: *title Details: *details Tags: *tags Performers: *performers Date: *date Studio: *studio -# Last Updated March 21, 2022 +# Last Updated July 11, 2023 diff --git a/scrapers/MenAtPlay.yml b/scrapers/MenAtPlay.yml index 5ea97cd78..05e53b7fd 100644 --- a/scrapers/MenAtPlay.yml +++ b/scrapers/MenAtPlay.yml @@ -10,8 +10,8 @@ xPathScrapers: Title: selector: //div[@class="gallery_info spacer"]/h1/text() Details: - selector: //div[@class="containerText"]/p - concat: "\n" + selector: //div[@class="containerText"]/p//text() + concat: "\n\n" Performers: Name: selector: //div[@class="gallery_info spacer"]/p/span[@class="tour_update_models"]/a/text() @@ -26,4 +26,4 @@ xPathScrapers: Studio: Name: fixed: MenAtPlay -# Last Updated May 29, 2022 +# Last Updated July 02, 2023 diff --git a/scrapers/Algolia_MenOver30.yml b/scrapers/MenOver30/MenOver30.yml similarity index 79% rename from scrapers/Algolia_MenOver30.yml rename to scrapers/MenOver30/MenOver30.yml index f0a362915..8342bd998 100644 --- a/scrapers/Algolia_MenOver30.yml +++ b/scrapers/MenOver30/MenOver30.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "MenOver30" sceneByURL: - action: script @@ -6,26 +7,26 @@ sceneByURL: - pridestudios.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 - validName galleryByURL: @@ -35,7 +36,7 @@ galleryByURL: - pridestudios.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 - gallery # Last Updated December 22, 2022 diff --git a/scrapers/MetalBondage.yml b/scrapers/MetalBondage.yml new file mode 100644 index 000000000..5beee9d55 --- /dev/null +++ b/scrapers/MetalBondage.yml @@ -0,0 +1,40 @@ +name: Metal Bondage +sceneByURL: + - action: scrapeXPath + url: + - metalbondage.com + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //title/text() + Date: + selector: //div[@class="metabar"]//text() + postProcess: + - replace: + - regex: '(.*\d{1,2}, \d{4}).*' + with: $1 + - parseDate: January 2, 2006 + Details: + selector: //div[@class="textcontent"]/p + concat: "\n\n" + Code: + selector: 
//div[@class="post-title fix"]//a/text() + postProcess: + - replace: + - regex: '(MB\d+)\s([\w-].*)' + with: $1 + Performers: + Name: + selector: //div[@class="tags"]//a[@rel="tag"]/text() + Tags: + Name: + selector: //div[@class="tags"]//a[@rel="category tag"]/text() + Image: //div[@class="textcontent"]//img/@src + Studio: + Name: + fixed: "Metal Bondage" + URL: //link[@rel="canonical"]/@href + +# Last Updated July 27, 2023 diff --git a/scrapers/MilfVR.yml b/scrapers/MilfVR.yml deleted file mode 100644 index 1fd148967..000000000 --- a/scrapers/MilfVR.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: "MilfVR" -sceneByURL: - - action: scrapeXPath - url: - - milfvr.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - common: - $info: //div[@class="detail"] - scene: - Title: //div[@class="detail__header detail__header-lg"]/h1 - Studio: - Name: - fixed: MilfVR - Date: - selector: $info//span[@class="detail__date"]/text() - postProcess: - - parseDate: 2 January, 2006 - Details: - selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text() - concat: " " - Tags: - Name: $info//div[@class="tag-list__body"]//a/text() - Performers: - Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() - Image: (//div[@class="photo-strip__body"]/div[@class="photo-strip__slide"])[1]/@data-src -# Last Updated November 04, 2021 diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 4fbb7390b..0a88b8290 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -3,6 +3,7 @@ sceneByURL: - action: scrapeXPath url: - babesnetwork.com/scene/ + - biempire.com/scene/ - devianthardcore.com/scene/ - doghousedigital.com/ - familyhookups.com/scene/ @@ -30,6 +31,7 @@ sceneByURL: - lilhumpers.com/scene/ - milfed.com/scene/ - mofos.com/scene/ + - noirmale.com/scene/ - publicagent.com/scene/ - realitykings.com/scene/ - seancody.com/scene/ @@ -38,17 +40,31 @@ sceneByURL: - trueamateurs.com/scene/ scraper: scriptScraper +galleryByURL: + - action: scrapeXPath + url: + - transsensual.com/scene + scraper: galleryFromSceneScriptScraper + movieByURL: - action: scrapeXPath url: - - digitalplayground.com/movie - - transsensual.com/movie + - digitalplayground.com/movie/ + - doghousedigital.com/movie/ + - iconmale.com/movie/ + - milehighmedia.com/movie/ + - noirmale.com/movie/ + - realityjunkies.com/movie/ + - sweetheartvideo.com/movie/ + - sweetsinner.com/movie/ + - transsensual.com/movie/ scraper: movieScraper performerByURL: - action: scrapeXPath url: - babesnetwork.com/model/ + - biempire.com/model/ - devianthardcore.com/model/ - digitalplayground.com/modelprofile/ - doghousedigital.com/model/ @@ -59,6 +75,7 @@ performerByURL: - lookathernow.com/model/ - mofos.com/model/ - mofosnetwork.com/model/ + - noirmale.com/model/ - realitykings.com/model/ - rk.com/model/ - seancody.com/model/ @@ -71,6 +88,8 @@ xPathScrapers: sceneScraper: common: $section: //div[contains(@class,"tg5e7m")]/ancestor::section + $canonicalUrl: &canonicalUrl //link[@rel="canonical"]/@href + $movieUriPath: &movieUriPath //a[text()="Movie Info"]/@href scene: Title: $section//h1/text()|$section//h2/text() Date: @@ -113,17 +132,42 @@ xPathScrapers: sweetsinner: Sweet Sinner teenslovehugecocks: Teens Love Huge Cocks Image: $section//img[contains(@src,"poster")]/@src + Movies: &sceneMovies + URL: + selector: $canonicalUrl|$movieUriPath + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: '^(https://[^/]+).+__SEPARATOR__' + with: $1 + Name: + selector: 
$canonicalUrl|$movieUriPath + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: '^(https://[^/]+).+__SEPARATOR__' + with: $1 + - subScraper: + selector: //h1/text()|//h2/text() + Code: &sceneCode + selector: $canonicalUrl + postProcess: + - replace: + - regex: '.*/scene/(\d+).*' + with: $1 scriptScraper: common: - $script: //script[@type="application/ld+json"] + $script: &script //script[@type="application/ld+json"] + $canonicalUrl: *canonicalUrl + $movieUriPath: *movieUriPath scene: - Title: + Title: &title selector: $script postProcess: - replace: - regex: '.+"name": "([^"]+)".+' with: $1 - Date: + Date: &date selector: $script postProcess: - replace: @@ -136,9 +180,9 @@ xPathScrapers: - replace: - regex: '.+"thumbnailUrl": "([^"]+)".+' with: $1 - Studio: + Studio: &studio Name: - selector: //div[contains(@class,"tg5e7m")]/ancestor::section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href + selector: //div[text()="Subsite"]/following-sibling::a/text()|//div[contains(@class,"tg5e7m")]/ancestor::section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href postProcess: - replace: - regex: (.+www\.)(\w+)(.+) @@ -152,6 +196,7 @@ xPathScrapers: iknowthatgirl: I Know That Girl lilhumpers: Lil Humpers milfed: Milfed + noirmale: Noir Male publicagent: Public Agent realitykings: Reality Kings rk: Reality Kings @@ -160,7 +205,7 @@ xPathScrapers: transsensual: TransSensual trueamateurs: True Amateurs Tags: *tags - Details: + Details: &details selector: $script postProcess: - replace: @@ -170,9 +215,10 @@ xPathScrapers: with: $1 - regex: '\|' with: '"' - Performers: + Performers: &performers Name: //div/*[self::h1 or self::h2]/..//a[contains(@href,"/model")] - + Movies: *sceneMovies + Code: *sceneCode movieScraper: common: $section: //div[text()="Release Date:"]/ancestor::section @@ -323,4 +369,14 @@ xPathScrapers: Image: selector: //img[contains(@src, "model")]/@src URL: //link[@rel="canonical"]/@href -# Last Updated October 14, 2022 + galleryFromSceneScriptScraper: + common: + $script: *script + gallery: + Title: *title + Date: *date + Details: *details + Performers: *performers + Tags: *tags + Studio: *studio +# Last Updated December 27, 2023 diff --git a/scrapers/MindGeekAPI.py b/scrapers/MindGeekAPI/MindGeekAPI.py similarity index 98% rename from scrapers/MindGeekAPI.py rename to scrapers/MindGeekAPI/MindGeekAPI.py index 04612f8e7..0018fbfc2 100644 --- a/scrapers/MindGeekAPI.py +++ b/scrapers/MindGeekAPI/MindGeekAPI.py @@ -7,6 +7,13 @@ from datetime import datetime from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + try: import py_common.log as log except ModuleNotFoundError: diff --git a/scrapers/MindGeekAPI.yml b/scrapers/MindGeekAPI/MindGeekAPI.yml similarity index 94% rename from scrapers/MindGeekAPI.yml rename to scrapers/MindGeekAPI/MindGeekAPI.yml index 70d9af864..bf83410c4 100644 --- a/scrapers/MindGeekAPI.yml +++ b/scrapers/MindGeekAPI/MindGeekAPI.yml @@ -1,4 +1,6 @@ name: "MindGeekAPI" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/Minnano-AV.yml b/scrapers/Minnano-AV.yml deleted file mode 100644 index 3d779d0b1..000000000 --- a/scrapers/Minnano-AV.yml +++ /dev/null @@ -1,141 +0,0 
@@ -name: "Minnano-AV (JAV)" -performerByName: - action: scrapeXPath - queryURL: http://www.minnano-av.com/search_result.php?search_scope=actress&search_word={} - scraper: performerSearch -performerByURL: - - action: scrapeXPath - url: - - http://www.minnano-av.com/ - scraper: performerScraper - -xPathScrapers: - performerSearch: - performer: - Name: //tbody/tr/td/p[@class="furi"]|//div[@class="act-profile"]/table/tbody/tr/td/h2/text() - URL: - selector: //tbody/tr/td/h2[@class="ttl"]/a/@href|//form[@class="add_favorite"]/@action - postProcess: - - replace: - - regex: .*?(\d+).* - with: $1 - - regex: ^ - with: "http://www.minnano-av.com/actress" - - regex: $ - with: ".html" - - performerScraper: - performer: - Name: - selector: //section[@class="main-column details"]/h1/span/text() - # $1 Alt Name in Jap? | $3 Name in Latin script - postProcess: - - replace: - - regex: (.+)(\s\/\s)(.+) - with: $3 - Aliases: - selector: //section[@class="main-column details"]/h1/text()|//span[text()='別名']/following-sibling::p/text() - concat: ", " - postProcess: - - replace: - - regex: ( - with: ( - - regex: ) - with: ) - URL: - selector: //form[@class="add_favorite"]/@action - postProcess: - - replace: - - regex: (.+=)(.+) - with: http://www.minnano-av.com/actress$2 - - regex: $ - with: ".html" - Twitter: //span[text()='ブログ']/../p/a[contains(@href,'twitter.com')]/@href - Instagram: //span[text()='ブログ']/../p/a[contains(@href,'instagram.com')]/@href - Birthdate: - selector: //span[text()='生年月日']/../p/a/@href - postProcess: - - replace: - - regex: (.+=)(.+) - with: $2 - Height: - selector: //span[text()='サイズ']/../p/text()[1] - postProcess: - - replace: - - regex: (T)(\d+)(.+) - with: $2 - - regex: ^T.* # if the above regex doesnt match => no height was provided - with: "" - Measurements: - selector: //span[text()='サイズ']/../p/a/@href|//span[text()='サイズ']/../p/text() - concat: "|" - postProcess: - - replace: - - regex: (.+=)(\w*)(.+B)(\d*)(.+W)(\d*)(.+H)(\d*)(.+) - with: $4$2-$6-$8 - - regex: ^T(.+B)(\d*)(.+W)(\d*)(.+H)(\d*)(.+) # cup size missing case - with: $2-$4-$6 - CareerLength: - selector: //span[text()='AV出演期間']/../p/text() - postProcess: - - replace: - # Stupid regex to replace Jap Unicode (Can appear like this: http://www.minnano-av.com/actress741247.html) - - regex: "0" - with: "0" - - regex: "1" - with: "1" - - regex: "2" - with: "2" - - regex: "3" - with: "3" - - regex: "4" - with: "4" - - regex: "5" - with: "5" - - regex: "6" - with: "6" - - regex: "7" - with: "7" - - regex: "8" - with: "8" - - regex: "9" - with: "9" - - regex: "、" - with: "," - - regex: ( - with: ( - - regex: ) - with: ) - - regex: \s*[-~]\s* - with: "-" - - regex: -+|年\s*- - with: "-" - - regex: 年\s*, - with: ", " - - regex: 年\s* - with: "/" - - regex: 月\D+ - with: "-" - - regex:  |年|\(.*?\) - with: "" - - regex: \/, - with: "," - - regex: \/$ - with: "" - - regex: "[\\p{Han}\\p{Hiragana}\\p{Katakana}ー]+" - with: "" - #Image: - # selector: //div[@class='act-area']/div[@class="thumb"]/img/@src - # postProcess: - # - replace: - # - regex: ^ - # with: http://www.minnano-av.com - # - regex: ?new - # with: "" - Ethnicity: - fixed: "Japanese" - Country: - fixed: "Japan" - Gender: - fixed: "Female" -# Last Updated June 20, 2021 diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py new file mode 100644 index 000000000..9cfde5855 --- /dev/null +++ b/scrapers/Minnano-AV/Minnano-AV.py @@ -0,0 +1,378 @@ +import json +import os +import re +import sys +from typing import Any + +CURRENT_SCRIPT_DIR = 
os.path.dirname(os.path.realpath(__file__)) +PARENT_DIR = os.path.dirname(CURRENT_SCRIPT_DIR) +sys.path.append(PARENT_DIR) + +try: + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + import requests + from lxml import etree +except ModuleNotFoundError: + print("You need to install dependencies from requirements.txt") + sys.exit(1) + +XPATHS = { + "alias": "//section[@class=\"main-column details\"]/h1/text()|//span[text()='別名']/following-sibling::p/text()", + "birthdate": "//span[text()='生年月日']/../p/a/@href", + "career": "//span[text()='AV出演期間']/../p/text()", + "debut": "//span[text()='デビュー作品']/../p/text()", + "id": '//form[@class="add_favorite"]/@action', + "image": "//div[@class='act-area']/div[@class=\"thumb\"]/img/@src", + "instagram": ("//span[text()='ブログ']/../p/a[contains(@href,'instagram.com')]/@href"), + "measurements": ( + "//span[text()='サイズ']/../p/a/@href|//span[text()='サイズ']/../p/text()" + ), + "name_kanji": '//section[@class="main-column details"]/h1/text()', + "origin": "//span[text()='出身地']/../p/a/text()", + "name": '//section[@class="main-column details"]/h1/span/text()', + "search_url": '../h2[@class="ttl"]/a/@href', + "search": '//p[@class="furi"]', + "twitter": ("//span[text()='ブログ']/../p/a[contains(@href,'twitter.com')]/@href"), +} + +REGEXES = { + # https://regex101.com/r/9k2GXw/5 + "alias": r"(?P[^\x29\uFF09]+?)(?P[\x28\uFF08\u3010][^\x29\uFF09\u3011]+(?:[\x29\uFF09\u3011]))?\s[\x28\uFF08](?P\w+)?\s+/\s(?P[a-z-A-Z ]+)?[\x29\uFF09]", + "id": r"\d+", + "birthdate": r"[0-9-]+", + "career": (r"(?P\d+)年?(?:\d+月)? ?(?:\d+)?日?[-~]? ?(?:(?P\d+)?)?年?"), + "measurements": ( + r"(?<=T)(?P\d+)? / B(?P\d+)\([^=]+=(?P\w+)\) / W(?P\d+) / H(?P\d+)" + ), + "url": r"https://www.minnano-av.com/actress\d+.html", +} + +FORMATS = { + "image": "https://www.minnano-av.com{IMAGE_URL_FRAGMENT}", + "url": "https://www.minnano-av.com/actress{PERFORMER_ID}.html", +} + + +def reverse_first_last_name(performer_name): + return " ".join(reversed(performer_name.split(" "))) + + +def convert_to_halfwidth(input: str) -> str: + """Convert full-width characters to half-width.""" + fullwidth_range = range(0xFF01, 0xFF5E + 1) + fullwidth_to_halfwidth_dict = { + chr(fw_char): chr(fw_char - 0xFEE0) for fw_char in fullwidth_range + } + halfwidth_str = "".join( + fullwidth_to_halfwidth_dict.get(char, char) for char in input + ) + return halfwidth_str + + +def cm_to_inches(centimeters: int) -> int: + return int(f"{centimeters / 2.54:.0f}") + + +def convert_bra_jp_to_us(jp_size: str) -> str: + """ + Converts bra size from Japanese to US size. + First it looks up the whole size in predefined chart, + and if that fails: + 1. Band size is calculated manually. + 2. Cup size is looked up in another chart. + 1. If that fails as well, the Japanese cup size is used. 
+ References: + * https://www.petitecherry.com/pages/size-guide + * https://japanrabbit.com/blog/japanese-clothing-size-chart/ + """ + predefined_conversion_chart = { + "65A": "30AA", + "65B": "30A", + "65C": "30B", + "65D": "30C", + "65E": "30D", + "65F": "30E", + "70A": "32AA", + "70B": "32A", + "70C": "32B", + "70D": "32C", + "70E": "32D", + "70F": "32E", + "70G": "32F", + "70H": "32F", + "70I": "32G", + "75A": "34AA", + "75B": "34A", + "75C": "34B", + "75D": "34C", + "75E": "34D", + "75F": "34E", + "75G": "32E", + "75H": "34F", + "75I": "34G", + "80B": "36A", + "80C": "36B", + "80D": "36C", + "80E": "36D", + "80F": "36E", + "80G": "36E", + "80H": "36F", + "80I": "36G", + "85C": "38B", + "85D": "38C", + "85E": "38D", + "85F": "38E", + "85G": "38E", + "85H": "38F", + "90D": "40C", + "90E": "40D", + "90F": "40E", + "90G": "40E", + "90H": "40F", + "90I": "40G", + "95E": "42C", + "95F": "42E", + "95G": "42E", + "95H": "42F", + "95I": "42G", + "100E": "44D", + "100F": "44E", + "100G": "44E", + "100H": "44F", + } + cup_conversion_chart = { + "A": "AA", + "B": "A", + "C": "B", + "D": "C", + "F": "DD", + "G": "D", + "H": "F", + "I": "G", + "J": "H", + "K": "I", + } + + converted_size = None + converted_size = predefined_conversion_chart.get(jp_size, None) + + if converted_size is None: + band_size = int(jp_size[:-1]) + cup_size = jp_size[-1] + converted_size = ( + f"{cm_to_inches(band_size)}{cup_conversion_chart.get(cup_size, cup_size)}" + ) + return converted_size + + +def get_xpath_result(tree: Any, xpath_string: str) -> str | list[str] | None: + _result = tree.xpath(xpath_string) + if _result == []: + return None + elif len(_result) == 1: + return _result[0] + else: + return _result + + +def performer_by_url(url): + request = requests.get(url) + log.debug(request.status_code) + + tree = etree.HTML(request.text) + + scrape = {} + aliases = set() + + JAPANESE = True + + if origin_result := get_xpath_result(tree, XPATHS["origin"]): + if origin_result == "海外": + JAPANESE = False + + if name_xpath_result := get_xpath_result(tree, XPATHS["name"]): + _, romanized_name = name_xpath_result.split(" / ") + performer_name = romanized_name + if JAPANESE: + performer_name = reverse_first_last_name(performer_name) + scrape["name"] = performer_name + aliases.add(romanized_name) + + if kanji_xpath_result := get_xpath_result(tree, XPATHS["name_kanji"]): + # \u3010 is 【 + if "\u3010" in kanji_xpath_result: + kanji_name, _ = kanji_xpath_result.split("\u3010") + else: + kanji_name = kanji_xpath_result + if kanji_name != "": + aliases.add(kanji_name) + else: + log.debug("Kanji name XPath matched, but no value found.") + + if aliases_xpath_result := get_xpath_result(tree, XPATHS["alias"]): + for alias in aliases_xpath_result: + if match := re.match(REGEXES["alias"], alias): + aliases.add(match.group("kanji")) + try: + aliases.add(match.group("romanized")) + except: + pass + + if favorite_form_url := get_xpath_result(tree, XPATHS["id"]): + if match := re.search(REGEXES["id"], favorite_form_url): + scrape["url"] = FORMATS["url"].format(PERFORMER_ID=match[0]) + else: + log.debug("URL XPath matched, but no value found.") + + if twitter_url_result := get_xpath_result(tree, XPATHS["twitter"]): + if twitter_url_result != None: + scrape["twitter"] = twitter_url_result + else: + log.debug("Twitter XPath matched, but no value found.") + + if instagram_url_result := get_xpath_result(tree, XPATHS["instagram"]): + if instagram_url_result != None: + scrape["instagram"] = instagram_url_result + else: + log.debug("Instagram XPath 
matched, but no value found.") + + if birthdate_result := get_xpath_result(tree, XPATHS["birthdate"]): + if match := re.search( + REGEXES["birthdate"], convert_to_halfwidth(birthdate_result) + ): + scrape["birthdate"] = match[0] + else: + log.debug("Birthday XPath matched, but no value found.") + + if measurements_result := get_xpath_result(tree, XPATHS["measurements"]): + combined = "".join(measurements_result) + if match := re.search(REGEXES["measurements"], convert_to_halfwidth(combined)): + waist_in_inches, hip_in_inches = [ + cm_to_inches(int(measurement)) + for measurement in [match["waist"], match["hip"]] + ] + + bra_size = convert_bra_jp_to_us(f'{match["bust"]}{match["cup"]}') + + scrape["measurements"] = f"{bra_size}-{waist_in_inches}-{hip_in_inches}" + if match["height"] != None: + scrape["height"] = match["height"] + else: + log.debug("Measurements XPath matched, but no value found.") + + if career_result := get_xpath_result(tree, XPATHS["career"]): + clean_career_result = convert_to_halfwidth(career_result).replace(" ", "") + if match := re.match(REGEXES["career"], clean_career_result): + groups = match.groups() + start = match["start"] + "-" if groups[0] != None else "" + end = match["end"] if groups[1] != None else "" + scrape["career_length"] = start + end + else: + log.debug("Career debut XPath matched, but no value found.") + + elif debut_result := get_xpath_result(tree, XPATHS["debut"]): + if match := re.search(REGEXES["career"], convert_to_halfwidth(debut_result)): + groups = match.groups() + scrape[ + "career_length" + ] = f'{match["start"] if groups[0] != None else ""}-{match["end"] if groups[1] != None else ""}' + else: + log.debug("Career debut XPath matched, but no value found.") + + if image_result := get_xpath_result(tree, XPATHS["image"]): + clean_url_fragment = str.replace(image_result, "?new", "") + if clean_url_fragment != "": + scrape["image"] = str.format( + FORMATS["image"], IMAGE_URL_FRAGMENT=clean_url_fragment + ) + else: + log.debug("Image XPath matched, but no value found.") + + aliases.discard(None) + sorted_aliases = sorted(aliases) + scrape["aliases"] = ", ".join(sorted_aliases) + if JAPANESE: + scrape["country"] = "Japan" + scrape["ethnicity"] = "Asian" + scrape["hair_color"] = "Black" + scrape["eye_color"] = "Brown" + scrape["gender"] = "Female" + print(json.dumps(scrape)) + + +def performer_by_name(name: str, retry=True) -> None: + queryURL = f"https://www.minnano-av.com/search_result.php?search_scope=actress&search_word={name}" + + result = requests.get(queryURL) + tree = etree.HTML(result.text) + + performer_list = [] + + if re.search(REGEXES["url"], result.url): + performer_list.append({"name": name, "url": result.url}) + elif search_result := get_xpath_result(tree, XPATHS["search"]): + for node in search_result: + performer = {} + node_value = node.text + if "/" not in node_value: + continue + _, romanized_name = node_value.split(" / ") + performer["name"] = romanized_name + if url_result := get_xpath_result(node, XPATHS["search_url"]): + url = "" + if match := re.search(REGEXES["id"], url_result): + url = str.format(FORMATS["url"], PERFORMER_ID=match[0]) + performer["url"] = url + performer_list.append(performer) + elif retry: + modified_name = reverse_first_last_name(name) + performer_by_name(modified_name, retry=False) + else: + performer_list.append({"name": "No performer found"}) + + print(json.dumps(performer_list)) + + +def main(): + if len(sys.argv) == 1: + log.error("No arguments") + sys.exit(1) + + stdin = sys.stdin.read() + + 
inputJSON = json.loads(stdin) + url = inputJSON.get("url", None) + name = inputJSON.get("name", None) + + if "performer_by_url" in sys.argv: + log.debug("Processing performer by URL") + log.debug(stdin) + if url: + performer_by_url(url) + else: + log.error("Missing URL") + elif "performer_by_name" in sys.argv: + log.debug("Processing performer by name") + log.debug(stdin) + if name: + performer_by_name(name) + else: + log.error("Missing name") + else: + log.error("No argument processed") + log.debug(stdin) + + +if __name__ == "__main__": + try: + main() + except Exception as e: + log.error(e) diff --git a/scrapers/Minnano-AV/Minnano-AV.yml b/scrapers/Minnano-AV/Minnano-AV.yml new file mode 100644 index 000000000..fc0a6bb23 --- /dev/null +++ b/scrapers/Minnano-AV/Minnano-AV.yml @@ -0,0 +1,18 @@ +name: "Minnano-AV (JAV)" +performerByURL: + - url: + - https://www.minnano-av.com/ + - http://www.minnano-av.com/ + action: script + script: + - python + - Minnano-AV.py + - performer_by_url + +performerByName: + action: script + script: + - python + - Minnano-AV.py + - performer_by_name +# Last Updated December 16, 2023 diff --git a/scrapers/Minnano-AV/requirements.txt b/scrapers/Minnano-AV/requirements.txt new file mode 100644 index 000000000..b4531941f --- /dev/null +++ b/scrapers/Minnano-AV/requirements.txt @@ -0,0 +1,2 @@ +requests +lxml \ No newline at end of file diff --git a/scrapers/MissaX.py b/scrapers/MissaX/MissaX.py similarity index 90% rename from scrapers/MissaX.py rename to scrapers/MissaX/MissaX.py index cefa077df..0216b7eb6 100644 --- a/scrapers/MissaX.py +++ b/scrapers/MissaX/MissaX.py @@ -2,9 +2,18 @@ import base64 import datetime import json +import os import re import sys import urllib.parse + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + # extra modules below need to be installed try: import py_common.log as log @@ -51,7 +60,7 @@ def scrape_scene_page(url): #scrape the main url title = tree.xpath('//p[@class="raiting-section__title"]/text()')[0].strip() #title scrape log.debug(f'Title:{title}') date = tree.xpath('//p[@class="dvd-scenes__data" and contains(text(), " Added:")]/text()[1]')[0] #get date - date = re.sub("(?:.+Added:\s)([\d\/]*).+", r'\g<1>', date).strip() #date cleanup + date = re.sub(r"(?:.+Added:\s)([\d\/]*).+", r'\g<1>', date).strip() #date cleanup date = datetime.datetime.strptime(date, "%m/%d/%Y").strftime("%Y-%m-%d") #date parse log.debug(f'Date:{date}') studio = tree.xpath('//base/@href')[0].strip() #studio scrape @@ -65,7 +74,7 @@ def scrape_scene_page(url): #scrape the main url details = tree.xpath('//p[@class="dvd-scenes__title"]/following-sibling::p//text()') #details scrape details = ''.join(details) #join details details = '\n'.join(' '.join(line.split()) for line in details.split('\n')) #get rid of double spaces - details = re.sub("\r?\n\n?", r'\n', details) #get rid of double newlines + details = re.sub(r"\r?\n\n?", r'\n', details) #get rid of double newlines log.debug(f'Details:{details}') bad_cover_url = tree.xpath("//img[@src0_4x]/@src0_4x") #cover from scene's page if better one is not found (it will be) datauri = "data:image/jpeg;base64," diff --git a/scrapers/MissaX.yml b/scrapers/MissaX/MissaX.yml similarity index 88% rename from 
scrapers/MissaX.yml rename to scrapers/MissaX/MissaX.yml index 931949e2e..cfca1a5f5 100644 --- a/scrapers/MissaX.yml +++ b/scrapers/MissaX/MissaX.yml @@ -1,4 +1,6 @@ name: "MissaX" +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/ModelCentroAPI/ModelCentroAPI.yml b/scrapers/ModelCentroAPI/ModelCentroAPI.yml index 65ddd4e84..86b1a782c 100644 --- a/scrapers/ModelCentroAPI/ModelCentroAPI.yml +++ b/scrapers/ModelCentroAPI/ModelCentroAPI.yml @@ -1,4 +1,6 @@ name: "ModelCentroAPI" +# requires: py_common + sceneByURL: - url: - beingphoenixmarie.com/scene/ diff --git a/scrapers/MyDirtyHobby.yml b/scrapers/MyDirtyHobby.yml index 35537dc8a..7a1d602b9 100644 --- a/scrapers/MyDirtyHobby.yml +++ b/scrapers/MyDirtyHobby.yml @@ -82,11 +82,20 @@ xPathScrapers: selector: $script postProcess: - replace: - - regex: '.*{"thumbnail":{[^}]+"src":"([^"]+)".*' + - regex: '.*?"thumbnail":\s*{[^}]+"src":\s*"([^"]+)".*' with: $1 - regex: '\\/' with: "/" Studio: Name: fixed: My Dirty Hobby -# Last Updated June 20, 2021 + +driver: + cookies: + - CookieURL: "https://www.mydirtyhobby.com" + Cookies: + - Name: "AGEGATEPASSED" + Domain: ".mydirtyhobby.com" + Value: "1" + Path: "/" +# Last Updated October 02, 2023 diff --git a/scrapers/Mylf.yml b/scrapers/Mylf.yml index 8bd1ab119..b0c1f03ea 100644 --- a/scrapers/Mylf.yml +++ b/scrapers/Mylf.yml @@ -8,7 +8,7 @@ xPathScrapers: sceneScraper: scene: Title: //h2[contains(@class,"sceneTitle")]/text() - Details: //div[contains(@class,"sceneDesc")]/text() + Details: //div[contains(@class,"sceneDesc")]//text() Date: selector: //div[contains(@class,"sceneDate")]/text() postProcess: @@ -47,4 +47,4 @@ xPathScrapers: MylfBoss: Mylf Boss MylfSelects: Mylf Selects StayHomeMilf: Stay Home Milf -# Last Updated June 27, 2022 +# Last Updated July 21, 2023 diff --git a/scrapers/NVGNetwork.yml b/scrapers/NVGNetwork.yml new file mode 100644 index 000000000..785d21ae8 --- /dev/null +++ b/scrapers/NVGNetwork.yml @@ -0,0 +1,51 @@ +name: NVG Network +sceneByURL: + - action: scrapeXPath + url: + - vip.netvideogirls.com/members/video/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $details: //div[@class="movie-info"] + scene: + Title: + selector: //title + postProcess: + - replace: + - regex: '.+ / (.+?) 
/ .+' + with: $1 + Code: + selector: //div[@class="react-player__preview"]/@style + postProcess: + - replace: + - regex: '.+static\.netvideogirls\.com\/(\d+)-.+' + with: $1 + Date: + selector: //div[@class="tool_inform"]//i[@class="icon-calendar"]/../span/text() + postProcess: + - parseDate: Jan 2, 2006 + Studio: + Name: //div[@class="tool_inform"]//div[@class="name"]/a/text() + URL: + selector: //div[@class="tool_inform"]//div[@class="name"]/a/@href + postProcess: + - replace: + - regex: (.+) + with: https://vip.netvideogirls.com$1 + Image: + selector: //div[@class="react-player__preview"]/@style + postProcess: + - replace: + - regex: .+url\("(.+)"\).+ + with: $1 + Tags: + Name: //div[@class="tool_list"]//div[@class="item"]/span/text() +driver: + useCDP: true + headers: + - Key: User-Agent + Value: '' + - Key: Cookie + Value: '' +# Last Updated October 19, 2023 diff --git a/scrapers/NaughtyNatural.yml b/scrapers/NaughtyNatural.yml new file mode 100644 index 000000000..8e3be679f --- /dev/null +++ b/scrapers/NaughtyNatural.yml @@ -0,0 +1,53 @@ +name: Naughty Natural +sceneByURL: + - action: scrapeXPath + url: + - naughtynatural.com/videos/ + scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - naughtynatural.com/photos/ + scraper: galleryScraper +xPathScrapers: + sceneScraper: + common: + $url: //link[@rel="canonical"]/@href + scene: + Title: &title //h1[@class="entry-title"]/text() + Studio: &studio + Name: + fixed: Naughty Natural + Date: &date + selector: //meta[@property="article:published_time"]/@content + postProcess: + - replace: + - regex: (^\d{4}-\d{2}-\d{2}).* + with: $1 + - parseDate: 2006-01-02 + Details: + selector: //p[@class="description"]/text() + Tags: &tags + Name: //span[@class="categories"]/a + Performers: &performers + Name: //span[@class="post_author"]/a + Image: + selector: //script[@type="text/javascript"][contains(text(),"jwplayer") and contains(text(),"jpg")] + postProcess: + - replace: + - regex: .*(wp-content.+jpg).*(https:\/\/[^\/]*).* + with: $2/$1 + URL: &url + selector: $url + galleryScraper: + common: + $photopage: //section[@id='photos_page-page'] + gallery: + Title: *title + Studio: *studio + Date: *date + Details: + selector: //div[@class="gallery_description"]/p/text() + Tags: *tags + Performers: *performers +# Last Updated September 03, 2023 \ No newline at end of file diff --git a/scrapers/NewSensationsNetworkSites.yml b/scrapers/NewSensationsNetworkSites.yml index 3e29e30f9..f7b58b518 100644 --- a/scrapers/NewSensationsNetworkSites.yml +++ b/scrapers/NewSensationsNetworkSites.yml @@ -5,6 +5,7 @@ sceneByURL: - ashlynnbrooke.com/tour_ab/ - fourfingerclub.com/tour_ffc/ - freshoutofhighschool.com/tour_fohs/ + - girlgirlxxx.com/tour_girlgirlxxx/ - jizzbomb.com/tour_jb/ - newsensations.com/tour_rs/ - parodypass.com/tour_pp/ @@ -42,6 +43,7 @@ xPathScrapers: https://fourfingerclub.com/tour_ffc/: Four Finger Club https://freshoutofhighschool.com/tour_fohs/: Fresh Out Of Highschool https://jizzbomb.com/tour_jb/: Jizz Bomb + https://girlgirlxxx.com/tour_girlgirlxxx/: Girl Girl XXX https://newsensations.com/tour_rs/: New Sensations https://parodypass.com/tour_pp/: Parody Pass https://shanedieselsbanginbabes.com/tour_sdbb/: Shanedlesele Bangin Babes @@ -49,4 +51,4 @@ xPathScrapers: http://thelesbianexperience.com/tour_tle/: The Lesbian Experience https://thetabutales.com/tour_tt/: The Tabu Tales https://unlimitedmilfs.com/tour_um/: Unlimited Milfs -# Last Updated July 11, 2022 +# Last Updated October 21, 2023 diff --git a/scrapers/Newgrounds.yml 
b/scrapers/Newgrounds.yml new file mode 100644 index 000000000..a14231019 --- /dev/null +++ b/scrapers/Newgrounds.yml @@ -0,0 +1,48 @@ +name: Newgrounds +sceneByFragment: + action: scrapeXPath + scraper: sceneScraper + queryURL: https://www.newgrounds.com/portal/view/{filename} + queryURLReplace: + filename: + - regex: ^[^\[]+\[+|(\]).* + with: "" +sceneByURL: + - action: scrapeXPath + url: + - newgrounds.com/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //head/title + Date: + selector: //meta[@itemprop="uploadDate"]/@content + postProcess: + - replace: + - regex: (?i)(T\d+.+) + with: "" + - parseDate: 2006-01-02 + Image: //meta[@property="og:image"]/@content + Details: + selector: //meta[@itemprop="description"]/@content + Tags: + Name: //dd[@class="tags"]//a + Performers: + Name: //dd[@class="tags"]//a + Studio: + Name: //div[@class="item-details-main"]//a +driver: + cookies: + - CookieURL: "https://www.newgrounds.com" + Cookies: +# You will need to provide the values for the following cookies. View storage under the inspect menu in your browser while logged in. + - Name: "NG_GG_username" + Domain: ".newgrounds.com" + Value: "[insert value here]" + Path: "/" + - Name: "vmk1du5I8m" + Domain: ".newgrounds.com" + Value: "[insert value here]" + Path: "/" +# Last Updated October 08, 2023 diff --git a/scrapers/NextDoorStudios/NextDoorStudios.yml b/scrapers/NextDoorStudios/NextDoorStudios.yml new file mode 100644 index 000000000..61263df9a --- /dev/null +++ b/scrapers/NextDoorStudios/NextDoorStudios.yml @@ -0,0 +1,49 @@ +# requires: Algolia +name: Next Door Studios +sceneByURL: + - action: script + url: + - austinwilde.com/en/video + - codycummings.com/en/video + - marcusmojo.com/en/video + - nextdoorbuddies.com/en/video + - nextdoorcasting.com/en/video + - nextdoorfilms.com/en/video + - nextdoorhomemade.com/en/video + - nextdoormale.com/en/video + - nextdoororiginals.com/en/video + - nextdoorraw.com/en/video + - nextdoorstudios.com/en/video + - nextdoortaboo.com/en/video + - nextdoortwink.com/en/video + - roddaily.com/en/video + - rodsroom.com/en/video + - samuelotoole.com/en/video + - stagcollective.com/en/video + - tommydxxx.com/en/video + - trystanbull.com/en/video + script: + - python + - ../Algolia/Algolia.py + - nextdoorstudios +sceneByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - nextdoorstudios +sceneByName: + action: script + script: + - python + - ../Algolia/Algolia.py + - nextdoorstudios + - searchName +sceneByQueryFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - nextdoorstudios + - validName +# Last Updated August 18, 2023 diff --git a/scrapers/NoelAlejandro.yml b/scrapers/NoelAlejandro.yml new file mode 100644 index 000000000..28df3e48a --- /dev/null +++ b/scrapers/NoelAlejandro.yml @@ -0,0 +1,36 @@ +name: NoelAlejandro +sceneByURL: + - action: scrapeXPath + url: + - noelalejandrofilms.com/product + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //h1[@id="title"] + Image: + selector: //video[@id="product-img"]/@poster + Date: + selector: //script[@class="yoast-schema-graph"] + postProcess: + - replace: + - regex: .+datePublished\"\:\"(\d{4}-\d{2}-\d{2}).+ + with: $1 + Director: + selector: //li[contains(.,"Director:")] + postProcess: + - replace: + - regex: ^\s*Director:\s* + with: '' + Details: + selector: //div[@class="film-prologue"]/p + concat: "\n\n" + Performers: + Name: + selector: //li[contains(.,"Cast:")]/a + Studio: + Name: + selector: 
//meta[@name="title"]/@content + +# Last Updated March 27, 2023 diff --git a/scrapers/Nubiles.yml b/scrapers/Nubiles.yml index 210b2ef7d..26e855f71 100644 --- a/scrapers/Nubiles.yml +++ b/scrapers/Nubiles.yml @@ -12,12 +12,15 @@ sceneByURL: - badteenspunished.com/video/watch/ - bountyhunterporn.com/video/watch/ - brattymilf.com/video/watch/ + - cheatingsis.com/video/watch/ - cumswappingsis.com/video/watch/ - daddyslilangel.com/video/watch/ + - datingmystepson.com/video/watch/ - detentiongirls.com/video/watch/ - driverxxx.com/video/watch/ - familyswap.xxx/video/watch/ - imnotyourmommy.com/video/watch/ + - lilsis.com/video/watch/ - momlover.com/video/watch/ - momsboytoy.com/video/watch/ - momsfamilysecrets.com/video/watch/ @@ -120,6 +123,12 @@ xPathScrapers: scene: Title: &titleSelector //h2/text() + Code: &codeSelector + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: ".*/watch/(\\d*?)/.*" + with: $1 Date: &dateAttr selector: $contentpane//span[@class="date"]/text() postProcess: @@ -180,6 +189,7 @@ xPathScrapers: scene: Title: *titleSelector + Code: *codeSelector Date: *dateAttr Details: *detailsAttr Performers: *performersAttr @@ -212,6 +222,7 @@ xPathScrapers: scene: Title: *titleSelector + Code: *codeSelector Date: *dateAttr Details: *detailsAttr Performers: *performersAttr @@ -232,4 +243,4 @@ xPathScrapers: Performers: *performersAttr Tags: *tagsAttr Studio: *studioFromTitleAttr -# Last Updated December 24, 2022 +# Last Updated December 27, 2023 diff --git a/scrapers/OldNanny.yml b/scrapers/OldNanny.yml new file mode 100644 index 000000000..5e6df0581 --- /dev/null +++ b/scrapers/OldNanny.yml @@ -0,0 +1,39 @@ +# This scraper does not parse older scenes that use a different page layout +name: "OldNanny" +sceneByURL: + - action: scrapeXPath + url: + - oldnanny.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + selector: //div[@class="col-12 col-xl-4"]//dl[2]//a/text() + postProcess: + - replace: + - regex: ([a-z])([A-Z]) + with: $1 $2 + Title: + selector: //title/text() + postProcess: + - replace: + - regex: ^.+\s:\s(.+)$ + with: $1 + Tags: + Name: + selector: //div[@class="col-12 col-xl-4"]//dl[3]//a/text() + Date: + selector: //div[@class="col-12 text-center title-wrapp"]/h1/small/text() + postProcess: + - parseDate: January 2, 2006 + Performers: + Name: + selector: //div[@class="col-12 col-xl-4"]//dl[1]//a + postProcess: + - replace: + - regex: \, + with: + Image: //video[@id="video-player"]/@poster +# Last Updated July 27, 2023 diff --git a/scrapers/Oldje.yml b/scrapers/Oldje.yml index 9a75a37b6..11834b2c8 100644 --- a/scrapers/Oldje.yml +++ b/scrapers/Oldje.yml @@ -25,11 +25,11 @@ xPathScrapers: - parseDate: 2006-01-02 Performers: Name: - selector: //a[contains(@href,'/models/preview/')] + selector: //a[@class="act_name_h"] Details: selector: //p[@class='text']|//div[@class="preview_desc"] Image: - selector: (//div[@id="content" or @id="prev_m"]/a)[1]/img/@src + selector: //div[@id="content" or @id="prev_m"]/a[1]/img/@src oldje3someScraper: scene: Studio: @@ -51,4 +51,4 @@ xPathScrapers: - replace: - regex: ^ with: "https://www.oldje-3some.com/" -# Last Updated April 15, 2021 +# Last Updated February 25, 2023 diff --git a/scrapers/OnlyTarts.yml b/scrapers/OnlyTarts.yml new file mode 100644 index 000000000..f859f350a --- /dev/null +++ b/scrapers/OnlyTarts.yml @@ -0,0 +1,33 @@ +name: OnlyTarts +sceneByURL: + - action: scrapeXPath + url: + - onlytarts.com/video + scraper: sceneScraper +xPathScrapers: + sceneScraper: + 
scene: + Title: //h1[@class="video-detail__title"] + Details: //div[@data-id="description" and @class="hidden"]/text() + Date: + selector: //div[@class="video-info__time"] + postProcess: + - replace: + - regex: .*•\s+(.*) + with: $1 + - parseDate: 2 January, 2006 + Image: + selector: //script[contains(., "pluginSettings")]/text() + postProcess: + - replace: + - regex: .*url":\s*"(.*poster[^"]*).*$ + with: $1 + Studio: + Name: + fixed: Only Tarts + Tags: + Name: //a[contains(@class, "tags__item")] + Performers: + Name: //div[contains(@class, "video-info")]//a[contains(@href, "/model")]/text() + URL: //div[contains(@class, "video-info")]//a[contains(@href, "/model")]/@href +# Last Updated September 10, 2023 diff --git a/scrapers/Oreno3d.yml b/scrapers/Oreno3d.yml new file mode 100644 index 000000000..39d5a0729 --- /dev/null +++ b/scrapers/Oreno3d.yml @@ -0,0 +1,56 @@ +name: "Oreno3d" +sceneByURL: + - action: scrapeXPath + url: + - oreno3d.com + scraper: sceneScraper + +sceneByName: + action: scrapeXPath + queryURL: https://oreno3d.com/search?keyword={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + common: + $searchItem: //div[@class="g-main-grid"]/article/a + scene: + Title: $searchItem/h2/text() + URL: $searchItem/@href + Image: + selector: $searchItem/figure/img/@src + postProcess: + - replace: + - regex: "^" + with: "https://oreno3d.com" + Studio: + Name: $searchItem/div[@class="box-text1"]/div/text() + sceneScraper: + common: + $article: //article[@class="g-main-video-article"] + $aTag: //article[@class="g-main-video-article"]/section[@class="video-section-tag"]//a[contains(@class,"tag-btn")] + scene: + URL: //link[@rel='canonical']/@href|//meta[@property='og:url']/@content + Title: $article//h1[@class="video-h1"]/text() + Details: + selector: $article//blockquote[@class="video-information-comment"]/text() + concat: "\n" + Image: + selector: $article//img[@class="video-img"]/@src + postProcess: + - replace: + - regex: "^" + with: "https://oreno3d.com" + Tags: + Name: $aTag/div/text() + Studio: + Name: $aTag[contains(@href,"/authors/")]/div/text() + URL: $aTag[contains(@href,"/authors/")]/@href + Performers: + Name: $aTag[contains(@href,"/characters/")]/div/text() + URL: $aTag[contains(@href,"/characters/")]/@href +# Last Updated November 25, 2022 diff --git a/scrapers/PKFStudios.yml b/scrapers/PKFStudios.yml new file mode 100644 index 000000000..1c2742a28 --- /dev/null +++ b/scrapers/PKFStudios.yml @@ -0,0 +1,57 @@ +name: PKF Studios +sceneByURL: + - action: scrapeXPath + url: + - pkfstudios.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class="entry-title"]/text() + Details: + # Description is a sequence of p elements containing ONLY text + selector: //div[@class="entry-content"]/p[not(*)]/text() + concat: "\n\n" + # Remove the trailing "_ _ _ _ _" separator + postProcess: + - replace: + - regex: "(\n\n[_ ]+)" + with: "" + Date: + selector: //span[@class="entry-date"]//text() + postProcess: + - parseDate: January 2, 2006 + Image: + # Images are loaded with javascript, we'll just grab the last image + # from the srcset because it's usually the largest + selector: //div[contains(@class, "post-thumbnail")]/img/@data-lazy-srcset + postProcess: + - replace: + - regex: ^.*\s+(https://.*)\s+\d+w$ + with: $1 + Studio: + Name: + fixed: "PKF Studios" + Tags: + Name: + # First selector will match when the tags are outside of the <strong> tag + selector: 
//div[@class="entry-content"]//strong[starts-with(text(), "Role")]/following-sibling::text() | //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/text() + postProcess: + - replace: + - regex: (?:.*:\s+)?([^.]*).? + with: $1 + split: ", " + Performers: + Name: + # Sometimes the performers are listed in a separate tag, sometimes they're in a paragraph mixed in with the description + selector: //div[@class="entry-content"]//*[contains(text(), "Starring")]/text() | //p[contains(., "Starring")] + postProcess: + - replace: + - regex: ".*Starring (.*)" + with: $1 + - regex: "(?i)directed by johnm" + with: "" + - regex: ", (and)?" + with: " and " + split: " and " +# Last Updated December 05, 2023 diff --git a/scrapers/PMVHaven/PMVHaven.py b/scrapers/PMVHaven/PMVHaven.py new file mode 100644 index 000000000..6ba81d85f --- /dev/null +++ b/scrapers/PMVHaven/PMVHaven.py @@ -0,0 +1,128 @@ +import os +import json +import sys +import requests +import random +import time +from urllib.parse import urlparse +# extra modules below need to be installed +try: + import cloudscraper +except ModuleNotFoundError: + print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr) + print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr) + sys.exit() + +try: + from lxml import html +except ModuleNotFoundError: + print("You need to install the lxml module. (https://lxml.de/installation.html#installation)", file=sys.stderr) + print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr) + sys.exit() + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + from py_common import log +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() + +#bugfix for socks5 proxies, due to pySocks implementation incompatibility with Stash +proxy = os.environ.get('HTTPS_PROXY', '') +if proxy != "" and proxy.startswith("socks5://"): + proxy = proxy.replace("socks5://", "socks5h://") + os.environ['HTTPS_PROXY'] = proxy + os.environ['HTTP_PROXY'] = proxy + +URL_XPATH = '//meta[@property="og:video:url"]/@content' +IMAGE_XPATH = '//meta[@property="og:image"]/@content' + +def getHTML(url, retries=0): + scraper = cloudscraper.create_scraper() + + try: + scraped = scraper.get(url) + except requests.exceptions.Timeout as exc_time: + log.debug(f"Timeout: {exc_time}") + return getHTML(url, retries + 1) + except Exception as e: + log.error(f"scrape error {e}") + sys.exit(1) + if scraped.status_code >= 400: + if retries < 10: + wait_time = random.randint(1, 4) + log.debug(f"HTTP Error: {scraped.status_code}, waiting {wait_time} seconds") + time.sleep(wait_time) + return getHTML(url, retries + 1) + log.error(f"HTTP Error: {scraped.status_code}, giving up") + sys.exit(1) + + return html.fromstring(scraped.text) + +def getXPATH(pageTree, XPATH): + res = pageTree.xpath(XPATH) + if res: + return res[0] + return "" + +def getData(sceneId): + try: + req = requests.post("https://pmvhaven.com/api/v2/videoInput", json={ + "video": sceneId, + "mode": "InitVideo", + "view": True + }) + except Exception as e: + log.error(f"scrape error {e}") + sys.exit(1) + return req.json() + +def getURL(pageTree): + return getXPATH(pageTree, URL_XPATH) + +def getIMG(data): + for item in data['thumbnails']: + if item.startswith("https://storage.pmvhaven.com/"): + return item + return "" + +def main(): + params = json.loads(sys.stdin.read()) + if not params['url']: + log.error('No URL entered.') + sys.exit(1) + + tree = getHTML(params['url']) + data = getData(getURL(tree).split('_')[-1])['video'][0] + + tags = data['tags'] + data['categories'] + + ret = { + 'title': data['title'], + 'image': getIMG(data), + 'date': data['isoDate'].split('T')[0], + 'details': data['description'], + 'studio': { + 'Name': data['creator'] + }, + 'tags':[ + { + 'name': x.strip() + } for x in tags + ], + 'performers': [ + { + 'name': x.strip() + } for x in data['stars'] + ] + } + print(json.dumps(ret)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scrapers/PMVHaven/PMVHaven.yml b/scrapers/PMVHaven/PMVHaven.yml new file mode 100644 index 000000000..eb9c8e51a --- /dev/null +++ b/scrapers/PMVHaven/PMVHaven.yml @@ -0,0 +1,11 @@ +name: PMVHaven +# requires: py_common + +sceneByURL: + - url: + - pmvhaven.com/video/ + action: script + script: + - python + - PMVHaven.py +# Last Updated November 30, 2023 diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 19e352386..5bd3b4b38 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -4,11 +4,77 @@ sceneByURL: url: - povr.com scraper: sceneScraper + - action: scrapeXPath + url: + - brasilvr.com + - milfvr.com + - tranzvr.com + - wankzvr.com + scraper: sceneScraperPremium +movieByURL: + - action: scrapeXPath + url: + - wankzvr.com + scraper: movieScraper xPathScrapers: + sceneScraperPremium: + common: + $info: &infoSel //div[@class="detail"] + $url: &urlSel //link[@rel="canonical"]/@href + scene: + Title: &titleSel //div[@class="detail__header detail__header-lg"]/h1 + Date: &dateAttr + selector: $info//span[@class="detail__date"]/text() + postProcess: + - parseDate: 2 January, 2006 + Details: &detailsAttr + selector: //div[@class="detail__txt 
detail__txt-show_lg"]/text()|//span[@class="more__body"]/text() + concat: " " + Tags: + Name: $info//div[@class="tag-list__body"]//a/text() + Performers: + Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() + Image: &imageAttr + selector: //meta[@property="og:image"]/@content|//div[@class="photo-strip__body"]/div[2]/@data-src + postProcess: + - replace: + - regex: medium.jpg + with: large.jpg + # TranzVR defaults to smaller covers, but we can grab a bigger one + - regex: 472/cover.jpg + with: 680/cover.jpg + # All of these domains give 403 errors when saving the scraped image + # but povr.com has the same images and is totally cool with our scraping + - regex: cdns-i.wankzvr.com + with: images.povr.com/wvr + - regex: images.tranzvr.com + with: images.povr.com/tvr + - regex: cdns-i.milfvr.com + with: images.povr.com/mvr + - regex: cdns-i.brasilvr.com + with: images.povr.com + Studio: &studioAttr + Name: + selector: *urlSel + postProcess: + - replace: + - regex: ^.*//(?:www.)?([^/]*).*$ + with: $1 + - map: + brasilvr.com: BrasilVR + milfvr.com: MilfVR + tranzvr.com: TranzVR + wankzvr.com: WankzVR + Code: &codeAttr + selector: *urlSel + postProcess: + - replace: + - regex: ^.*-(\d+)$ + with: $1 sceneScraper: scene: Title: - selector: //span[@class="video__quality video__quality--vr mr-3"]/following-sibling::text() + selector: //h1[contains(@class, "heading-title")]/text() Date: selector: //p[contains(@class, 'player__date')] postProcess: @@ -17,14 +83,34 @@ xPathScrapers: with: $1 - parseDate: 2 January, 2006 Performers: - Name: //a[@class="btn btn--eptenary btn--xsm"] + Name: //li[span[contains(text(), "Pornstars:")]]/following-sibling::li/a/text() Tags: - Name: //ul[@class="category-link mb-2"]//a + Name: //li[span[contains(text(), "Tags:")]]/following-sibling::li/a/text() Details: - selector: //p[@style] - Image: - selector: //meta[@property="og:image"]/@content + selector: //div[contains(@class, "player__description")]/p/text() + Image: *imageAttr Studio: Name: - selector: //a[@class="btn btn--secondary btn--xsm"]/text() -# Last Updated June 13, 2021 + selector: //span[contains(text(), "Studio:")]/following-sibling::a/text() + Code: *codeAttr + movieScraper: + common: + $info: *infoSel + movie: + Name: + selector: *titleSel + postProcess: + - replace: + - regex: ^ + with: "WankzVR - " + Duration: + selector: $info//span[@class="time"]/text() + postProcess: + - replace: + - regex: \smin + with: ":00" + Date: *dateAttr + Studio: *studioAttr + Synopsis: *detailsAttr + FrontImage: *imageAttr +# Last Updated June 28, 2023 diff --git a/scrapers/PaperStreetMedia.yml b/scrapers/PaperStreetMedia.yml index 1d2a60f49..75cbb04d0 100755 --- a/scrapers/PaperStreetMedia.yml +++ b/scrapers/PaperStreetMedia.yml @@ -16,6 +16,7 @@ sceneByURL: - freeusefantasy.com/movies/ - freeusemilf.com/movies/ - hijabhookup.com/movies/ + - hijabmylfs.com/movies/ - innocenthigh.com/movies/ - littleasians.com/movies/ - momswap.com/movies/ @@ -25,6 +26,7 @@ sceneByURL: - pervdoctor.com/movies/ - pervmom.com/movies/ - pervnana.com/movies/ + - pervprincipal.com/movies/ - pervtherapy.com/movies/ - shoplyfter.com/movies/ - shoplyftermylf.com/movies/ @@ -43,7 +45,7 @@ xPathScrapers: sceneScraper: scene: Title: //p[@class="video-title"]/text() - Details: //p[@class="video-description"]/text() + Details: //div[contains(@class, 'video-description')]/descendant-or-self::*/text() Performers: Name: //p[@class="model-name"]/a Image: //div[@class="video-area"]//stream/@poster @@ -69,6 +71,7 
@@ xPathScrapers: freeusefantasy: Freeuse Fantasy freeusemilf: Freeuse Milf hijabhookup: Hijab Hookup + hijabmylfs: Hijab Mylfs innocenthigh: Innocent High littleasians: Little Asians momswap: Mom Swap @@ -78,6 +81,7 @@ xPathScrapers: pervdoctor: Perv Doctor pervmom: Perv Mom pervnana: Perv Nana + pervprincipal: Perv Principal pervtherapy: Perv Therapy shoplyfter: Shoplyfter shoplyftermylf: Shoplyfter MYLF @@ -91,4 +95,4 @@ xPathScrapers: teensloveblackcocks: Teens Love Black Cocks thickumz: Thickumz tinysis: Tiny Sis -# Last Updated June 27, 2022 +# Last Updated September 28, 2023 diff --git a/scrapers/Penthouse.yml b/scrapers/Penthouse.yml index 80e2dd3fe..371fcad11 100644 --- a/scrapers/Penthouse.yml +++ b/scrapers/Penthouse.yml @@ -1,4 +1,4 @@ -name: Penthouse +name: Penthouse Gold sceneByURL: - action: scrapeXPath url: @@ -20,7 +20,7 @@ xPathScrapers: - parseDate: 01/02/2006 Studio: Name: - fixed: "Penthouse" + fixed: "Penthouse Gold" Performers: Name: //div[@class="model"]/a/@title Tags: diff --git a/scrapers/PerfectGonzo.py b/scrapers/PerfectGonzo/PerfectGonzo.py similarity index 100% rename from scrapers/PerfectGonzo.py rename to scrapers/PerfectGonzo/PerfectGonzo.py diff --git a/scrapers/PerfectGonzo.yml b/scrapers/PerfectGonzo/PerfectGonzo.yml similarity index 100% rename from scrapers/PerfectGonzo.yml rename to scrapers/PerfectGonzo/PerfectGonzo.yml diff --git a/scrapers/PervCity.yml b/scrapers/PervCity.yml index c469c9dd9..dcae70317 100644 --- a/scrapers/PervCity.yml +++ b/scrapers/PervCity.yml @@ -8,6 +8,7 @@ sceneByURL: - oraloverdose.com/trailers/ - pervcity.com/trailers/ - upherasshole.com/trailers/ + - dpdiva.com/trailers/ scraper: sceneScraper xPathScrapers: sceneScraper: @@ -16,8 +17,25 @@ xPathScrapers: $base: //head/base/@href scene: Title: $sceneinfo/div[@class="infoHeader"] + Studio: + Name: + selector: //head/base/@href + postProcess: + - replace: + - regex: .+/(?:www\.)?([^\.]+)\.com/.* + with: $1 + - map: + analoverdose: Anal Overdose + bangingbeauties: Banging Beauties + chocolatebjs: Chocolate BJs + dpdiva: DP Diva + oraloverdose: Oral Overdose + pervcity: PervCity + upherasshole: Up Her Asshole Performers: Name: $sceneinfo//span[@class="tour_update_models"]/a + Tags: + Name: $sceneinfo//div[@class="tagcats"]/a/text() Details: $sceneinfo//p/text()|$sceneinfo//h3[@class="description"] Image: selector: //head/base/@href|//img[@class="posterimg stdimage thumbs"]/@src @@ -33,25 +51,9 @@ xPathScrapers: - regex: \s with: "_" - regex: ^ - with: "https://pervcity.com/search.php?query=" + with: "https://pervcity.com/search.php?query=" - subScraper: selector: //div[@class="category_listing_block"]//div[@class="date"]/text() postProcess: - parseDate: 01-02-2006 - Studio: - Name: - selector: //head/base/@href - postProcess: - - replace: - - regex: .+/([^\.]+)\.com/.* - with: $1 - - map: - analoverdose: AnalOverdose - chocolatebjs: ChocolateBjs - bangingbeauties: BangingBeauties - oraloverdose: OralOverdose - pervcity: PervCity - upherasshole: UpHerAsshole - Tags: - Name: $sceneinfo//div[@class="tagcats"]/a/text() # Last Updated February 13, 2022 diff --git a/scrapers/PlayboyPlus.yml b/scrapers/PlayboyPlus.yml index 94d0cd642..e4e22ac56 100644 --- a/scrapers/PlayboyPlus.yml +++ b/scrapers/PlayboyPlus.yml @@ -1,14 +1,25 @@ name: PlayboyPlus sceneByURL: - action: scrapeXPath - url: + url: &urls - playboyplus.com + - pbp-ma.playboy.com + - pbp-ma-legacy.playboy.com scraper: sceneScraper + queryURL: "{url}" + queryURLReplace: + url: + - regex: 
(?:https://)?(pbp-ma(-legacy)?.playboy.com)/gallery/([^/]*)(?:/vip)? + with: https://playboyplus.com/gallery/$3 galleryByURL: - action: scrapeXPath - url: - - playboyplus.com + url: *urls scraper: galleryScraper + queryURL: "{url}" + queryURLReplace: + url: + - regex: (?:https://)?(pbp-ma(-legacy)?.playboy.com)/gallery/([^/]*)(?:/vip)? + with: https://playboyplus.com/gallery/$3 xPathScrapers: sceneScraper: scene: @@ -39,4 +50,4 @@ xPathScrapers: Performers: *performers Details: *details Studio: *studio -# Last Updated April 13, 2021 +# Last Updated March 27, 2023 diff --git a/scrapers/Plushies.yml b/scrapers/Plushies.yml index d1ed37572..112d53ac4 100644 --- a/scrapers/Plushies.yml +++ b/scrapers/Plushies.yml @@ -1,10 +1,37 @@ name: Plushies +sceneByName: + action: scrapeXPath + queryURL: "https://plushies.tv/tour/search.php?query={}" + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper sceneByURL: - action: scrapeXPath url: - plushies.tv scraper: sceneScraper xPathScrapers: + sceneSearch: + common: + $searchItem: //div[@class='updateItem']/div[@class='updateDetails'] + $searchThumb: //div[@class='updateItem']/a/img + scene: + Title: + selector: $searchItem/h4/a + Date: + selector: $searchItem/p/span[2] + postProcess: + - parseDate: 01/02/2006 + URL: + selector: $searchItem/h4/a/@href + Image: + selector: $searchThumb/@src + postProcess: + - replace: + - regex: ^ + with: "https://plushies.tv/tour/" sceneScraper: common: $ub: //div[@class="update_block_info"] @@ -22,7 +49,7 @@ xPathScrapers: Details: selector: $ub/span[@class="latest_update_description"] Image: - selector: //img[@class="stdimage large_update_thumb left thumbs"]/@src + selector: //img[@class="stdimage small_update_thumb thumbs"]/@src postProcess: - replace: - regex: ^ @@ -30,4 +57,4 @@ xPathScrapers: Studio: Name: fixed: Plushies -# Last Updated May 07, 2021 +# Last Updated March 30, 2023 diff --git a/scrapers/PornHex.yml b/scrapers/PornHex.yml new file mode 100644 index 000000000..d370936f6 --- /dev/null +++ b/scrapers/PornHex.yml @@ -0,0 +1,49 @@ +name: "PornHex" +sceneByURL: + - action: scrapeXPath + url: + - pornhex.com + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - pornhex.com + scraper: performerScraper +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@itemprop="name"]/@content + Image: //video[@id="player"]/@poster + Date: + selector: //meta[@itemprop="uploadDate"]/@content + postProcess: + - parseDate: 2006-01-02 15:04:05 + - parseDate: 2006-01-02 + Tags: + Name: //div[@class="meta-bar col-12 padd-top-small"]//a[contains(@href,"/search?q=")]/@title | //div[@class="meta-bar col-12"]//a[contains(@href,"/categories/")]/text() + Performers: + Name: //div[@class="meta-bar col-12 padd-top-small"]//a[contains(@href,"/pornstars/")]/text() + Details: //meta[@itemprop="description"]/@content + performerScraper: + common: + $pornstar: //section[@class="pornstar-header"]//div[@class="col-6"]//p + performer: + Name: $pornstar//span[contains(text(),"Alias -")]/following::span/text() + Country: $pornstar//span[contains(text(),"Home Country -")]/following::span/text() + Ethnicity: $pornstar//span[contains(text(),"Ethnicity -")]/following::span/text() + HairColor: $pornstar//span[contains(text(),"Hair -")]/following::span/text() + EyeColor: $pornstar//span[contains(text(),"Eye Colour -")]/following::span/text() + Height: + selector: $pornstar//span[contains(text(),"Height -")]/following::span/text() + postProcess: + - feetToCm: true + 
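The `feetToCm: true` post-processor just above is Stash's built-in imperial-to-metric conversion for performer heights. As a rough illustration of the arithmetic it implies — the `feet_to_cm` helper and the `5'7"`-style input format are our assumptions for the sketch, not Stash's actual implementation — consider:

    import re

    def feet_to_cm(height: str) -> str:
        # parse strings like 5'7" (inches optional, e.g. 5')
        m = re.match(r"(\d+)\D+(\d+)?", height)
        if not m:
            return ""
        feet, inches = int(m[1]), int(m[2] or 0)
        return str(round((feet * 12 + inches) * 2.54))  # 1 inch = 2.54 cm

    assert feet_to_cm("5'7\"") == "170"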
Measurements: $pornstar//span[contains(text(),"Measurements -")]/following::span/text() + URL: $pornstar//span[contains(text(),"Official site -")]/following::a/@href + Image: + selector: //section[@class="pornstar-header"]//div[@class="thumbnail-col-2 col-2"]/img[not(contains(@src,"default_portrait"))]/@src + postProcess: + - replace: + - regex: ^ + with: "https://pornhex.com" + +# Last Updated May 24, 2023 \ No newline at end of file diff --git a/scrapers/Pornbox.yml b/scrapers/Pornbox.yml index 01f9702da..b7c275f63 100644 --- a/scrapers/Pornbox.yml +++ b/scrapers/Pornbox.yml @@ -1,3 +1,5 @@ +# yaml-language-server: $schema=../validator/scraper.schema.json + name: Pornbox sceneByURL: @@ -28,19 +30,22 @@ sceneByQueryFragment: jsonScrapers: sceneScraper: scene: - Title: - selector: scene_name + Title: scene_name Date: selector: publish_date postProcess: - parseDate: 2006-01-02T15:04:05.999999Z Tags: - Name: - selector: niches.#.niche + Name: niches.#.niche Performers: - Name: models.#.model_name - Image: - selector: player_poster + Name: "[models.#.model_name,male_models.#.model_name].@flatten" + Gender: + selector: "[models.#.sex,male_models.#.sex].@flatten" + postProcess: + - map: + female: FEMALE + male: MALE + Image: player_poster Details: selector: small_description postProcess: @@ -60,10 +65,8 @@ jsonScrapers: common: $contents: content.contents.# scene: - Title: - selector: $contents.scene_name - Image: - selector: $contents.thumbnail.large + Title: $contents.scene_name + Image: $contents.thumbnail.large Date: selector: $contents.publish_date postProcess: @@ -73,5 +76,5 @@ jsonScrapers: postProcess: - replace: - regex: ^ - with: https://pornbox.com/contents/$1 -# Last Updated December 10, 2021 + with: https://pornbox.com/contents/ +# Last Updated March 03, 2023 diff --git a/scrapers/Porncz.yml b/scrapers/Porncz.yml index a370cbddf..466e3b4da 100644 --- a/scrapers/Porncz.yml +++ b/scrapers/Porncz.yml @@ -9,8 +9,7 @@ sceneByURL: - czechanalsex.com - czechbiporn.com - czechboobs.com - - czechdeviant.com - - czechescortgirls + - czechescortgirls.com - czechexecutor.com - czechgaycity.com - czechgypsies.com @@ -43,20 +42,27 @@ xPathScrapers: Title: selector: //div[@class="heading-detail"]/h1 Performers: - Name: //div[@class="video-info-item color_05-text" and contains(text(), "Actors:")]//a + Name: //div[contains(@class, "video-info-item") and contains(text(), "Actors:")]//a Details: //meta[@name="description"]/@content Tags: - Name: //div[@class="video-info-item color_05-text" and contains(text(), "Genres:")]//a + Name: //div[contains(@class, "video-info-item") and contains(text(), "Genres:")]//a Image: - selector: //meta[@property="og:image"]/@content + selector: //div[@id="video-poster"]/@data-poster postProcess: - replace: - - regex: "^/" - with: "https://www.porncz.com/" + - regex: ^ + with: https://www.porncz.com Date: selector: //meta[@property="video:release_date"]/@content postProcess: - parseDate: 2.01.2006 + URL: &url //link[@rel="canonical"]/@href + Code: + selector: *url + postProcess: + - replace: + - regex: .*/(\d*)$ + with: $1 Studio: Name: selector: //a[@class="video-detail-logo"]//@alt diff --git a/scrapers/Pornhub.yml b/scrapers/Pornhub.yml index 9bac8d946..e9a6ae8f2 100644 --- a/scrapers/Pornhub.yml +++ b/scrapers/Pornhub.yml @@ -18,7 +18,7 @@ sceneByFragment: queryURL: https://www.pornhub.com/view_video.php?viewkey={filename} queryURLReplace: filename: - - regex: (?:.*[^a-zA-Z\d])?(ph(?:[a-zA-Z\d]+)).+ + - regex: (?:.*[^a-zA-Z\d])?((?:ph)?(?:[a-zA-Z\d]{13})).+ with: 
$1 - regex: .*\.[^\.]+$ # if no ph id is found in the filename with: # clear the filename so that it doesn't leak to ph @@ -112,34 +112,32 @@ xPathScrapers: Image: //div[@class="thumbImage"]/img/@src|//img[@id="getAvatar"]/@src sceneScraper: common: - $performer: //div[@class="pornstarsWrapper js-pornstarsWrapper"]/a[@data-mxptype="Pornstar"] - $studio: //div[@data-type="channel"]/a + $datablob: //script[contains(., 'VideoObject')]/text() + $videowrap: //div[@class="video-wrapper"] scene: - Title: //h1[@class="title"]/span/text() + Title: //meta[@property="og:title"]/@content URL: //meta[@property="og:url"]/@content Date: - selector: //script[contains(., 'uploadDate')]/text() + selector: $datablob postProcess: - replace: - - regex: .+(?:"uploadDate":\s")([^"]+).+ + - regex: .+(?:"uploadDate"\s*:\s*")([^T]+).+ with: $1 - - regex: (.+)T.+ - with: $1 - - parseDate: 2006-01-02 + - parseDate: "2006-01-02" Tags: - Name: //div[@class="categoriesWrapper"]//a[not(@class="add-btn-small ")]|//div[@class="tagsWrapper"]//a[not(@class="add-btn-small")] - Image: - selector: //meta[@property="og:image"][1]/@content + Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," categoriesWrapper ")]/a/text()|$videowrap//div[contains(concat(" ",normalize-space(@class)," ")," tagsWrapper ")]/a/text() Performers: - Name: $performer/@data-mxptext - URL: $performer/@href + Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," pornstarsWrapper ")]/a/@data-mxptext + Image: //meta[@property="og:image"]/@content Studio: - Name: $studio - URL: $studio/@href - Details: - selector: //div[@class="video-info-row"][1]/text()[starts-with(normalize-space(.),"Description:")] - postProcess: - - replace: - - regex: "Description: (.*)" - with: $1 -# Last Updated July 08, 2022 + Name: $videowrap//div[contains(concat(" ",normalize-space(@class)," ")," usernameWrap ")]//a/text() + +driver: + cookies: + - CookieURL: "https://www.pornhub.com" + Cookies: + - Name: "accessAgeDisclaimerPH" + Domain: ".pornhub.com" + Value: "1" + Path: "/" +# Last Updated September 26, 2023 diff --git a/scrapers/PornsiteManager.yml b/scrapers/PornsiteManager.yml new file mode 100644 index 000000000..62c24496c --- /dev/null +++ b/scrapers/PornsiteManager.yml @@ -0,0 +1,140 @@ +name: PornsiteManager +sceneByURL: + - action: scrapeXPath + scraper: sceneScraper + url: + # Taken from https://www.pornsitemanager.com/en/sites on 2023-09-22 + - alphamales.com/en/videos/detail/ + - andolinixxl.com/en/videos/detail/ + - attackboys.com/en/videos/detail/ + - berryboys.fr/en/videos/detail/ + - bolatino.com/en/videos/detail/ + - bravofucker.com/en/videos/detail/ + - brett-tyler.com/en/videos/detail/ + - bulldogxxx.com/en/videos/detail/ + - cadinot.fr/en/videos/detail/ + - cazzofilm.com/en/videos/detail/ + - citebeur.com/en/videos/detail/ + - clairprod.com/en/videos/detail/ + - crunchboy.com/en/videos/detail/ + - darkcruising.com/en/videos/detail/ + - enzorimenez.com/en/videos/detail/ + - eurocreme.com/en/videos/detail/ + - frenchporn.fr/en/videos/detail/ + - gayarabclub.com/en/videos/detail/ + - gayfrenchkiss.fr/en/videos/detail/ + - hardkinks.com/en/videos/detail/ + - harlemsex.com/en/videos/detail/ + - hotcast.fr/en/videos/detail/ + - jessroyan.com/en/videos/detail/ + - jnrc.fr/en/videos/detail/ + - kinkytwink.com/en/videos/detail/ + - mackstudio.com/en/videos/detail/ + - maxence-angel.com/en/videos/detail/ + - menoboy.com/en/videos/detail/ + - mistermale.com/en/videos/detail/ + - paulomassaxxx.com/en/videos/detail/ + - 
philippwants.com/en/videos/detail/ + - rawfuck.com/en/videos/detail/ + - ridleydovarez.com/en/videos/detail/ + - sketboy.com/en/videos/detail/ + - universblack.com/en/videos/detail/ + - viktor-rom.com/en/videos/detail/ + - vintagegaymovies.com/en/videos/detail/ + - wurstfilm.com/en/videos/detail/ +movieByURL: + - action: scrapeXPath + scraper: movieScraper + url: + - alphamales.com/en/dvd/detail/ + - andolinixxl.com/en/dvd/detail/ + - attackboys.com/en/dvd/detail/ + - berryboys.fr/en/dvd/detail/ + - bolatino.com/en/dvd/detail/ + - bravofucker.com/en/dvd/detail/ + - brett-tyler.com/en/dvd/detail/ + - bulldogxxx.com/en/dvd/detail/ + - cadinot.fr/en/dvd/detail/ + - cazzofilm.com/en/dvd/detail/ + - citebeur.com/en/dvd/detail/ + - clairprod.com/en/dvd/detail/ + - crunchboy.com/en/dvd/detail/ + - darkcruising.com/en/dvd/detail/ + - enzorimenez.com/en/dvd/detail/ + - eurocreme.com/en/dvd/detail/ + - frenchporn.fr/en/dvd/detail/ + - gayarabclub.com/en/dvd/detail/ + - gayfrenchkiss.fr/en/dvd/detail/ + - hardkinks.com/en/dvd/detail/ + - harlemsex.com/en/dvd/detail/ + - hotcast.fr/en/dvd/detail/ + - jessroyan.com/en/dvd/detail/ + - jnrc.fr/en/dvd/detail/ + - kinkytwink.com/en/dvd/detail/ + - mackstudio.com/en/dvd/detail/ + - maxence-angel.com/en/dvd/detail/ + - menoboy.com/en/dvd/detail/ + - mistermale.com/en/dvd/detail/ + - paulomassaxxx.com/en/dvd/detail/ + - philippwants.com/en/dvd/detail/ + - rawfuck.com/en/dvd/detail/ + - ridleydovarez.com/en/dvd/detail/ + - sketboy.com/en/dvd/detail/ + - universblack.com/en/dvd/detail/ + - viktor-rom.com/en/dvd/detail/ + - vintagegaymovies.com/en/dvd/detail/ + - wurstfilm.com/en/dvd/detail/ +xPathScrapers: + sceneScraper: + scene: + Title: //h1 + Details: //h2 + URL: &url //link[@rel="alternate" and @hreflang="en"]/@href + Date: &date + selector: //script[@type="application/ld+json"]/text() + postProcess: + - replace: + - regex: .*"datePublished"\s*:\s*"([^"]+)".* + with: $1 + Image: + selector: //script[@type="application/ld+json"]/text() + postProcess: + - replace: + - regex: .*"contentUrl"\s*:\s*"([^"]+)".* + with: $1 + Studio: + Name: //div[@class="row mb-4 px-0"]//h3[i[contains(@class, "fa-video")]]/text() + Movies: + Name: + selector: //div[@class="row mb-4 px-0"]//h3[i[contains(@class, "fa-scrubber")]]/text() + postProcess: + - replace: + - regex: DVD:\s* + with: + URL: + selector: //div[@class="row mb-4 px-0"]//a[h3[i[contains(@class, "fa-scrubber")]]]/@href | //link[//div[@class="row mb-4 px-0"]//a[h3[i[contains(@class, "fa-scrubber")]]]/@href and @rel="alternate" and @hreflang="en"]/@href + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: (.*)__SEPARATOR__(https:\/\/[^\/]*)\/.* + with: $2$1 + FrontImage: + selector: //div[@class="row mb-4 px-0"]//a[h3[i[contains(@class, "fa-scrubber")]]]/@href | //link[//div[@class="row mb-4 px-0"]//a[h3[i[contains(@class, "fa-scrubber")]]]/@href and @rel="alternate" and @hreflang="en"]/@href + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: (.*)__SEPARATOR__(https:\/\/[^\/]*)\/.* + with: $2$1 + - subScraper: &dvdCover //div[@class="col-sm-8 col-lg-3 pt-4 px-0 px-lg-2 align-items-center"]//img/@src + Performers: + Name: //div[@class="row mb-4 px-0"]//h3[i[contains(@class, "fa-star")]]/text() + Tags: + Name: //div[@class="row mb-4 px-0"]//h3[not(i)]/text() + movieScraper: + movie: + Name: //h1 + URL: *url + Synopsis: //div[@class="mb-2 bg-light p-3 my-3 text-justify my-text psm-rounded"] + FrontImage: *dvdCover + Date: *date +# Last Updated September 22, 2023 diff --git 
a/scrapers/PremiumBukkake.py b/scrapers/PremiumBukkake/PremiumBukkake.py similarity index 100% rename from scrapers/PremiumBukkake.py rename to scrapers/PremiumBukkake/PremiumBukkake.py diff --git a/scrapers/PremiumBukkake.yml b/scrapers/PremiumBukkake/PremiumBukkake.yml similarity index 100% rename from scrapers/PremiumBukkake.yml rename to scrapers/PremiumBukkake/PremiumBukkake.yml diff --git a/scrapers/Private.yml b/scrapers/Private.yml index cedc34836..4567c254b 100644 --- a/scrapers/Private.yml +++ b/scrapers/Private.yml @@ -9,23 +9,39 @@ movieByURL: url: - private.com scraper: movieScraper +performerByName: + action: scrapeXPath + queryURL: https://www.private.com/search.php?query={} + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - private.com + scraper: performerScraper xPathScrapers: sceneScraper: common: $content: //section[@class="video-description-and-tags clearfix"] scene: - Title: - selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text() + Title: //div[@class="title-zone"]/h1 + Code: + selector: //meta[@property="og:video"]/@content + postProcess: + - replace: + - regex: .+\/(.+)\/trailers\/.+ + with: $1 Date: selector: //meta[@itemprop="uploadDate"]/@content postProcess: # The format changes when another language is selected - parseDate: 01/02/2006 - Details: $content//p[@id="description-section"] + Details: + selector: $content//p[@id="description-section"]/text() + concat: "\n" Tags: - Name: $content//ul[@class="scene-tags"]/li/a/text() + Name: //li[@class="tag-tags"]//a/text() Performers: - Name: $content//ul[@class="scene-models-list"]//a/text() + Name: //li[@class="tag-models"]//a/text() Movies: Name: selector: //a[@data-track="FULL MOVIE"]/@href @@ -33,8 +49,12 @@ xPathScrapers: - subScraper: //div[@class="dvds-wrapper"]/h1/text() URL: //a[@data-track="FULL MOVIE"]/@href Studio: - Name: $content//span[@class="title-site"]/text() - #fixed: Private + Name: + selector: //div[@class="title-zone"]//li/a/span[@class="title-site"]/text()|/html/@lang + postProcess: + - replace: + - regex: ^en$ + with: Private Image: //meta[@property="og:image"]/@content movieScraper: movie: @@ -52,6 +72,57 @@ xPathScrapers: fixed: Private Synopsis: //p[@class="sinopsys"] FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src + performerSearch: + common: + $searchData: //a[@data-track="PORNSTAR_NAME"] + performer: + Name: $searchData + URL: $searchData/@href + performerScraper: + common: + $performerData: //div[contains(concat(' ',normalize-space(@class),' '),' pornstar-wrapper ')] + performer: + Name: $performerData//h1 + URL: //meta[@property="og:url"]/@content + Aliases: $performerData//p[@class="aka"]/text() + Details: $performerData//li[@class="model-facts-long"]/div + Measurements: $performerData//em[text()="Measurements:"]/../text() + Height: + selector: $performerData//em[text()="Height:"]/../text() + postProcess: + - replace: + - regex: (\d+)cm.+ + with: $1 + - map: + -: "" + Weight: + selector: $performerData//em[text()="Weight:"]/../text() + postProcess: + - replace: + - regex: (\d+)kg.+ + with: $1 + - map: + -: "" + Country: $performerData//em[text()="Birth place:"]/../text() + HairColor: $performerData//em[text()="Hair Color:"]/../text() + EyeColor: + selector: $performerData//em[text()="Eye color:"]/../text() + postProcess: + - map: + -: "" + Tattoos: + selector: $performerData//em[text()="Tattoos:"]/../text() + postProcess: + - map: + -: "" + Piercings: + selector: 
$performerData//em[text()="Piercings:"]/../text() + postProcess: + - map: + -: "" + Image: $performerData//img/@src + Gender: + fixed: "Female" driver: cookies: - CookieURL: https://private.com @@ -60,4 +131,11 @@ driver: Domain: ".private.com" Value: "en" Path: "/" -# Last Updated March 10, 2020 + - Name: "agreed18" + Domain: ".private.com" + Value: "true" + Path: "/" + headers: + - Key: User-Agent + Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) +# Last Updated October 21, 2023 diff --git a/scrapers/Pure-BBW.yml b/scrapers/Pure-BBW.yml deleted file mode 100644 index 9e659b32b..000000000 --- a/scrapers/Pure-BBW.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: "Pure-BBW" -sceneByURL: - - action: scrapeXPath - url: - - pure-bbw.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="video_membership"]/div[@class="titlebox clear"]/h3/text() - Details: //div[@class="aboutvideo"]/p/text() - Performers: - Name: //div[@class="video_description"]/ul[@class="featuredModels"]/li/a/span/text() - Image: - selector: //div[@class="videohere"]/img[contains(@class,"stdimage")]/@src | //script[contains(.,'jwplayer("jwbox").setup')]/text() - postProcess: - - replace: - - regex: (.+image:\s+")(.+jpg)(.+) - with: $2 - - regex: ^ - with: "https://pure-bbw.com/" - Studio: - Name: - fixed: Pure-BBW - Date: - selector: //div[@class="video_description"]/h4[1]/text() - postProcess: - - replace: - - regex: .*(\d{4})-(\d{2})-(\d{2}).* - with: $1-$2-$3 - - parseDate: 2006-01-02 -# Last Updated March 11, 2021 diff --git a/scrapers/PureMedia.yml b/scrapers/PureMedia.yml new file mode 100644 index 000000000..92f2504bb --- /dev/null +++ b/scrapers/PureMedia.yml @@ -0,0 +1,76 @@ +name: 'PureMedia' +sceneByURL: + - action: scrapeXPath + url: + - becomingfemme.com/tour + - pure-bbw.com/tour + - pure-ts.com/tour + - pure-xxx.com/tour + - tspov.com/tour + scraper: sceneScraper + - action: scrapeXPath + url: + - pornstarbts.com/tour + - sissypov.com/tour + scraper: newSiteScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class='video_membership']//div[@class='titlebox clear'] + Details: //div[@class='aboutvideo'] + Date: + selector: //div[@class='video_description']/h4[not(contains(.,'Featured'))] + postProcess: + - replace: + - regex: .*[|] + with: + - parseDate: 2006-01-02 + Performers: + Name: //ul[@class='featuredModels']//span[last()] + Studio: + Name: //meta[@name='author']/@content + Tags: + Name: + selector: //meta[@name='keywords']/@content + split: ',' + Image: + selector: //base/@href|//div[@class='videohere']/img[@class='thumbs stdimage']/@src|//script[contains(.,'jwplayer("jwbox").setup')]/text() + concat: '|' + postProcess: + - replace: + - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" + with: $1$2 + - regex: ^// + with: https:// + + newSiteScraper: + common: + $details: //div[@class='videoDetails clear'] + scene: + Title: $details/h3 + Details: $details/p + Date: + selector: //p[contains(span,'Date Added:')]/text() + postProcess: + - parseDate: January 2, 2006 + Performers: + Name: //li[@class='update_models']/a + Tags: + Name: //ul[contains(li,'Tags:')]//a + Image: + selector: //base/@href|//a[@class='fake_trailer']/img/@src0_1x + concat: '|' + postProcess: + - replace: + - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" + with: $1$2 + - regex: ^// + with: https:// + Studio: + Name: + selector: //meta[@name='author']/@content + postProcess: + - map: + SISSY POV: Sissy POV +# Last Updated December 19, 2023 \ No 
newline at end of file diff --git a/scrapers/r18.yml b/scrapers/R18.dev.yml similarity index 65% rename from scrapers/r18.yml rename to scrapers/R18.dev.yml index 60c30d410..b9ffef7d6 100644 --- a/scrapers/r18.yml +++ b/scrapers/R18.dev.yml @@ -1,43 +1,68 @@ -name: "R18 (JAV)" +name: R18.dev (JAV) + +sceneByFragment: + action: scrapeJson + queryURL: https://r18.dev/videos/vod/movies/detail/-/dvd_id={filename}/json + queryURLReplace: + filename: + # gets just the JAV ID out of the filename. This also removes the file extension which is pretty nice. + # You can have your filename be something like "Something Something ABC123 Something Something.mp4" and it will scrape as ABC123. + - regex: (.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+) + with: $2 + scraper: sceneSearchIndirect + sceneByURL: - action: scrapeJson url: - - r18.com/videos + - r18.dev/videos/vod/movies/detail/-/i scraper: sceneScraper - queryURL: "{url}" + queryURL: "https://r18.dev/videos/vod/movies/detail/-/combined={url}/json" queryURLReplace: url: - - regex: .+id=(.+)/.* - with: https://www.r18.com/api/v4f/contents/$1?lang=en + - regex: ".+/id=(.+)/?$" + with: "$1" + +sceneByName: + action: scrapeJson + scraper: sceneSearch + queryURL: "https://r18.dev/videos/vod/movies/detail/-/dvd_id={}/json" + +sceneByQueryFragment: + action: scrapeJson + queryURL: "{url}" + scraper: sceneScraper + movieByURL: - - action: scrapeXPath + - action: scrapeJson url: - - r18.com/videos/vod/movies/list - scraper: serieScraper -sceneByFragment: - action: scrapeXPath - queryURL: https://www.r18.com/common/search/searchword={filename} - queryURLReplace: - filename: - - regex: (.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+) - with: $2 - - regex: ([a-zA-Z]+)(\d+) - with: $1-$2 - scraper: sceneScraper + - r18.dev/videos/vod/movies/detail/-/i + queryURL: "https://r18.dev/videos/vod/movies/detail/-/combined={url}/json" + queryURLReplace: + url: + - regex: ".+/id=(.+)/?$" + with: "$1" + scraper: movieScraper + jsonScrapers: sceneScraper: scene: - Title: data.dvd_id - URL: data.detail_url + Title: + selector: dvd_id Date: - selector: data.release_date - postProcess: - - replace: - - regex: \s.+ - with: "" + selector: release_date + Performers: + Name: + selector: actresses.#.name_romaji + Aliases: + selector: actresses.#.name_kanji + Image: + selector: jacket_full_url + Director: + selector: directors.#.name_romaji Details: - selector: "[data.title,data.comment]" - concat: "\n\n" + selector: title_en + # All this uncensoring stuff happens automatically on the HTML version of the page due to some JS on the frontend there. 
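For context on the mechanism: the `&ppUncensor` chain defined below is simply an ordered list of regex-to-word substitutions applied to titles, synopses, and tag names. A minimal Python sketch of the same idea, using two sample patterns taken from the list (the `uncensor` helper name is ours, purely illustrative):

    import re

    UNCENSOR = [
        (r"S\*{8}ls\b", "Schoolgirls"),  # e.g. S********ls
        (r"T\*{6}e\b", "Tentacle"),      # MIDD-648
    ]

    def uncensor(text: str) -> str:
        # apply each substitution in order, like the YAML replace chain
        for pattern, word in UNCENSOR:
            text = re.sub(pattern, word, text)
        return text

    assert uncensor("S" + "*" * 8 + "ls In Uniform") == "Schoolgirls In Uniform"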
+ # I put in a request to the site dev to add this to the backend JSON API as well, so we don't have to do it every time ourselves and keep an ever-growing list of this stuff postProcess: &ppUncensor - replace: - regex: A\*{3}ed\b @@ -236,6 +261,8 @@ jsonScrapers: with: "Schoolgirls" # Not Sure (PPPD-811) - regex: S\*{8}ls\b with: "Schoolgirls" + - regex: S\*{9}s\b + with: "Schoolgirls" # NFDM-234 - regex: Sch\*{2}lgirls\b with: "Schoolgirls" - regex: SK\*{2}led\b @@ -267,7 +294,7 @@ jsonScrapers: - regex: S\*{8}n\b with: "Submission" - regex: T\*{6}e\b - with: "Tentacle" #MIDD-648 + with: "Tentacle" #MIDD-648 - regex: T\*{6}es\b with: "Tentacles" - regex: T\*{5}e\b @@ -275,7 +302,7 @@ jsonScrapers: - regex: T\*{5}ed\b with: "Tortured" - regex: T\*{5}es\b - with: "Tortures" #MIDD-648 + with: "Tortures" #MIDD-648 - regex: U\*{9}sly\b with: "Unconsciously" - regex: U\*{7}g\b @@ -296,43 +323,108 @@ jsonScrapers: with: "Young Girl" # Not Sure - regex: Y\*{8}ls\b with: "Young Girls" # Not Sure + Studio: + Name: maker_name_en + Code: + selector: dvd_id Tags: - Name: data.categories.#.name - Performers: Name: - selector: data.actresses.#.name - #Uncomment below to convert to Surname Name (JavLibrary compatible) - #postProcess: - # - replace: - # - regex: (.+)(\s)(.+) - # with: $3$2$1 - Studio: - Name: data.maker.name - Image: data.images.jacket_image.large -xPathScrapers: - sceneScraper: - common: - $searchinfo: //li[contains(@class,"item-list")]/a//img[string-length(@alt)=string-length(preceding::div[@class="genre01"]/span/text())] - scene: - Title: $searchinfo/@alt - URL: $searchinfo/ancestor::a/@href - serieScraper: + selector: categories.#.name_en + postProcess: *ppUncensor + URL: + selector: content_id + postProcess: + - replace: + - regex: ^ + with: https://r18.dev/videos/vod/movies/detail/-/id= + + movieScraper: movie: Name: - selector: //h1[@class="txt01"]/text() - postProcess: *ppUncensor + selector: dvd_id + Aliases: title_ja + Duration: + selector: runtime_mins + postProcess: + - replace: + - regex: $ + with: ":00" + Date: + selector: release_date FrontImage: - selector: //li[contains(@class,"item-list")][1]//img/@data-original + selector: jacket_full_url + Director: + selector: directors.#.name_romaji + Synopsis: + selector: title_en + postProcess: *ppUncensor + Studio: + Name: maker_name_en + URL: + selector: content_id + postProcess: + - replace: + - regex: ^ + with: https://r18.dev/videos/vod/movies/detail/-/id= + + # Used for fragment scraping - the API is a little messy if we don't have the exact ID and need to do a search, because the search-result API doesn't have all the info we need. + # So we get the ID from the search result page and make another search for it. + # Note I am not getting aliases yet for performers here as it didn't seem worth it to make that many subqueries. You should hopefully be matching them up with StashDB versions of the performers + # or doing a performer search anyway. + # I've also sent in a request to the dev of the site to make their API a little easier to use, so hopefully that goes through and we can have our cake and eat it too. 
:) + sceneSearchIndirect: + scene: + # Title not on this page, so we need to fetch another page to get it + Title: + selector: content_id postProcess: - replace: - - regex: ps\.jpg - with: "pl.jpg" - BackImage: - selector: //li[contains(@class,"item-list")][2]//img/@data-original + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json + - subScraper: + selector: dvd_id + # Code not on this page, so we need to fetch another page to get it. It's the same as title. Surely there must be a way to only query once? + Code: + selector: content_id postProcess: - replace: - - regex: ps\.jpg - with: "pl.jpg" + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json + - subScraper: + selector: dvd_id + Details: + selector: title + postProcess: *ppUncensor + Image: images.jacket_image.large2 + Director: director + Date: release_date + Tags: + Name: + selector: categories.#.name + postProcess: *ppUncensor Studio: - Name: //p[text()="TOP STUDIOS"]/following-sibling::ul//a/span[@class="item01"]/text() -# Last Updated May 22, 2022 + Name: maker.name + Performers: + Name: actresses.#.name + URL: + selector: content_id + postProcess: + - replace: + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/id=$1/ + + sceneSearch: + scene: + Title: + selector: title + Image: + selector: images.jacket_image.large2 + Date: + selector: release_date + URL: + selector: content_id + postProcess: + - replace: + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json +# Last Updated August 22, 2023 diff --git a/scrapers/RachelSteele.yml b/scrapers/RachelSteele.yml new file mode 100644 index 000000000..ffcfd16c6 --- /dev/null +++ b/scrapers/RachelSteele.yml @@ -0,0 +1,56 @@ +name: RachelSteele +sceneByURL: + - action: scrapeXPath + url: + - rachel-steele.com + scraper: sceneScraper + +sceneByName: + action: scrapeXPath + queryURL: https://rachel-steele.com/x-new/new-preview-list.php?user=rachel-steele&search={}&type=all&submit=Search + scraper: sceneSearch + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + common: + $root: //li[contains(concat(' ',normalize-space(@class),' '),' first ')] + scene: + Title: $root//h3 + Image: + selector: $root//img/@src + postProcess: + - replace: + - regex: ^ + with: https://rachel-steele.com + URL: + selector: $root/a/@href + postProcess: + - replace: + - regex: ^ + with: https://rachel-steele.com + + sceneScraper: + scene: + Title: //div[contains(concat(' ',normalize-space(@class),' '),' span12 ')]/h3/text() + Image: + selector: //div[contains(concat(' ',normalize-space(@class),' '),' album-details ')]/div/div/img/@src + postProcess: + - replace: + - regex: ^ + with: https://rachel-steele.com + Details: //meta[@name="twitter:description"]/@content + Performers: + Name: + fixed: Rachel Steele + Studio: + Name: + fixed: Rachel Steele + URL: + fixed: https://Rachel-Steele.com + +# Last Updated August 09, 2023 diff --git a/scrapers/RagingStallion/RagingStallion.yml b/scrapers/RagingStallion/RagingStallion.yml new file mode 100644 index 000000000..373a88b31 --- /dev/null +++ b/scrapers/RagingStallion/RagingStallion.yml @@ -0,0 +1,31 @@ +# requires: Algolia +name: Raging Stallion +sceneByURL: + - action: script + url: + - ragingstallion.com/en/scene + script: + - python + - ../Algolia/Algolia.py + - ragingstallion +sceneByFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - ragingstallion +sceneByName: + 
action: script + script: + - python + - ../Algolia/Algolia.py + - ragingstallion + - searchName +sceneByQueryFragment: + action: script + script: + - python + - ../Algolia/Algolia.py + - ragingstallion + - validName +# Last Updated September 09, 2023 diff --git a/scrapers/RandyBlue.yml b/scrapers/RandyBlue.yml index 1bc7c1296..92a257af9 100755 --- a/scrapers/RandyBlue.yml +++ b/scrapers/RandyBlue.yml @@ -2,29 +2,32 @@ name: "RandyBlue" sceneByURL: - action: scrapeXPath url: - - randyblue.com/video/ + - randyblue.com/scenes/ scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $titleArea: //div[@class="title-zone"] scene: - Title: //h2[@class="content-item-name"] + Title: $titleArea/h1 Date: - selector: //div[@id="info"]//li[@class="added"]/span + selector: $titleArea/div[@class="calendar"] postProcess: - parseDate: 01/02/2006 - Details: //div[@class="short-description"] - Tags: - Name: //div[@class="tagcloud"]/a/text() - Performers: - Name: //a[@itemprop="actor"] - Image: - selector: "//script[contains(text(), 'image: ')]" + Details: + selector: //div[@id="collapseTwo"] postProcess: - replace: - - regex: '^.*image: "([^"]+)",.*$' - with: $1 + - regex: \x{0020}|\x{00A0} # unicode SP, NBSP + with: " " + Tags: + Name: $titleArea/ul[@class="scene-tags"]/li/a + Performers: + Name: $titleArea/ul[@class="scene-models-list"]/li/a + Image: //meta[@itemprop="thumbnailUrl"]/@content + URL: //link[@rel="canonical"]/@href Studio: Name: fixed: Randy Blue -# Last Updated December 17, 2021 +# Last Updated July 08, 2023 diff --git a/scrapers/RawAttack.yml b/scrapers/RawAttack.yml deleted file mode 100644 index 274dd7449..000000000 --- a/scrapers/RawAttack.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: RawAttack -sceneByURL: - - action: scrapeXPath - url: - - rawattack.com/updates/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //title - postProcess: - - replace: - - regex: \s*-\s*$ - with: "" - Date: - selector: //h3[text()="Release Date:"]/following-sibling::p[@class="date"]/text() - Details: //h2[text()="Description:"]/following-sibling::p/text() - Performers: - Name: //h3[text()="playmates:"]/following-sibling::a[contains(@href,'/models')]/@title - Studio: - Name: - fixed: RawAttack - Image: //img[@class="update_thumb thumbs img-unlock"]/@src - Tags: - Name: //h3[text()="Categories:"]/following-sibling::a[contains(@href,'/categories/')]/@title - -# Last Updated December 17, 2020 diff --git a/scrapers/RawFuckClub.yml b/scrapers/RawFuckClub.yml new file mode 100644 index 000000000..390ff69cf --- /dev/null +++ b/scrapers/RawFuckClub.yml @@ -0,0 +1,30 @@ +name: Raw Fuck Club +sceneByURL: + - action: scrapeXPath + url: + - www.rawfuckclub.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class="col-12 pr-0"]/h2 + Performers: + Name: //span[@class="badge badge-primary"] + Tags: + Name: //span[@class="badge badge-secondary"] + Details: + selector: //p[@class="watch-description"] + Image: //img[@class="img-responsive"]/@src + Date: + selector: //p[@class='watch-published-date']/text() + postProcess: + - replace: + - regex: 'Reposted on (.+)([.])' + with: $1 + - regex: 'Posted on (.+)' + with: $1 + - parseDate: January 2, 2006 + Studio: + Name: + fixed: Raw Fuck Club +# Last Updated November 08, 2023 \ No newline at end of file diff --git a/scrapers/RawRoadNation.yml b/scrapers/RawRoadNation.yml new file mode 100644 index 000000000..caefa2b5c --- /dev/null +++ b/scrapers/RawRoadNation.yml @@ -0,0 +1,28 @@ +name: Raw Road Nation 
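+# Note on the Date field below: postProcess steps run in order, so the regex replace
+# strips the "Release Date: " label first and parseDate then parses the remainder using
+# Go's reference layout (2006-01-02, i.e. an ISO date). For example, assuming the page
+# shows "Release Date: 2023-11-05", the chain yields "2023-11-05" and then the date 2023-11-05.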
+sceneByURL: + - action: scrapeXPath + url: + - rawroadnation.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@class="panel-heading"]/h3 + Performers: + Name: //a[@class="model_name_title"] + Tags: + Name: //div[@class="col-12 text-center px-4 py-2"]/a[contains(@href,"en/videos/")] + Details: //div[@class="video-seo-description"]/p + Image: //video[@id="videoProtectedPlayer"]/@poster + Date: + selector: //h4[contains(text(), "Release Date")]/text() + postProcess: + - replace: + - regex: "Release Date: (.+)" + with: $1 + - parseDate: 2006-01-02 + Studio: + Name: + fixed: Raw Road Nation +# Last Updated November 05, 2023 \ No newline at end of file diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index d3002c9ef..b897091ea 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -1,30 +1,50 @@ name: RealJamVR -sceneByURL: +sceneByURL: &byURL - action: scrapeXPath url: - - realjamvr.com + - realjamvr.com/scene/ + - porncornvr.com/scene scraper: sceneScraper + +galleryByURL: *byURL + xPathScrapers: sceneScraper: scene: - Title: + Title: &title selector: //h1 - Date: - selector: //div[contains(@class, "date")] postProcess: - replace: - - regex: ',' - with: "" - - parseDate: January 2 2006 - Performers: - Name: //span[text()="Featuring:"]/following-sibling::a - Tags: - Name: //span[text()="TAGS:"]/following-sibling::a - Details: - selector: //div[contains(@class, "c-video-item-desc")] + - regex: ^\s+(.+)\s+$ + with: $1 + Date: &date + selector: //div[@class="specs-icon"]/following-sibling::strong + postProcess: + - replace: + - regex: ^([a-zA-Z]{3})\D*(\d{1,2},\s*\d+)$ + with: $1. $2 + - parseDate: Jan. 2, 2006 + Performers: &performers + Name: //div[contains(@class,"scene-view")]/a[contains(@href,"/actor/")] + Tags: &tags + Name: //a[starts-with(@href, "/scenes") and @class="tag"]/text() | //div[not(@class)]/div[@class="specs-icon" and not(i)] + Details: &details + selector: //div[@class="opacity-75 my-2"] Image: - selector: //meta[@property="og:image"]/@content - Studio: + selector: //*[@id="video-player"]//@poster + Studio: &studio Name: - fixed: RealJamVR -# Last Updated June 25, 2021 + selector: //title + postProcess: + - replace: + - regex: '(.*)\| ([^\|]+VR)$' + with: $2 + gallery: + Title: *title + Date: *date + Performers: *performers + Tags: *tags + Details: *details + Studio: *studio + +# Last Updated October 22, 2023 diff --git a/scrapers/RealSensual.yml b/scrapers/RealSensual.yml deleted file mode 100644 index 2b715def1..000000000 --- a/scrapers/RealSensual.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: "RealSensual" -sceneByURL: - - action: scrapeXPath - url: - - realsensual.com/updates/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - common: - $info: //div[@class="col-12 col-md-6"] - scene: - Title: //h2[@class="titular col-12"]/text() - Date: - selector: $info//p[@class="date"]/text() - postProcess: - - parseDate: 2006-01-02 - Details: //p[@class="description-scene"]/text() - Tags: - Name: $info/div[2]//a/text() - Performers: - Name: - selector: $info/div[1]/div[1]/a/text() - postProcess: - - replace: - - regex: \.$ - with: - Image: //video[@class="ampVideo"]/@poster - -# Last Updated November 08, 2020 diff --git a/scrapers/RealityLovers.py b/scrapers/RealityLovers/RealityLovers.py similarity index 100% rename from scrapers/RealityLovers.py rename to scrapers/RealityLovers/RealityLovers.py diff --git a/scrapers/RealityLovers.yml b/scrapers/RealityLovers/RealityLovers.yml similarity index 97% rename from 
scrapers/RealityLovers.yml rename to scrapers/RealityLovers/RealityLovers.yml index 0c70b6086..9ea81438e 100644 --- a/scrapers/RealityLovers.yml +++ b/scrapers/RealityLovers/RealityLovers.yml @@ -1,4 +1,6 @@ name: RealityLovers +# requires: py_common + performerByURL: - action: script url: diff --git a/scrapers/RedHotStraightBoys.yml b/scrapers/RedHotStraightBoys.yml new file mode 100644 index 000000000..f9f7e64c3 --- /dev/null +++ b/scrapers/RedHotStraightBoys.yml @@ -0,0 +1,41 @@ +name: RedHotStraightBoys +sceneByURL: + - action: scrapeXPath + url: + - redhotstraightboys.com/tour/updates + scraper: sceneScraper + +performerByURL: + - action: scrapeXPath + url: + - redhotstraightboys.com/tour/models + scraper: performerScraper + +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //span[@class='update_title']/text() + Performers: + Name: + selector: //span[@class='tour_update_models']/a/text() + URL: + selector: //span[@class='tour_update_models']/a/@href + Details: + selector: //span[@class='latest_update_description']/text() + Tags: + Name: //span[@class='tour_update_tags']/a/text() + Image: + selector: //img[contains(@class,'large_update_thumb')]/@src + Studio: + Name: + fixed: RedHotStraightBoys.com + + performerScraper: + performer: + Name: //span[@class='title_bar_hilite']/text() + Gender: + fixed: Male + Image: + selector: //img[contains(@class,'model_bio_thumb')]/@src0_2x +# Last Updated October 14, 2023 diff --git a/scrapers/Reddit.yml b/scrapers/Reddit.yml new file mode 100644 index 000000000..69278fac2 --- /dev/null +++ b/scrapers/Reddit.yml @@ -0,0 +1,40 @@ +name: "Reddit" +sceneByURL: + - action: scrapeXPath + url: + - reddit.com + scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - reddit.com + scraper: galleryScraper +xPathScrapers: + sceneScraper: + scene: &redditPost + Title: //shreddit-post/@post-title | //meta[@property="og:title"]/@content + Date: + selector: //shreddit-post/@created-timestamp | //div[@class="top-matter"]//p//time/@datetime + postProcess: + - replace: + - regex: (\d{4}-\d{2}-\d{2})T(.*) + with: $1 + - parseDate: 2006-01-02 + Performers: + Name: + selector: //shreddit-post/@author | //meta[@property="og:description"]/@content + postProcess: + - replace: + - regex: Posted in r\/([\w]+) by u\/([\w-]+)\s(.*) + with: $2 + galleryScraper: + gallery: *redditPost +driver: + cookies: # over18 cookie necessary for old.reddit.com URLs due to redirect; new reddit just blurs content instead + - CookieURL: "https://old.reddit.com/over18/" + Cookies: + - Name: "over18" + Domain: ".reddit.com" + Value: "1" + Path: "/" +# Last Updated December 13, 2023 diff --git a/scrapers/Redgifs/Redgifs.py b/scrapers/Redgifs/Redgifs.py new file mode 100644 index 000000000..6f4810e26 --- /dev/null +++ b/scrapers/Redgifs/Redgifs.py @@ -0,0 +1,185 @@ +import base64 +import json +import os +import re +import sys +from datetime import datetime + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() +try: + import requests +except ModuleNotFoundError: + log.error( + "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)" + ) + log.error( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests" + ) + sys.exit() + +PROXIES = {} +TIMEOUT = 10 + + +# Thin wrapper around the Redgifs v2 API; a temporary bearer token is fetched on init. +class Redgifs: + def __init__(self): + self.session = requests.Session() + self.session.headers.update( + {"content-type": "application/json; charset=UTF-8"} + ) + + self.session.proxies.update(PROXIES) + + self.getTemporaryToken() + + def log_session_headers(self): + log.debug(self.session.headers) + + def GET_req(self, url): + scraped = None + try: + scraped = self.session.get(url, timeout=TIMEOUT) + except requests.exceptions.RequestException as req_ex: + log.error(f"scrape error: {req_ex}") + return None + if scraped.status_code >= 400: + log.error(f"HTTP Error: {scraped.status_code}") + return None + return scraped.content + + def GET_req_json(self, url): + scraped = None + try: + scraped = self.session.get(url, timeout=TIMEOUT) + except requests.exceptions.RequestException as req_ex: + log.error(f"scrape error: {req_ex}") + return None + if scraped.status_code >= 400: + log.error(f"HTTP Error: {scraped.status_code}") + return None + return scraped.json() + + def output_json(self, title, tags, url, b64img, performers, date): + return { + "title": title, + "tags": [{"name": x} for x in tags], + "url": url, + "image": "data:image/jpeg;base64," + b64img.decode("utf-8"), + "performers": [{"name": x.strip()} for x in performers], + "date": date + } + + def getTemporaryToken(self): + req = self.GET_req_json("https://api.redgifs.com/v2/auth/temporary") + if not req or not req.get("token"): + log.error("Could not fetch a temporary auth token") + sys.exit(1) + + authToken = req.get("token") + + self.session.headers.update( + {"Authorization": 'Bearer ' + authToken,} + ) + + log.debug(req) + + def getIdFromUrl(self, url): + # the ID is the last path segment, minus any query string + id = url.split("/") + id = id[-1] + id = id.split("?")[0] + + return id + + def getApiUrlFromId(self, id): + return f"https://api.redgifs.com/v2/gifs/{id}?views=yes&users=yes" + + + def getParseUrl(self, url): + id = self.getIdFromUrl(url) + return self.getParseId(id) + + def getParseId(self, id): + id_lowercase = id.lower() + + log.debug(str(id)) + + apiurl = self.getApiUrlFromId(id_lowercase) + + req = self.GET_req_json(apiurl) + if req is None: + log.error(f"Could not fetch gif info for '{id}'") + sys.exit(1) + + log.debug(req) + + gif = req.get("gif") + user = req.get("user") + + tags = gif.get("tags") + + # createDate is a unix timestamp; convert it to YYYY-MM-DD + date = gif.get("createDate") + date = datetime.fromtimestamp(date) + date = str(date.date()) + + imgurl = gif.get("urls").get("poster") + img = self.GET_req(imgurl) + b64img = base64.b64encode(img) + + performers = [] + + if user and user.get("name"): + performers = [user.get("name")] + elif user and user.get("username"): + performers = [user.get("username")] + + + return self.output_json( + id, tags, f"https://www.redgifs.com/watch/{id}", b64img, performers, date + ) + +def parseFilename(filename): + id = filename.replace("redgifs_", "") #remove possible filename prefix + id = id.split(".")[0] #remove file extension + + return id + + +FRAGMENT = json.loads(sys.stdin.read()) + +log.debug(FRAGMENT) + +scraper = Redgifs() + +result = "" + +if sys.argv[1] == "url": + url = FRAGMENT.get("url") + + log.debug(url) + + result = json.dumps(scraper.getParseUrl(url)) +elif sys.argv[1] in ("queryFragment", "fragment"): + id = parseFilename(FRAGMENT.get("title")) + + log.debug(id) + + result = json.dumps(scraper.getParseId(id)) +elif sys.argv[1] == "name": + id = parseFilename(FRAGMENT.get("name")) + + log.debug(id)
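+    # sceneByName is expected to produce a JSON *list* of scenes, so the single
+    # lookup below is wrapped in a list (unlike the url/fragment branches above).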
+ + result = json.dumps([scraper.getParseId(id)]) + +print(result) diff --git a/scrapers/Redgifs/Redgifs.yml b/scrapers/Redgifs/Redgifs.yml new file mode 100644 index 000000000..73ff5b563 --- /dev/null +++ b/scrapers/Redgifs/Redgifs.yml @@ -0,0 +1,33 @@ +name: Redgifs +# requires: py_common + +sceneByURL: + - action: script + script: + - python + - Redgifs.py + - url + url: + - www.redgifs.com/watch/ + +sceneByFragment: + action: script + script: + - python + - Redgifs.py + - fragment + +sceneByQueryFragment: + action: script + script: + - python + - Redgifs.py + - queryFragment + +sceneByName: + action: script + script: + - python + - Redgifs.py + - name +# Last Updated August 03, 2023 diff --git a/scrapers/RiggsFilms.yml b/scrapers/RiggsFilms.yml new file mode 100644 index 000000000..76f1bf2f0 --- /dev/null +++ b/scrapers/RiggsFilms.yml @@ -0,0 +1,33 @@ +name: Riggs Films +sceneByURL: + - action: scrapeJson + url: + - riggsfilms.vip/videos/ + scraper: sceneScraper + queryURL: "https://riggsfilms.adultmembersites.com/api/videos/{url}" + queryURLReplace: + url: + - regex: '.+/videos/(\d+)-.+' + with: "$1" +jsonScrapers: + sceneScraper: + scene: + Title: title + Details: decription + Date: + selector: publish_date + postProcess: + - replace: + - regex: \s.+$ + with: + - parseDate: 2006-01-02 + Image: poster_src + Performers: + Name: casts.#.screen_name + Studio: + Name: + fixed: Riggs Films + Tags: + Name: tags.#.name + Code: id +# Last Updated January 20, 2023 diff --git a/scrapers/Algolia_RoccoSiffredi.yml b/scrapers/RoccoSiffredi/RoccoSiffredi.yml similarity index 78% rename from scrapers/Algolia_RoccoSiffredi.yml rename to scrapers/RoccoSiffredi/RoccoSiffredi.yml index 6c22a531f..1c41ad740 100644 --- a/scrapers/Algolia_RoccoSiffredi.yml +++ b/scrapers/RoccoSiffredi/RoccoSiffredi.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "RoccoSiffredi" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - roccosiffredi.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - roccosiffredi.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Rule34Video.yml b/scrapers/Rule34Video.yml index 63a52c91e..69751c31a 100644 --- a/scrapers/Rule34Video.yml +++ b/scrapers/Rule34Video.yml @@ -52,4 +52,20 @@ xPathScrapers: Studio: Name: //div[@class="video_tools"]//div[text()="Artist:"]/following-sibling::a/span URL: //div[@class="video_tools"]//div[text()="Artist:"]/following-sibling::a/@href -# Last Updated November 25, 2022 +driver: + useCDP: true + cookies: + - Cookies: + - Name: kt_rt_popAccess + Value: '1' + Domain: .rule34video.com + Path: / + - Name: kt_tcookie + Value: '1' + Domain: .rule34video.com + Path: / + - Name: cf_clearance + ValueRandom: 43 + Domain: .rule34video.com + Path: / +# Last Updated September 29, 2023 diff --git a/scrapers/SARJ-LLC.py b/scrapers/SARJ-LLC/SARJ-LLC.py similarity index 90% rename from scrapers/SARJ-LLC.py rename to scrapers/SARJ-LLC/SARJ-LLC.py index 88b3e13c1..4d172ff08 100644 --- a/scrapers/SARJ-LLC.py +++ b/scrapers/SARJ-LLC/SARJ-LLC.py @@ -1,9 
+1,18 @@ import base64 +import os import json import sys import re from urllib.parse import urlparse, urlencode +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from py_common import log except ModuleNotFoundError: @@ -167,11 +176,12 @@ def scrape_model(base_url, name): def map_media(data, studio, base_url): - url = "" + urls = [] + studio_code = data["UUID"] studio_name = {'Name': ""} if studio is not None: studio_url = studio[1] - url = f"https://www.{studio_url}{data['path']}" + urls = [f"https://www.{studio_url}{data['path']}"] studio_name = {'Name': studio[0]} director = None @@ -194,11 +204,12 @@ def map_media(data, studio, base_url): return { 'Title': data['name'], 'Details': data['description'], - 'URL': url, + 'URLs': urls, 'Date': data['publishedAt'][0:data['publishedAt'].find('T')], 'Tags': list(map(lambda t: {'Name': t}, data['tags'])), 'Performers': list(map(lambda m: map_model(base_url, m), data['models'])), 'Studio': studio_name, + 'Code': studio_code, "Director": director } @@ -220,16 +231,13 @@ def scrape_movie(base_url, date, name): response = requests.get(res['Image'], headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0' }, timeout=(3, 6)) + if response and response.status_code < 400: + mime = 'image/jpeg' + encoded = base64.b64encode(response.content).decode('utf-8') + res['Image'] = f'data:{mime};base64,{encoded}' + break except requests.exceptions.RequestException as req_ex: - log.error(f"Error fetching URL {res['Image']}: {req_ex}") - - if response.status_code < 400: - mime = 'image/jpeg' - encoded = base64.b64encode(response.content).decode('utf-8') - res['Image'] = f'data:{mime};base64,{encoded}' - break - - log.info(f"Fetching URL {res['Image']} resulted in error status: {response.status_code}") + log.info(f"Error fetching URL {res['Image']}: {req_ex}") res['Image'] = None return res @@ -290,11 +298,11 @@ def add_tag(key, tag_format): '706DF46B88884F7BB226097952427754': ('Eternal Desire', 'eternaldesire.com'), '5592E33324211E3FF640800200C93111': ('Goddess Nudes', 'goddessnudes.com'), '5A68E1D7B6E69E7401226779D559A10A': ('Love Hairy', 'lovehairy.com'), - 'E6B595104E3411DF98790800200C9A66': ('Met Art', 'metart.com'), - '5C38C84F55841824817C19987F5447B0': ('Met Art Intimate', 'metart.com'), - 'E7DFB70DF31C45B3B5E0BF10D733D349': ('Met Art X', 'metartx.com'), + 'E6B595104E3411DF98790800200C9A66': ('MetArt', 'metart.com'), + '5C38C84F55841824817C19987F5447B0': ('MetArt Intimate', 'metart.com'), + 'E7DFB70DF31C45B3B5E0BF10D733D349': ('MetArt X', 'metartx.com'), 'D99236C04DD011E1B86C0800200C9A66': ('Rylsky Art', 'rylskyart.com'), - '94DB3D0036FC11E1B86C0800200C9A66': ('Sex Art', 'sexart.com'), + '94DB3D0036FC11E1B86C0800200C9A66': ('SexArt', 'sexart.com'), '3D345D1E156910B44DB5A80CDD746318': ('Straplez', 'straplez.com'), '18A2E47EAEFD45F29033A5FCAF1F5B91': ('Stunning 18', 'stunning18.com'), 'FDAFDF209DC311E0AA820800200C9A66': ('The Life Erotic', 'thelifeerotic.com'), diff --git a/scrapers/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml similarity index 96% rename from scrapers/SARJ-LLC.yml rename to scrapers/SARJ-LLC/SARJ-LLC.yml index 0620ae429..db5d8d6d6 100644 --- a/scrapers/SARJ-LLC.yml +++ 
b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -1,4 +1,5 @@ name: SARJ LLC +# requires: py_common sceneByName: action: script @@ -90,4 +91,5 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated November 29, 2022 + +# Last Updated December 05, 2023 diff --git a/scrapers/SCRAPERS-LIST.md b/scrapers/SCRAPERS-LIST.md new file mode 100644 index 000000000..0ac18cf4f --- /dev/null +++ b/scrapers/SCRAPERS-LIST.md @@ -0,0 +1,1722 @@ +## Supported Sites For Scraping + +This list is meant to keep track of which sites are already supported by existing community scrapers, and which scrapers support them. When introducing a new scraper, add the sites your scraper supports to this list in your PR. Please keep the site list in alphabetical order to keep it tidy. If you are adding a lot of sites to the list, it may be useful to paste the sites in and then use a tool [like this](https://wordcounter.net/alphabetize) to alphabetize it. +If a scraper needs a newer stash release than the current stable/master, the version info should be added in the **Needs** field. + +Column names are **S**cenes, **G**allery, **M**ovies, **P**erformers. + +Supported Site|Scraper| S | G | M | P |Needs|Contents +--------------|-------|:-:|:-:|:-:|:-:|:---:|:------: +1000facials.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +10musume.com|10Musume.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +1111customsxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +18eighteen.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +18tokyo.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +18vr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +1by-day.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +1passforallsites.com|1passforallsites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +1pondo.tv|1pondo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +21naturals.com|Algolia_21Naturals.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +21roles.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +21sextreme.com|Algolia_21Sextreme.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +21sextury.com|Algolia_21Sextury.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +2girls1camera.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +40somethingmag.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +50plusmilfs.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +5kporn.com|5KTeens.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +5kteens.com|5KTeens.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +60plusmilfs.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Granny +65inchhugeasses.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +abbiemaley.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +abbywinters.com|AbbyWinters.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +abuseme.com|AbuseMe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +accidentalgangbang.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +activeduty.com|Algolia_ActiveDuty.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +adamandevepictures.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +addicted2girls.com|Algolia_Addicted2Girls.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +addisonstreet.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +adultanime.dbsearch.net|Anime-DB.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Japanese Hentai
+adultdvdempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +adultdvdmarketplace.com|AdultDvdMarketPlace.yml|:x:|:x:|:heavy_check_mark:|:x:|-|- +adultempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +adultfilmindex.com|AdultFilmIndex.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +adultprime.com|AdultPrime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +adulttime.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|- +adulttimepilots.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +aebn.com|AEBN.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Straight + Gay +agentredgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +alettaoceanempire.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +alexismonroe.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +alexlegend.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +aliciasgoddesses.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allanalallthetime.com|AllAnalAllTheTime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allblackx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +allfinegirls.net|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allgirlmassage.com|Algolia_AllGirlMassage.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +allherluv.com|MissaX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian +allinternal.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +alljapanesepass.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +allvr.porn|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +alphamales.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +alsscan.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +amateripremium.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateurallure.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +amateurav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +amateurboxxx.com|AmateurBoxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateure-xtreme.com|AmateureExtreme.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateureuro.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateursfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amazinganna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +ambushmassage.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +amelielou.com|AmelieLou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +americanmusclehunks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +amkingdom.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +amourangels.com|AmourAngels.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +anal-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +anal-beauty.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +anal4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analacrobats.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analamateur.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analbbc.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analcheckups.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analhookers.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+analized.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analjust.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analmom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analnippon.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +analonly.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analoverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analteenangels.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +analtherapyxxx.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analvids.com|LegalPorno.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +analviolation.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analyzedgirls.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +andolinixxl.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +angelasommers.com|angelasommers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +angelawhite.com|AngelaWhite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +angelinacastrolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +anidb.net|AniDB.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Hentai Database +anilos.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +animecharactersdatabase.com|AnimeCharactersDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|Database +antoniosuleiman.com|AntonioSuleiman.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +apovstory.com|APOVStory.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +archangelvideo.com|ArchAngelVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ariellynn.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ashemaletube.com|AShemaleTube.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +ashleyfires.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ashlynnbrooke.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +asian18.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +asianamericantgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +asianfever.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +asiansexdiary.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +asmrfantasy.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +assholefever.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +assmeat.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +asstraffic.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +assumethepositionstudios.com|AssumeThePositionStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +assylum.com|Assylum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +atkexotics.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkgirlfriends.com|ATKGirlfriends.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +attackboys.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +auntjudys.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +austinwilde.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +av69.tv|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +avadawn.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +avanal.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +aventertainments.com|AVE.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Database +avidolz.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV Uncensored +avjiali.com|AVJiali.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV Uncensored +avstockings.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +avtits.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +aziani.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +babearchives.com|BabeArchives.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +babepedia.com|Babepedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database +baberotica.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +baberoticavr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +babes.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- +babesandstars.com|Babes&Stars.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +babesnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +babevr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +backdoorpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +backroomcastingcouch.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +baddaddypov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badfamilypov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badmilfs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badmommypov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badoinkvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +badteenspunished.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +baeb.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +balletflatsfetish.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bamvisions.com|BamVisions.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bang.com|Bang.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +bangbangboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bangbros.com|BangBros.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bangingbeauties.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bangteenpussy.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +barbarafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +barebackplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +barelylegal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +baretwinks.com|BoyCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bathhousebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +battlebang.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +bbcparadise.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bbcpie.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bbcsurprise.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +beauty-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +beauty4k.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+beaverhunt.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +becomingfemme.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +behindtrans500.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +beingphoenixmarie.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +belamionline.com|BelAmi.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bellahd.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellanextdoor.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellapass.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellesafilms.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellesahouse.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +beltbound.com|BeltBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +berryboys.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +bestoftealconrad.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bffs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bigbootytgirls.com|BigBootyTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +bigfatcreampie.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +biggulpgirls.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +bigtitstokyo.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +biguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +billiestar.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +biphoria.com|Algolia_Biphoria.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +bjraw.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +blackandbig.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackboyaddictionz.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +Blackfoxbound UK|BlackFoxBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blacksondaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blakemason.com|BlakeMason.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +blowmepov.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +blownbyrone.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blowpass.com|Algolia_Blowpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bobbiedenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bobstgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans 
+bolatino.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +bondagecafe.com|BondageCafe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bondageliberation.com|BondageLiberation.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boobpedia.com|Boobpedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database +bootyclapxxx.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +bootysisters.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +boppingbabes.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +bossymilfs.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +bouncychicks.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +boundheat.com|BoundHeat.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +boundhoneys.com|Boundhoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +boundjocks.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boundlife.com|Boundlife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boundtwinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bountyhunterporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +boxtrucksex.com|BoxTruckSex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boycrush.com|BoyCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boyforsale.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boyfriendsharing.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boyfun.com|BoyFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boygusher.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +boysdestroyed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boysfuckmilfs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boyshalfwayhouse.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brandibelle.com|brandibelle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brandnewamateurs.com|BrandNewAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brandnewfaces.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brasilvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +brattyfamily.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brattymilf.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +bravofucker.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +brazilian-transsexuals.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +braziltgirls.xxx|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- +breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +brett-tyler.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bride4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokenlatinawhores.com|BrokenLatinaWhores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokensluts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokestraightboys.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +brookelynnebriar.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +bruceandmorgan.net|bruceandmorgan.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Amateur Fetish +brutalinvasion.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bryci.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bskow.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+bukkakenow.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +bulldogxxx.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +burningangel.com|Algolia_BurningAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +bushybushy.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bustybeauties.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +buttman.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cadinot.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +calicarter.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +camwhores.tv|CamWhorestv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +canada-tgirl.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +caribbeancom.com|Carib.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +caribbeancompr.com|Carib.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +carmenvalentina.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +carnalplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +castingcouch-x.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +catalinacruz.com|Catalina Cruz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cazzofilm.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +cfnmeu.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +chaosmen.com|Algolia_ChaosMen.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay +charleechaselive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +chastitybabes.com|chastitybabes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cheatingsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +cherrypimps.com|CherryPimps.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +chickpass.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chickpassinternational.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chickpasspornstars.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chickpassteens.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chloelamour.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +chocolatebjs.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +citebeur.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +clairprod.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +class-lesbians.com|ClassLesbians.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +claudiamarie.com|ClaudiaMarie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +clips4sale.com|Clips4Sale.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +clubdom.com|SubbyHubby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +clubelegantangel.com|ClubElegantAngel.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +clubinfernodungeon.com|Algolia_ClubInfernoDungeon.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Gay +clubseventeen.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +cockyboys.com|CockyBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
+college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +college-uniform.com|CollegeUniform.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +collegeboyphysicals.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +collegedudes.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +collegefuckparties.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +coltstudiogroup.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +combatzone.us|CombatZone.yml|:x:|:x:|:heavy_check_mark:|:x:|-|- +combatzonexxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +concoxxxion.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +corbinfisher.com|CorbinFisher.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +cosplayfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cospuri.com|Cospuri.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +cougarseason.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +crashpadseries.com|CrashpadSeries.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +creativeporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cruelgf.com|CruelGirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +crunchboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +cuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +cumblastcity.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumdumpsterteens.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumswappingsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +currycreampie.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechamateurs.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechanalsex.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechbangbus.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechbiporn.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechbitch.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechboobs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechcasting.com|czechcasting.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechcouples.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechdeviant.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechdungeon.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechescortgirls|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechestrogenolit.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechexecutor.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechfantasy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechfirstvideo.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgame.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgangbang.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+czechgardenparty.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgayamateurs.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaycasting.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaycity.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgaycouples.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgayfantasy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaymassage.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaysolarium.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaytoilets.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgypsies.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechharem.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechhitchhikers.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechhomeorgy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechlesbians.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +czechmassage.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechmegaswingers.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechorgasm.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechparties.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechrealdolls.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsexcasting.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsexparty.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechshemale.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsolarium.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechspy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechstreets.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsupermodels.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechtaxi.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechvr.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrcasting.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrfetish.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrnetwork.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechwifeswap.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +d52q.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +dadcrush.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daddy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daddycarl.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +daddygetslucky.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +daddyslilangel.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +damnthatsbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +danejones.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +danidaniels.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +danni.com|Danni.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darkcruising.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +darkroomvr.com|DarkRoomVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darksodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darkx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +darttechstudios.com|DartTechStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +data18.com|data18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +datingmystepson.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +daughterjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ddfbusty.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +deauxmalive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +debt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +deeper.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +deeplush.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +deepthroatsirens.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +defiled18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dellaitwins.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +delphinefilms.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +desperateamateurs.com|DesperateAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +detentiongirls.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +deviante.com|Deviante.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +devianthardcore.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +devilsfilm.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans +devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfbnetwork.com|DFB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|python|- +dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dickontrip.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +digitaldesire.com|DigitalDesire.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +digitalplayground.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +dillionation.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +dirty-coach.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +dirty-doctor.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +dirtyauditions.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dirtyboysociety.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +dirtycosplay.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dirtyflix.com|DirtyFlix.yml|:heavy_check_mark:|-|-|-|-|- +disruptivefilms.com|Algolia_disruptivefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +dlsite.com|DLsite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +doegirls.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dogfartnetwork.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +dollrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +domai.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +dorcelclub.com|DorcelClub.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +dorcelvision.com|DorcelVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +dothewife.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +doubleteamedteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+downblousejerk.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +dreamsofspanking.com|DreamsOfSpanking.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +dreamteenshd.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Trans +drilledchicks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +driverxxx.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +dtfsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dyke4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dyked.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dylanryder.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +ebonytugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +edwardjames.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes +englishlads.com|EnglishLads.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Gay +enzorimenez.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +eporner.com|Eporner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +eroprofile.com|EroProfile.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +eroticax.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +eroticbeauty.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +eroticspice.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +erotiquetvlive.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +errotica-archives.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +eternaldesire.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +euro-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +eurocreme.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +eurogirlsongirls.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +euroteenerotica.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +evilangel.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +evilangel.com|GammaEntertainment.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +evolvedfights.com|evolvedfightslez.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +evolvedfightslez.com|evolvedfightslez.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +exotic4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +explicite-art.com|ExpliciteArt.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +exploitedcollegegirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +excogigirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +extrapackage.com|ExtraPackage.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +extremepickups.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +exxxtrasmall.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+fabsluts.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +facials4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +facialsforever.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +fakehostel.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fakehub.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fakeshooting.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +faketaxi.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +falconstudios.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +fallinlovia.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +famedigital.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +familycreep.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +familyhookups.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +familylust.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familysexmassage.com|FamilySexMassage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familysinners.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familystrokes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familyswap.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +familytherapyxxx.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familyxxx.com|FamilyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fantasyflipflop.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fantasyhd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fantasymassage.com|Algolia_FantasyMassage.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +faphouse.com|Faphouse.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +feetishpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +femanic.com|Femanic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +femdomempire.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +feminized.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +femjoy.com|FemJoy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +femlatex.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +femout.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ferame.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +fetishnetwork.com|FetishNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +fetishpros.com|FetishPro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +ffstockings.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filf.com|FILF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fillyfilms.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthflix.com|FilthFlix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthyfamily.com|FilthyFamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthygapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthykings.com|Algolia_filthykings.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +finishesthejob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +finishhim.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +firstanalquest.com|Firstanalquest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +firstbgg.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +firstclasspov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fist4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fistertwister.com|Fistertwister.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+fistflush.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+fistflush.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fistingcentral.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fit18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+fitting-room.com|FittingRoom.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+footfetishbeauties.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+footfetishdaily.com|FootFetishDaily.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Foot Fetish
+footsiebabes.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+forbiddenfruitsfilms.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+forbiddenseductions.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
+forbondage.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+forgivemefather.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+fostertapes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fourfingerclub.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+foxxedup.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fragileslave.com|FragileSlave.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+fratx.com|FratX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay
+freakmobmedia.com|FreakMobMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+free.premiumbukkake.com|PremiumBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+freeones.com|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database
+freeones.xxx|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|-
+freeusefantasy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+freeusemilf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+freeze.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+french-twinks.com|Frenchtwinks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+frenchporn.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+freshmen.net|Freshmen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+freshoutofhighschool.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+frolicme.com|FrolicMe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+ftmmen.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
+ftmplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
+fuckedfeet.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckedhard18.com|FuckedHard18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckermate.com|Fuckermate.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+fuckfiesta.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+fuckingawesome.com|FuckingAwesome.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckinghardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+fuckingoffice.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckingparties.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+fuckingstreet.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+fuckstudies.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+fuckthegeek.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+fullpornnetwork.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+funbags.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+funsizeboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+futanari.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +futanarica.com|Futanarica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gag-n-gape.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +gangav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +gangbangcreampie.com|Algolia_GangBangCreampie.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +gapingangels.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gasm.com|GASM.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gayarabclub.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gaycastings.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gaycest.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gaycreeps.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gaydvdempire.com|GayAdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gayempire.com|GayAdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gayfrenchkiss.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gayhoopla.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay +gayhorrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gayroom.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gayviolations.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +genderxfilms.com|Algolia_GenderXFilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|-|Trans +genlez.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlfaction.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlfriendsfilms.com|Algolia_Girlfriendsfilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Lesbian +girlgirl.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Lesbian +girlgirlmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlgrind.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Lesbian +girlsandstuds.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlsgotcream.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlsonlyporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +girlsoutwest.com|GirlsOutWest.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Lesbian +girlsrimming.com|GirlsRimming.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Rimjobs +girlstakeaway.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlstryanal.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian +girlsunderarrest.com|GirlsUnderArrest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlsway.com|Algolia_GirlsWay.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +girlswhofuckgirls.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +gloryholesecrets.com|Algolia_Gloryholesecrets.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +goddessnudes.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- 
+goddesssnow.com|GoddessSnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+goldenslut.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+gostuckyourself.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+gotfilled.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|-
+grannyghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Granny
+grannyvsbbc.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+grooby.club|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+groobyvr.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+guysinsweatpants.com|GuysInSweatpants.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+gymnastic.xxx|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+gymrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+gynoexclusive.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hairyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+hairyundies.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+handdomination.com|HandDomination.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+handsonhardcore.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hanime.tv|hanime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai
+hardcoreyouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hardfuckgirls.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hardkinks.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+hardonvr.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
+hardtied.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hardx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+harlemsex.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+harmonyvision.com|HarmonyVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+hazel-tucker.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+hd19.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hdmassageporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hdsex18.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+heavyonhotties.com|HeavyOnHotties.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hegre.com|Hegre.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|-
+helixstudios.com|HelixStudios.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+helloladyboy.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+hentai2read.com|hentai2read.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai
+hentaied.com|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hergape.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hersexdebut.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+heymilf.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+heyoutdoor.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+heyzo.com|Heyzo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+hijabhookup.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hijabmylfs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+himeros.tv|HimerosTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+hitzefrei.com|Hitzefrei.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+hmvmania.com|HmvMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+hobybuchanon.com|HobyBuchanon.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+holed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hollyrandall.com|HollyRandall.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +homeclips.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +homemadeanalwhores.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hometowngirls.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hometownhoneys.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +honeytrans.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans +hongkongslut.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +hookuphotshot.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hornydoctor.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hornygirlscz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hornyhousehold.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +horrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotandtatted.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotcast.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +hotcrazymess.com|Nubiles.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hothouse.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotlegsandfeet.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotmilfsfuck.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +hotmovies.com|Hotmovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +hotoldermale.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +hottiemoms.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hotwifexxx.com|HotWifeXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +houseofgord.com|HouseOfGord.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +houseoftaboo.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +houseofyre.com|HouseOfFyre.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hqporner.com|HQPorner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hucows.com|Hucows.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hugecockbreak.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hungarianhoneys.com|HungarianHoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hungfuckers.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +hunt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hunterleigh.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hunterpov.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hushpass.com|hushpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hussiepass.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hustlaz.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustler.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustlerhd.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustlerparodies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustlerslesbians.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian +hustlerstaboo.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hypnotube.com|Hypnotube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +iafd.com|IAFD.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Database +iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +idols69.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +ifeelmyself.com|IFeelMyself.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +ignore4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ihuntmycunt.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ikillitts.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans 
+ikissgirls.com|IKissGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+iknowthatgirl.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+imdb.com|IMDB.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database
+immorallive.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+imnotyourmommy.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+infernalrestraints.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+innocenthigh.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+inserted.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+insex.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+insexondemand.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+interracialblowbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+interracialpass.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial
+interracialpickups.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+interracialpovs.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial
+intimatelesbians.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+intimatepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+ipinkvisualpass.com|PinkVisual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+isthisreal.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+italianshotclub.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+itscleolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+itspov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+iwantclips.com|IWantClips.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|-
+iwara.tv|Iwara.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jacquieetmichelelite.com|JacquieEtMichelElite.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+jacquieetmicheltv.net|JacquieEtMichelTV.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+jalifstudio.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+jamesdeen.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+janafox.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+japaneseflashers.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+japaneseslurp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+japanhdv.com|JapanHDV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+japanlust.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+japornxxx.com|JapornXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jasonsparkslive.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+jav.land|JavLand.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
+javdb.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database
+javdb36.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database
+javhd.com|JavHD.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+javhub.com|JavHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV
+javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
+jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jbvideo.com|JBVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+jeedoo.com|Jeedoo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+jeffsmodels.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jelenajensen.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+jerk-offpass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jerkaoke.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jessicajaymesxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jessroyan.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +jimweathersarchives.com|JimWeathersArchives.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +jizzbomb.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jnrc.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +jockbreeders.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +jockpussy.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM +jodiwest.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +joeperv.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +johnnyrapid.com|Algolia_Johnnyrapid.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +joibabes.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +jonnidarkkoxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +joybear.com|JoyBear.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +joymii.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +jpmilfs.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jpnurse.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jpshavers.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jpteacher.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jschoolgirls.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +julesjordan.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|- +juliaannlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +karissa-diamond.com|Karissa-Diamond.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +karups.com|Karups.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +katiebanks.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kellymadison.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kendrajames.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +killergram.com|Killergram.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kimberleelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kin8tengoku.com|Kin8tengoku.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kingsoffetish.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kink.com|Kink.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +kink305.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkbomb.com|KinkBomb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +kinkyfamily.com|kinkyfamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkymistresses.com|KinkyMistresses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkyspa.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkytwink.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +kissmefuckme.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +kpopping.com|Kpopping.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +kristenbjorn.com|KristenBjorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Gay +ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ladyboygold.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +ladydee.xxx|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+lanakendrick.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lanesisters.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lasublimexxx.com|Lasublime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+latinamilf.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+latinoguysporn.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+leannecrow.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+legsex.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+lesbea.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+lesbiananalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lesbianass.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian
+lesbianfactor.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+lesbiantribe.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lesbianx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+lesworship.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian
+lethalhardcore.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+lethalhardcorevr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+lethalpass.com|lethalpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+letsdoeit.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lewood.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
+lewrubens.com|LewRubens.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lexidona.com|LexiDona.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lexingtonsteele.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lezcuties.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian
+lifeselector.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+lilhumpers.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+lingerieav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+lingerietales.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+littleasians.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+littlecaprice-dreams.com|LittleCapriceDreams.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+littlefromasia.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+littlehellcat.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+loan4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lonestarangel.com|LoneStarAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lookathernow.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+lordaardvark.com|LordAardvark.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|CDP|-
+lovehairy.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
+loveherass.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+loveherboobs.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish
+loveherfeet.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Foot Fetish
+loveherfilms.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+lubed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lucasentertainment.com|LucasEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+lustcinema.com|LustCinema.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+lustery.com|Lustery.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur
+lustreality.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
+lustylina.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+mackstudio.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+madeincanada.xxx|MadeInCanada.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+madouqu.com|Madou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maggiegreenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maketeengape.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maledigital.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+malefeet4u.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+mamacitaz.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mandyflores.com|Mandyflores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+manojob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+manroyale.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+manyvids.com|ManyVids.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+marcusmojo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+masonicboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+massage-parlor.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+massagebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mature.nl|MatureNL.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+mature4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maturegapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maturegynoexam.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maturegynospy.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+max-hardcore.com|MaxHardcore.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+maxence-angel.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+maxinex.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+meanawolf.com|MeanaWolf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+meanbitches.com|MeanBitches.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+meanmassage.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+meetsuckandfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+men.com|Brazzers.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay
+menatplay.com|MenAtPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+menoboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+menover30.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+menpov.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+messyxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+metadataapi.net|**Use the Endpoint**|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+metadataapi.net/jav|ThePornDBJAV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
+metadataapi.net/movies|ThePornDBMovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Movies
+metalbondage.com|MetalBondage.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+metart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
+metartnetwork.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
+metartx.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
+milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+milfsodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+milfvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+minimuff.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+minnano-av.com|Minnano-AV.yml|:x:|:x:|:x:|:heavy_check_mark:|-|-
+missax.com|MissaX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+mistermale.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+mixedx.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+mmpnetwork.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+modelcentro.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+modelhub.com|Modelhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+modelmediaasia.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+modelmediaus.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+modeltime.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
+moderndaysins.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+mofos.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+mofosnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+mom4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momcomesfirst.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momisamilf.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momlover.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+mommy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mommyblowsbest.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mommyjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mommysboy.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+mommysboy.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mommysgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+momsbangteens.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momsboytoy.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momsfamilysecrets.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momslickteens.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+momsteachsex.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momstight.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+momswapped.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momwantscreampie.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momwantstobreed.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+momxxx.org|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mongerinasia.com|MongerInAsia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+monicamendez.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+monroelee.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+monstercub.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+mormongirlz.com|Mormongirlz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+motherfuckerxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+motherless.com|Motherless.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site
+movieporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mplstudios.com|MPLStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+mrbigfatdick.com|MrBigFatDick.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mrluckypov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mrpov.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+muchaslatinas.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+mugfucked.com|MugFucked.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mugfucked.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+muses.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
+my-slavegirl.com|my-slavegirl.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mybabysittersclub.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mybadmilfs.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mycherrycrush.com|MyCherryCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+mydaughterswap.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mydirtyhobby.com|MyDirtyHobby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+myfamilypies.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+myfirstdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+mylf.com|Mylf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mylfdom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mylifeinmiami.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+mylked.com|Mylked.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mypervmom.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mypervyfamily.com|Algolia_mypervyfamily.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+myracequeens.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+mysislovesme.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+mysweetapple.com|MySweetApple.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+myteenoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+myveryfirsttime.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+n53i.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
+nakedsword.com|NakedSword.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+nannyspy.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nastypublicsex.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nastystepfamily.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nataliastarr.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+natashanice.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+naturalbornbreeders.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+naughtynatural.com|NaughtyNatural.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+netvideogirls.com|NVGNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoorcasting.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoorebony.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoorfilms.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoorhomemade.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoormale.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoororiginals.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoorraw.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoorstudios.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoortaboo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nextdoortwink.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+nfbusty.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai
+nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+noboring.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+noelalejandrofilms.com|NoelAlejandro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nubilefilms.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nubiles-casting.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nubiles-porn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nubiles.net|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nubileset.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nubilesporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nubilesunscripted.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+nucosplay.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+nudefightclub.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+nudeyogaporn.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+nurumassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+nylonfeetlove.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nylonspunkjunkies.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nylonsweeties.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nylonup.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+nympho.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+ocreampies.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+officecock.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+officemsconduct.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
+officepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+officesexjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+ohmyholes.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+old-n-young.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+old4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+older4me.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+oldgoesyoung.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oldje-3some.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oldje.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oldnanny.com|OldNanny.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oldsfuckdolls.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+only3xgirls.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+only3xlost.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+only3xseries.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+only3xvr.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+onlyblowjob.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+onlygolddigger.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+onlyprince.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+onlytarts.com|OnlyTarts.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+oreno3d.com|Oreno3d.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+orgytrain.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+outdoorjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+outhim.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+outofthefamily.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+over40handjobs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+p54u.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV
+pacopacomama.com|Paco.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV
+paintoy.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pandafuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pansexualx.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans
+pantyjobs.com|pantyjobs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pantypops.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+paradisegfs.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+parasited.com|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+parodypass.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+passion-hd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+passionxxx.com|Passionxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+paulomassaxxx.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+pawged.com|Pawged.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+peeonher.com|peeonher.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pegasproductions.com|PegasProductions.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pennypaxlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+penthousegold.com|Penthouse.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+perfectgirlfriend.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+perfectgonzo.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+pervcity.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervdoctor.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+perversefamily.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervertgallery.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervmom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervnana.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervpricipal.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pervtherapy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+peternorth.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+petite18.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+petiteballerinasfucked.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+petited.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+petitehdporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+petiteleeanna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+petitepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+philavise.com|Philavise.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+philippwants.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+pickupfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pie4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pinklabel.tv|PinkLabelTV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+pinkoclub.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pinkotgirls.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+pinupfiles.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pissplay.com|PissPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur Fetish
+pissynetwork.com|PissyNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pjgirls.com|pjgirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pkfstudios.com|PKFStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+playboy.tv|PlayboyTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+playboyplus.com|PlayboyPlus.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+playdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+playwithrae.com|PlayWithRae.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+plumperpass.com|PlumperPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+plushies.tv|Plushies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pmvhaven.com|PMVHaven.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|PMVs
+porkvendors.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornbox.com|Pornbox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+porncornvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR
+porncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornditos.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+porndudecasting.com|PornDudeCasting.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+pornfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornforce.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+porngoespro.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornhex.com|PornHex.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Tube Site
+pornhub.com|Pornhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+pornhubpremium.com|PornhubPremium.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+pornlandvideos.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornmegaload.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+pornperverts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornpros.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornstarbts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornstarhardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+pornstarplatinum.com|PornstarPlatinum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornstarstroker.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornstartease.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornweekends.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+pornworld.com|pornworld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+portagloryhole.com|PortaGloryhole.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+poundedpetite.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +povadventure.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +povbitch.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +povmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povmasters.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +povperverts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povpornstars.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +povthis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +prettydirty.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +prettydirtyteens.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +pridestudios.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +primecups.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +princesscum.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +privatecastings.com|privatecastings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +privatesextapes.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +producersfun.com|ProducersFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +profiles.myfreecams.com|MFC.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +propertysex.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicagent.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicsexadventures.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicsexdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +puffynetwork.com|Puffynetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pumaswedexxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-bbw.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-ts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-xxx.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +purebj.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +puremature.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +purepov.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +puretaboo.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +pussyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +putalocura.com|Putalocura.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +rachel-steele.com|RachelSteele.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ragingstallion.com|Algolia_RagingStallion.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +raunchybastards.com|RaunchyBastards.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawattack.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+rawfuck.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +rawfuckboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +realbikinigirls.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +realfuckingdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR +realsensual.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +reddit.com|Reddit.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +redgifs.com|Redgifs.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Gifs +redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +redhotstraightboys.com|RedHotStraightBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +redpolishfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +reidmylips.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +reidmylips.elxcomplete.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +renderfiend.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +restrictedsenses.com|RestrictedSenses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +retroporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rickysroom.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +ridleydovarez.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +roddaily.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +rodsroom.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +romemajor.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sayuncle.com|Sayuncle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +scarybigdicks.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +schoolgirlshd.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +schoolpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+scoreland.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+scoreland2.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+scoutboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+screwmetoo.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+seductive18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+seehimfuck.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+seehimsolo.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+seemomsuck.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+seemysextapes.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+selfiesuck.com|SelfieSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sensualpain.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+serve4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+severesexfilms.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|-
+sexart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
+sexbabesvr.com|SexBabesVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+sexintaxi.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sexlikereal.com|SexLikeReal.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sexmex.xxx|SexMex.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+sexmywife.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sexsee.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+sexselector.com|SexSelector.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sexuallybroken.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sexvr.com|SexVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR
+sexwithmuslims.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sexworking.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+sexyhub.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shagmag.com|shagmag.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Magazines
+shame4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+share.myfreecams.com|MFC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+shefucksonthefirstdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shegotsix.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shelovesblack.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+shesbrandnew.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+sheseducedme.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shewillcheat.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shinybound.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shinysboundsluts.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+shiofuky.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored
+shoplyfter.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+shoplyftermylf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+showerbait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+showybeauty.com|ShowyBeauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+shylaj.com|ShylaJ.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+sidechick.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+silverstonedvd.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+silviasaint.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+simplyanal.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sinematica.com|Sinematica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sinslife.com|SinsLife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+siripornstar.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sis.porn|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sislovesme.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sisswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sissypov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sketboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay
+slayed.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+slroriginals.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR
+slutinspection.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+slutsbts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+slutspov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sluttybbws.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+smashed.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+smashpictures.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+smokingmina.com|SmokingMina.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+smutfactor.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+smutmerchants.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+soapymassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+sofiemariexxx.com|SofieMariexxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sologirlsmania.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+soloteengirls.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+sophiedeelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+sororitysluts.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+spankbang.com|SpankBang.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site
+spankmonster.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spearteenpussy.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spermantino.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spermmania.com|SpermMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spermswallowers.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+spermswap.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+spizoo.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spoiledvirgins.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+spunkworthy.com|SpunkWorthy.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+spyfam.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+squirtalicious.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+squirted.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+squirtinglesbian.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian
+squirtingorgies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+stagcollective.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+staghomme.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+stasyq.com|StasyQ.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+stasyqvr.com|StasyQVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+staxus.com|Staxus.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay
+stayhomepov.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+stephousexxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+steppov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+stepsiblings.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stepsiblingscaught.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +stockingvideos.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stockydudes.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +store.evilangel.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +store.evilangelvideo.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +store.freeones.com|FreeonesStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +str8hell.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +strapattackers.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Femdom +straplez.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +straponcum.com|StrapOnCum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +strapondreamer.com|StrapDreamer.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +streaming.iafd.com|IafdStreaming.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +stretchedoutsnatch.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stripzvr.com|StripzVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +strokies.com|Strokies.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +stuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +studiofow.com|StudioFOW.yml|:heavy_check_mark:|:x:|:x:|:x:|-|3D Animation +stuffintwats.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +stunning18.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +subbyhubby.com|SubbyHubby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +submissivex.com|SubmissiveX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +subspaceland.com|SubspaceLand.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +suckmevr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +sugarcookie.xxx|sugarcookie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sugardaddyporn.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +suggabunny.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sunnylanelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sunnyleone.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +superbemodels.com|superbemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +superramon.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +susanayn.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swallowbay.com|SwallowBay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swallowed.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swallowsalon.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +sweetcarla.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sweetfemdom.com|SweetFemdom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sweetheartvideo.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Lesbian +sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +sweetyx.com|SweetyX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swinger-blog.xxx|SwingerBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swnude.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +tabooheat.com|Algolia_TabooHeat.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +taboopov.com|taboopov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tacamateurs.com|TACAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tadpolexstudio.com|TadpolexStudio.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +takevan.com|TakeVan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+taliashepard.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+tamedteens.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+tandaamateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandaasians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandablondes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandabrunettes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandaebony.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandahousewives.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandalatinas.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandalesbians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tandaredheads.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tanyatate.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+taratainton.com|TaraTainton.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teacherfucksteens.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+teachmyass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teamskeet.com|Teamskeet.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+teasepov.com|TeasePOV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teasingandpleasing.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenageanalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenagecorruption.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenagetryouts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenanalcasting.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teencoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teencorezine.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teencurves.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teendrillers.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenerotica.xxx|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenmegaworld.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+teenpies.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teensandtwinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay
+teensexmania.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+teensexmovs.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+teensgoporn.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teensloveanal.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teensloveblackcocks.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenslovehugecocks.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+teensnaturalway.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teensneaks.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+teenstryblacks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Thai Uncensored
+teentugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+teenytaboo.com|TeenyTaboo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV
+terapatrick.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+tessafowler.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +texasbukkake.com|TexasBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tgirl40.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlbbw.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirljapan.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirljapanhardcore.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlpornstar.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlpostop.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirlsfuck.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlshookup.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirltops.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +theartporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +thedicksuckers.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishamateurs.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishfetish.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishpov.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishxxx.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thehabibshow.com|TheHabibShow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thelesbianexperience.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +thelifeerotic.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +thenude.com|TheNude.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +thetabutales.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theyeslist.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +thicc18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +thickandbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thickumz.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thirdsexxxx.com|ThirdSexXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +thirdworldxxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Asian + Latin + Trans +thisvid.com|ThisVid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +throated.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +timtales.com|TimTales.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +tiny4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tinysis.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tittycreampies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +titworld.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +tmwpov.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tmwvrnet.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +tokyo-hot.com|Tokyohot.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +tokyobang.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay 
+tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans
+topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+toplatindaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+topwebmodels.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+toticos.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+touchmywife.com|Algolia_touchmywife.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|-
+toughlovex.com|toughlovex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+tour.purgatoryx.com|purgatoryx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+trans500.com/tour/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+trans500.com/tour3/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+trans500.com/tourespanol|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transangels.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transatplay.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transbella.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transcest.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM
+transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+transfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Trans
+transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
+transgressivexxx.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans
+transmodeldatabase.com|TransModelDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans
+transnificent.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans
+transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans
+transsensual.com|MindGeek.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|Trans
+transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+transsexualroadtrip.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+tranzvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR
+traxxx.me|Traxxx.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+trickymasseur.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+trickyoldteacher.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+trickyspa.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+trikepatrol.com|TrikePatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+tripforfuck.com|TripForFuck.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+trueamateurs.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+trueanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|-
+trystanbull.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay
+tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans
+tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans
+tsgirlfriendexperience.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +tspov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsraw.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +tugpass.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tuktukpatrol.com|TukTukPatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tushy.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +tushyraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +tutor4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +twinkloads.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twinktop.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twistedvisual.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +twistys.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +twistysnetwork.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +twotgirls.com|TwoTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +uk-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +ultrafilms.com|UltraFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +underhentai.com|UnderHentai.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai +universblack.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +unlimitedmilfs.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +unrealporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +upherasshole.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +upskirtjerk.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +valentina.passionepiedi.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vangoren.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vcaxxx.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +velvetveronica.com|VelvetVeronica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +venus.allfinegirls.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +venus.ultrafilms.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +venus.wowgirls.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +venus.wowporn.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vickyathome.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +viktor-rom.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +vinaskyxxx.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vintagegaymovies.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +vip4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vipissy.com|Vipissy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vipsexvault.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +virtualpee.com|VirtualPee.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +virtualporn.com|VirtualPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +virtualrealamateurporn.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealgay.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealjapan.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealpassion.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealporn.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR 
+virtualrealtrans.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualtaboo.com|VirtualTaboo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +visit-x.net|Visit-X.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +vivid.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vividclassic.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vivthomas.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +vixen.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vlogxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vogov.com|vogov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vrbangers.com|VRBangers.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrbgay.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrbtrans.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrconk.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrintimacy.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +vrporn.com|VRPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrpornpass.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vtubie.com|vTubie.yml|:x:|:x:|:x:|:heavy_check_mark:|-|VTuber Database +wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wankitnow.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +wankz.com|wankz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wankzvr.com|POVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV +watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +watchmygf.me|WatchMyGF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +wearehairy.com|WeAreHairy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|- +webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +wefuckblackgirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +weliketosuck.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +welivetogether.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +weneednewtalents.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +westcoastproductions.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- 
+wetandpissy.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wetandpuffy.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wetvr.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +whiteghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +whiteteensblackcocks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +whorecraftvr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fantasy VR +wicked.com (/movies)|WickedMovies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wicked.com|Algolia_Wicked.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +wifespov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wildoncam.com|trafficpimps.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +williamhiggins.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +willtilexxx.com|WillTileXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wolfwagner.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +woodmancastingx.com|WoodmancastingX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wowgirls.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wowgirlsblog.com|WOWGirlsBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wowporn.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wtfpass.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wurstfilm.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +www.mgstage.com|mgstage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +x-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +x-art.com|X-artcom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xart.xxx|Xartxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xconfessions.com|XConfessions.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +xcoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xempire.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +xes.pl|Xes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Polish porn site +xevunleashed.com|Xevunleashed.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xhamster.com|Xhamster.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xlgirls.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +xnxx.com|Xnxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xrares.com|Xrares.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xsinsvr.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +xslist.org|Xslist.yml|:x:|:x:|:x:|:heavy_check_mark:|-|JAV Database +xtube.com|Xtube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xvideos.com|Xvideos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xvideos.red|Xvideos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xvirtual.com|XVirtual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xxxjobinterviews.com|XXXJobInterviews.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xxxnj.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +xxxpawn.com|XXXPawn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yanks.com|Yanks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yesgirlz.com|YesGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yngr.com|YNGR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +younganaltryouts.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +youngerloverofmine.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +youngermommy.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +youngthroats.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +youporn.com|YouPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +yourmomdoesanal.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+yourmomdoesporn.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+yummysofie.com|YummySofie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+z-filmz-originals.com|Z-Filmz-Originals.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+zebragirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|-
+zentaidolls.com|ZentaiDolls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|-
+zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR
+zishy.com|Zishy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|-
+zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-
+
+## Non url scrapers
+
+The following scrapers do not support xxxByURL scraping and are not included in the above list. To keep this list tidy, please add new scrapers in alphabetical order by .yml filename.
+For each scraper, a short description, an optional comment on usage, and the related PR(s) with usage details would be appreciated.
+
+Scraper | Description | Comments | PR
+--------|-------------|----------|:--:
+ComicInfoXML.yml| A ComicInfo XML gallery scraper | A python scraper that looks for ComicInfo XML compatible files in the gallery's folder/filename and parses them | [#827](https://github.com/stashapp/CommunityScrapers/pull/827)
+CopyFromScene.yml| A gallery scraper that returns metadata from the first linked scene | A python scraper that copies metadata from the linked scene; first link the scene to the gallery, then run the scraper on the gallery |
+CopyToGallery.yml| A scene to gallery scraper | A python scene scraper that copies metadata from a scene to the associated galleries. Can optionally (check the .py file) associate and copy metadata to all galleries in the same folder as the scene| [#895](https://github.com/stashapp/CommunityScrapers/pull/895)
+dc-onlyfans.yml| An OnlyFans DB scene scraper | A python scraper that scrapes OnlyFans scenes using the DB file (user_data.db) created by DIGITALCRIMINAL's tool | [#847](https://github.com/stashapp/CommunityScrapers/pull/847)
+dc_onlyfans_fansdb.yml | FansDB "compliant" OnlyFans metadata DB scraper | Python scraper which scrapes metadata from DIGITALCRIMINAL compatible `user_data.db` databases following FansDB guidelines | [#1500](https://github.com/stashapp/CommunityScrapers/pull/1500)
+Filename.yml | Scrape a scene's (local) filename to set as the scene title | Utility scraper useful if you've bulk-updated filenames outside of stash and want the changes synced back into stash | [#1136](https://github.com/stashapp/CommunityScrapers/pull/1136)
+jellyfin.yml| A Jellyfin/Emby scraper | A python scraper that uses the Jellyfin/Emby API to look for Scenes, Performers and Movies via URL, Query or Fragments. Needs the URL, API key and user from Jellyfin set in jellyfin.py and the URLs in jellyfin.yml adapted to your local Jellyfin/Emby instance |
+MindGeekAPI.yml| A sceneBy(Name\|Fragment) scraper for the MindGeek network| A python scraper that queries the MindGeek API directly. For the **required** setup instructions, refer to the relevant PRs and have a look at the `MindGeekAPI.py` file | [#711](https://github.com/stashapp/CommunityScrapers/pull/711) [#738](https://github.com/stashapp/CommunityScrapers/pull/738) [#411](https://github.com/stashapp/CommunityScrapers/pull/411)
+multiscrape.yml| A performer scraper that can utilize multiple stash Performer scrapers| A python scraper that can use multiple existing performer scrapers in order to get performer metadata. To configure it, edit the `multiscrape.py` file|[#594](https://github.com/stashapp/CommunityScrapers/pull/594)
+performer-image-by-scene.yml| A performer image scraper that gets images from scene covers | A python scraper that searches for scenes with the performer and sets the scene cover image as the performer image|[#1039](https://github.com/stashapp/CommunityScrapers/pull/1039)
+performer-image-dir.yml| A performer image scraper compatible with the actress-pics repo | A python scraper that searches in a cloned actress-pics repo for performer images. Configuration and more info in `performer-image-dir.py`|[#453](https://github.com/stashapp/CommunityScrapers/pull/453)
+ScrapeWithURL.yml| A sceneByFragment scraper that performs a sceneByURL scrape on scenes with URLs provided | This scraper allows users to perform sceneByURL scrapes in bulk.| [#900](https://github.com/stashapp/CommunityScrapers/issues/900)
+ShokoAPI.yml| A sceneByFragment scraper for [Shoko Server](https://shokoanime.com) | A sceneByFragment scraper that queries a local Shoko Server instance using the filename for scene meta. To configure it, edit the `ShokoAPI.py` file| [#586](https://github.com/stashapp/CommunityScrapers/issues/586) [#628](https://github.com/stashapp/CommunityScrapers/pull/628)
+stash-sqlite.yml| A performer scraper that searches a stash db file | This python scraper uses the sqlite database from another stash instance and allows you to parse performers |[#230](https://github.com/stashapp/CommunityScrapers/pull/230) [#460](https://github.com/stashapp/CommunityScrapers/pull/460)
+torrent.yml| A torrent scene scraper | This python scraper parses all torrent files in the specified directory (edit `torrent.py`) for embedded metadata using the scene's title. The title can be either a file from the torrent or the filename of the .torrent file | [#358](https://github.com/stashapp/CommunityScrapers/pull/358)
+xbvrdb.yml| A scene/gallery scraper for XBVR db files| This python scraper uses the title from stash to search the sqlite database from XBVR for metadata. 
You would need to copy `main.db` from your XBVR configuration and move it to `scrapers/xbvr.db` | [#190](https://github.com/stashapp/CommunityScrapers/pull/190)
diff --git a/scrapers/Sayuncle.yml b/scrapers/Sayuncle.yml
index 2a207a981..e002bb7f1 100644
--- a/scrapers/Sayuncle.yml
+++ b/scrapers/Sayuncle.yml
@@ -17,9 +17,13 @@ xPathScrapers:
       Name: //a[@class="model-name-link"]
     Details:
       selector: //div[@class="col sceneDesc hide"]/text()
+    Tags:
+      Name:
+        selector: //div[@class="col-12 tags-container"]
+        split: ", "
     Image:
       selector: //meta[@property="og:image"]/@content
     Studio:
       Name:
         selector: //div[@class="siteName float-right px-3 rounded"]/a
-# Last Updated January 10, 2022
+# Last Updated October 24, 2022
diff --git a/scrapers/ScrapeWithURL.py b/scrapers/ScrapeWithURL.py
deleted file mode 100644
index a660cc20d..000000000
--- a/scrapers/ScrapeWithURL.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import json
-import sys
-
-try:
-    import py_common.graphql as graphql
-    import py_common.log as log
-except ModuleNotFoundError:
-    print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr)
-    sys.exit()
-
-def call_graphql(query, variables=None):
-    return graphql.callGraphQL(query, variables)
-
-def scrape_scene(url):
-    query = """query scrapeSceneURL($url: String!) {
-        scrapeSceneURL(url: $url) {
-            title
-            details
-            date
-            image
-            studio {
-                name
-            }
-            tags {
-                name
-            }
-            performers {
-                name
-            }
-            url
-        }
-    }"""
-
-    variables = {'url': url}
-    result = call_graphql(query, variables)
-    log.debug(f"result {result}")
-    return result["scrapeSceneURL"]
-
-
-FRAGMENT = json.loads(sys.stdin.read())
-SCENE_ID = FRAGMENT.get("id")
-
-scene = graphql.getScene(SCENE_ID)
-if scene:
-    scene_url = scene['url']
-
-    if scene_url:
-        result = scrape_scene(scene_url)
-        print(json.dumps(result))
-    else:
-        print(json.dumps({}))
diff --git a/scrapers/ScrapeWithURL/ScrapeWithURL.py b/scrapers/ScrapeWithURL/ScrapeWithURL.py
new file mode 100644
index 000000000..340c626a8
--- /dev/null
+++ b/scrapers/ScrapeWithURL/ScrapeWithURL.py
@@ -0,0 +1,89 @@
+import json
+import os
+import sys
+
+# to import from a parent directory we need to add that directory to the system path
+csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
+parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
+sys.path.append(
+    parent
+)  # add parent dir to sys path so that we can import py_common from there
+
+try:
+    import py_common.graphql as graphql
+    import py_common.log as log
+except ModuleNotFoundError:
+    print(
+        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
+        file=sys.stderr,
+    )
+    sys.exit()
+
+
+def scrape_scene(url):
+    query = """
+query scrapeSceneURL($url: String!) {
+    scrapeSceneURL(url: $url) {
+        title
+        details
+        code
+        date
+        image
+        urls
+        studio {
+            name
+            url
+            image
+            parent {
+                name
+                url
+                image
+            }
+        }
+        tags {
+            name
+        }
+        performers {
+            aliases
+            birthdate
+            career_length
+            country
+            death_date
+            details
+            ethnicity
+            eye_color
+            fake_tits
+            gender
+            hair_color
+            height
+            instagram
+            images
+            measurements
+            name
+            piercings
+            tags {
+                name
+            }
+            tattoos
+            twitter
+            url
+            weight
+        }
+    }
+}"""
+
+    variables = {"url": url}
+    result = graphql.callGraphQL(query, variables)
+    log.debug(f"result {result}")
+    if result:
+        return result["scrapeSceneURL"]
+
+
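+# stash runs sceneByFragment script scrapers by writing the scene fragment as JSON to stdin;
+# only the fragment's "url" field is needed here, to delegate to the matching sceneByURL scraper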
+FRAGMENT = json.loads(sys.stdin.read())
+url = FRAGMENT.get("url")
+
+if url:
+    result = scrape_scene(url)
+    print(json.dumps(result))
+else:
+    print("null")
diff --git a/scrapers/ScrapeWithURL.yml b/scrapers/ScrapeWithURL/ScrapeWithURL.yml
similarity index 86%
rename from scrapers/ScrapeWithURL.yml
rename to scrapers/ScrapeWithURL/ScrapeWithURL.yml
index dfc7541dc..fbd2e4594 100644
--- a/scrapers/ScrapeWithURL.yml
+++ b/scrapers/ScrapeWithURL/ScrapeWithURL.yml
@@ -1,8 +1,10 @@
-name: Scrape with URL
-sceneByFragment:
-  action: script
-  script:
-    - python
-    - ScrapeWithURL.py
-
-# Last Updated April 16, 2022
+name: Scrape with URL
+# requires: py_common
+
+sceneByFragment:
+  action: script
+  script:
+    - python
+    - ScrapeWithURL.py
+
+# Last Updated April 16, 2022
diff --git a/scrapers/SexBabesVR.yml b/scrapers/SexBabesVR.yml
index 6b16840eb..7ef89f712 100644
--- a/scrapers/SexBabesVR.yml
+++ b/scrapers/SexBabesVR.yml
@@ -7,30 +7,27 @@ sceneByURL:
 xPathScrapers:
   sceneScraper:
     common:
-      $info: //div[@class="video-info"]
+      $detail: //div[@class="video-detail"]
     scene:
-      Title: $info//div[@class="video-group-left"]//h1[@class="title"]/text()
+      Title: $detail//h1/text()
       Date:
-        selector: $info//span[@class="date-display-single"]/@content
+        selector: $detail//div[@class="video-detail__description--container"]/div[contains(@style, "padding-top")]/text()
         postProcess:
-          - replace:
-              - regex: (\d{4}-\d{2}-\d{2})T.+
-                with: $1
-          - parseDate: 2006-01-02
+          - parseDate: Jan 02
       Details:
-        selector: $info//p/text()
+        selector: $detail/div[@class="container"]/p/text()
         concat: " "
       Tags:
-        Name: $info//div[@class="video-group-left"]/div[@class="video-tags"]//a/text()
+        Name: $detail//div[@class="tags"]//a/text()
       Performers:
-        Name: $info//div[@class="video-group-left"]/div[@class="video-actress-name"]//a/text()
-      Image:
-        selector: //div[@class="splash-screen fullscreen-message is-visible"]/@style
-        postProcess:
-          - replace:
-              - regex: .+(http[^\)]+).+
-                with: $1
+        Name:
+          selector: $detail//div[@class="video-detail__description--author"]//a/text()
+          postProcess:
+            - replace:
+                - regex: ^\s*(.+)\s*$
+                  with: $1
+      Image: //dl8-video/@poster
       Studio:
         Name:
           fixed: "SexBabesVR"
-# Last Updated August 14, 2022
+# Last Updated July 21, 2023
diff --git a/scrapers/SexLikeReal.yml b/scrapers/SexLikeReal.yml
index ed41984d6..3f15968bb 100644
--- a/scrapers/SexLikeReal.yml
+++ b/scrapers/SexLikeReal.yml
@@ -62,7 +62,13 @@ xPathScrapers:
         SinsVR: "XSinsVR"
         VirtualXPorn: "Virtual X Porn"
         WankitnowVR: "Wank It Now VR"
-      Image: //div[contains(@class,"splash-screen")]/img/@src
+      Image:
+        selector: //div[contains(@class,"splash-screen")]/@style
+        postProcess:
+          - replace:
+              # https://regex101.com/r/fszqAQ/4
+              - regex: .*url\((?:&quot;|")?([^");]*)(?:&quot;|")?\).*
+                with: $1
       URL: &sceneUrl //link[@rel="canonical"]/@href
       Code:
         selector: *sceneUrl
         postProcess:
           - replace:
               - regex: '^(.+)-(\d+)/?$'
                 with: $2
-# Last Updated 
December 21, 2022 +# Last Updated October 21, 2023 diff --git a/scrapers/SexMex.yml b/scrapers/SexMex.yml index 72f1fcf11..d9c9be4f6 100644 --- a/scrapers/SexMex.yml +++ b/scrapers/SexMex.yml @@ -1,4 +1,4 @@ -name: "SexMex" +name: SexMex sceneByURL: &byUrl - action: scrapeXPath @@ -10,10 +10,15 @@ galleryByURL: *byUrl xPathScrapers: sceneScraper: scene: - Title: &title //div[@class="panel-body"]/h4//text() + Title: &title + selector: //div[@class="panel-body"]/h4//text() + postProcess: + - replace: + - regex: ([^.]*)\s\..* + with: $1 Details: &details //div[@class="panel-body"]/p[not(@class)] Tags: &tags - Name: + Name: selector: //meta[@name="keywords"]/@content split: "," Performers: &performers @@ -21,18 +26,17 @@ xPathScrapers: Studio: &studio Name: fixed: Sex Mex - Image: + Image: selector: //video/@poster postProcess: - replace: - regex: cover.jpg$ with: 1.jpg - + gallery: Title: *title Details: *details Tags: *tags Performers: *performers Studio: *studio - -# Last Updated February 14, 2022 +# Last Updated November 26, 2023 diff --git a/scrapers/ShinyBound.yml b/scrapers/ShinyBound.yml index dcb2cfbcc..302268f35 100644 --- a/scrapers/ShinyBound.yml +++ b/scrapers/ShinyBound.yml @@ -8,24 +8,22 @@ sceneByURL: xPathScrapers: sceneScraper: - common: - $logo: //div[@class="logo"]/a/@href scene: - Title: //span[@class='update_title'] + Title: //h1 Details: - selector: //span[@class='latest_update_description'] + selector: //div[contains(@class, "videoDescription")]/p Date: - selector: //span[@class='availdate'] + selector: //div[@class="contentT"]/ul[@class="contentInfo"]/li[./i[contains(@class, "fa-calendar")]]/text() postProcess: - - parseDate: 01/02/2006 + - parseDate: Jan 2, 2006 Tags: - Name: //span[@class='update_tags']/a + Name: //div[@class="tags"]//a/text() Performers: Name: - selector: //span[@class='tour_update_models']/a + selector: //div[@class="models"]//a/text() Studio: Name: - selector: $logo + selector: //div[@class="logo"]/a/@href postProcess: - replace: - regex: https://([^.]+)\..+ @@ -34,6 +32,9 @@ xPathScrapers: shinysboundsluts: ShinysBoundSluTS shinybound: ShinyBound Image: - selector: $logo|//img[@class='stdimage promo_thumb left thumbs']/@src - concat: "/" -# Last Updated May 23, 2022 + selector: //iframe/@src + postProcess: + - replace: + - regex: .*\?poster=(.*\.jpg).* + with: $1 +# Last Updated February 23, 2023 diff --git a/scrapers/ShokoAPI/ShokoAPI.py b/scrapers/ShokoAPI/ShokoAPI.py index 9fd87e6f1..2633a4d80 100644 --- a/scrapers/ShokoAPI/ShokoAPI.py +++ b/scrapers/ShokoAPI/ShokoAPI.py @@ -29,20 +29,19 @@ try: from py_common import log + import py_common.graphql as graphql except ModuleNotFoundError: print( "You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) sys.exit() +import config -#user inputs start SHOKO_API_KEY = '' #leave empty it gets your Shoko api key with your shoko server username and password -STASH_API_KEY = "" #your Stash api key -STASH_URL = "http://localhost:9999/graphql" #your stash graphql url -SHOKO_URL = "http://localhost:8111" #your shoko server url -SHOKO_USER = "" #your shoko server username -SHOKO_PASS = "" #your shoko server password -#user inputs end +SHOKO_URL = config.SHOKO.get("url", "") +SHOKO_USER = config.SHOKO.get("user", "") +SHOKO_PASS = config.SHOKO.get("pass", "") + def validate_user_inputs() -> bool: @@ -50,28 +49,14 @@ def validate_user_inputs() -> bool: if shoko is False: log.error("Shoko Url needs to be hostname:port and is currently " + SHOKO_URL) - stash = bool(re.match(r"^(http|https)://.+:\d+/graphql$", STASH_URL)) - if stash is False: - log.error( - "Stash Url needs to be hostname:port/graphql and is currently " + - STASH_URL) - return (shoko and stash) + return (shoko) def get_filename(scene_id: str) -> str: log.debug(f"stash sceneid: {scene_id}") - headers = CaseInsensitiveDict() - headers["ApiKey"] = STASH_API_KEY - headers["Content-Type"] = "application/json" - data = data = '{ \"query\": \" query { findScene (id: ' + scene_id + ' ) {path , id} }\" }' - resp = requests.post(url=STASH_URL, headers=headers, data=data) - if resp.status_code == 200: - log.debug("Stash response was successful resp_code: " + str(resp.status_code)) - else: - log.error("response from stash was not successful stash resp_code: " + str(resp.status_code)) - return None - output = resp.json() - path = output['data']['findScene']['path'] + log.debug(graphql.getScene(scene_id)) + data = graphql.getScene(scene_id) + path = data['files'][0]['path'] log.debug("scene path in stash: " + str(path)) pattern = "(^.+)([\\\\]|[/])" replace = "" @@ -172,6 +157,12 @@ def get_series(apikey: str, scene_id: str): def query(fragment: dict) -> dict: + if fragment['title'] == "": + scene_id = fragment['id'] + query = """query findScene($scene_id:ID!){findScene(id:$scene_id){files{basename}}}""" + variables = {'scene_id': scene_id} + result = call_graphql(query, variables) + basename = result['findScene']['files'][0]['basename'] filename, apikey = find_scene_id(fragment['id']) try: findscene_scene_id, findscene_epnumber, find_date = find_scene(apikey, filename) @@ -197,6 +188,9 @@ def main(): data = query(fragment) print(json.dumps(data)) +def call_graphql(query, variables=None): + return graphql.callGraphQL(query, variables) + if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/scrapers/ShokoAPI/ShokoAPI.yml b/scrapers/ShokoAPI/ShokoAPI.yml index bab4dc550..f4012c0ee 100644 --- a/scrapers/ShokoAPI/ShokoAPI.yml +++ b/scrapers/ShokoAPI/ShokoAPI.yml @@ -1,4 +1,6 @@ name: "ShokoAPI" +# requires: py_common + sceneByFragment: action: script script: @@ -6,4 +8,4 @@ sceneByFragment: - ShokoAPI.py - query -# Last Updated April 03, 2022 +# Last Updated November 27, 2023 diff --git a/scrapers/ShokoAPI/config.py b/scrapers/ShokoAPI/config.py new file mode 100644 index 000000000..51757e963 --- /dev/null +++ b/scrapers/ShokoAPI/config.py @@ -0,0 +1,9 @@ +#it gets your Shoko api key with your shoko server username and password +SHOKO = { + "url": + "http://localhost:8111", #your shoko server url + "user": + "username",#your shoko server username + "pass": + "password" #your shoko server password +} diff --git a/scrapers/SissyPov.yml 
b/scrapers/SissyPov.yml deleted file mode 100644 index b91aff66f..000000000 --- a/scrapers/SissyPov.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: SissyPov -sceneByURL: - - action: scrapeXPath - url: - - sissypov.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="videoDetails clear"]/h3 - Performers: - Name: //li[@class="update_models"]/a - Date: - selector: //p[contains(span,"Date Added:")]/text() - postProcess: - - parseDate: January 2, 2006 - Tags: - Name: //ul[contains(li,"Tags:")]//a - Details: - selector: //div[@class="videoDetails clear"]/p - Image: - selector: //a[@class="fake_trailer"]/img/@src0_1x - postProcess: - - replace: - - regex: ^ - with: "https://sissypov.com" - Studio: - Name: - fixed: Sissy Pov -# Last Updated May 21, 2021 diff --git a/scrapers/SmutPuppet.yml b/scrapers/SmutPuppet.yml new file mode 100644 index 000000000..9dae14d57 --- /dev/null +++ b/scrapers/SmutPuppet.yml @@ -0,0 +1,67 @@ +name: SmutPuppet +sceneByURL: + - action: scrapeXPath + url: + - blackandbig.com/update + - darksodomy.com/update + - dothewife.com/update + - dreamtranny.com/update + - genlez.com/update + - goldenslut.com/update + - grannyvsbbc.com/update + - jeffsmodels.com/update + - milfsodomy.com/update + - smutmerchants.com/update + - smutpuppet.com/update + - suggabunny.com/update + - teenerotica.xxx/update + scraper: sceneScraper +sceneByFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Performers: + Name: //div[@class="model-rich"]/h4[@class="theme-color"]/a + Title: //div[@class="section-title"]/h4 + Details: + selector: //p[@class="read-more"]/text() + postProcess: + - replace: + - regex: '^\s*:\s*' + with: "" + Date: + selector: //small[@class="updated-at"]/text() + postProcess: + - parseDate: Jan 2, 2006 + Tags: + Name: + selector: //div[@class="model-categories"]/a/text() + Studio: + Name: + selector: //div[@class="block-logo"]/a/img/@alt + Image: + selector: //img[@class="video-banner"]/@src|//video/@poster + postProcess: + - replace: + - regex: (?:.+)(\/usermedia\/.+\.jpg)(?:.+) + with: $1 + - regex: "^/usermedia/" + with: "https://smutpuppet.com/usermedia/" + Code: + selector: //script[contains(text(),"/api/update/")] + postProcess: + - replace: + - regex: .+\/api\/update\/(\d{3,})\/.+ + with: $1 + # Return the sanitized URL + URL: + selector: //div[@class="block-logo"]/a/img/@src|//script[contains(text(),"/api/update/")] + concat: "|" + postProcess: + - replace: + - regex: \/static\/(\w+\.[a-z]{3})\/.+\/api\/update\/(\d{3,})\/.+ + with: https://$1/update/$2/ +# Last Updated July 27, 2023 diff --git a/scrapers/Spizoo.yml b/scrapers/Spizoo.yml index e62d9aebc..882562bcc 100644 --- a/scrapers/Spizoo.yml +++ b/scrapers/Spizoo.yml @@ -2,34 +2,78 @@ name: Spizoo sceneByURL: - action: scrapeXPath url: + - creamher.com/updates/ - firstclasspov.com/updates/ - - intimatelesbians.com/updates/ - - jessicajaymesxxx.com/updates/ - mrluckypov.com/updates/ - - porngoespro.com/updates/ - - pornstartease.com/updates/ + - mrluckyraw.com/updates/ + - mrluckyvip.com/updates/ + - rawattack.com/updates/ + - realsensual.com/updates/ - spizoo.com/updates/ - - thestripperexperience.com/updates/ + - vlogxxx.com/updates/ scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.spizoo.com/search.php?query={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: + sceneSearch: + common: + $row: 
//div[@class="model-update row"] + scene: + Title: $row//h3 + URL: $row//h3/../@href + Image: $row//img/@src + Details: $row//p[contains(@class, "model-update-description")] + sceneScraper: + common: + $scene_info: //section[@id="trailer-data" or @id="sceneInfo" or @id="scene-info" or @id="des-scene"] + $video_section: (//section[@id="trailer-video" or @id="scene" or @id="scene-video"] | //div[contains(@class, "videoHolder")]) scene: - Title: //h1 + Title: + selector: //div[@class="title" or @class="row"]//h1 | //h2[contains(@class, "titular")] | //title + postProcess: + # RawAttack titles have a trailing dash and space + - replace: + - regex: \s\-\s*$ + with: Date: - selector: //p[@class="date"] + selector: $scene_info//p[@class="date"] postProcess: - parseDate: 2006-01-02 - Details: //p[@class="description"] + Details: $scene_info//p Performers: + Name: $scene_info//a[contains(@href,"/model")]/@title + Tags: + Name: $scene_info//a[contains(@href,"/categories")] | //div[contains(@class, "categories-holder")]/a + Studio: Name: - selector: //section[@id="sceneInfo"]//a[contains(@href,"/model")]/@title + # Scenes on Spizoo can have an element with the studio name, others we get from the base URL + selector: //i[@id="site"]/@value | //base/@href postProcess: - replace: - - regex: \s\(.+ - with: - Tags: - Name: //section[@id="sceneInfo"]//a[contains(@href,"/categorie")]/text() | //div[@class="categories-holder"]/a - Image: //video[@id="trailervideo"]/@poster - - -# Last Updated June 28, 2021 + - regex: https?://(?:www\.)?(.*)(?:\.com)(?:/.*)? + with: $1 + - map: + creamher: Cream Her + firstclasspov: First Class POV + mrluckypov: Mr. LuckyPOV + mrluckyraw: Mr. LuckyRaw + mrluckyvip: Mr. LuckyVIP + rawattack: RawAttack + realsensual: Real Sensual + spizoo: Spizoo + vlogxxx: Vlog XXX + Image: + selector: $video_section//video/@poster | //img[contains(@class, "update_thumb") or contains(@class, "trailer-thumb")]/@src + postProcess: + - replace: + # Remove any resizing parameters for the image, we want the original + - regex: "[?&]img(?:q|w|h)=[^&]+" + with: +# Last Updated August 26, 2023 diff --git a/scrapers/SpunkWorthy.yml b/scrapers/SpunkWorthy.yml new file mode 100644 index 000000000..2d78d5ef2 --- /dev/null +++ b/scrapers/SpunkWorthy.yml @@ -0,0 +1,73 @@ +name: SpunkWorthy +sceneByURL: + - action: scrapeXPath + url: + - spunkworthy.com/preview/view_video + scraper: sceneScraper + +performerByURL: + - action: scrapeXPath + url: + - spunkworthy.com/preview/view_guy + scraper: performerScraper + +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //p[contains(@class, 'h1')]/span[contains(@class, 'h2')]/text() + Performers: + Name: + selector: //div[@class="scene_models"]//p/a/text() + postProcess: + - replace: + - regex: "More of " + with: "" + URL: + selector: //div[@class="scene_models"]//p/a/@href + postProcess: + - replace: + - regex: "(.*)" + with: "https://www.spunkworthy.com$1" + Details: + selector: //div[contains(@class, 'video_synopsis')]//p[not(ancestor::div[@class="scene_models"]) and not(@class)][not (starts-with(text(),'Tags:'))]/text() + concat: "\n\n" + Tags: + Name: //div[contains(@class, 'video_synopsis')]//p[contains(text(),"Tags:")]/a/text() + Image: + selector: //div[contains(@class, 'video_player')]/img[1]/@src + postProcess: + - replace: + - regex: "(.*)" + with: "https://www.spunkworthy.com$1" + Studio: + Name: + fixed: SpunkWorthy + + performerScraper: + performer: + Name: //p[contains(@class, 'h1')]/span[contains(@class, 'h2')]/text() + Gender: + fixed: Male 
+ Weight: + selector: //div[@class='model_left']/p[1]/text()[contains(.,'Weight')] + postProcess: + - replace: + - regex: "Weight: " + with: "" + - lbToKg: true + Height: + selector: //div[@class='model_left']/p[1]/text()[contains(.,'Height')] + postProcess: + - replace: + - regex: "Height: " + with: "" + - feetToCm: true + Image: + selector: //div[@class='model_left']/img[1]/@src + postProcess: + - replace: + - regex: "(.*)" + with: "https://www.spunkworthy.com$1" + +# Last Updated October 02, 2023 diff --git a/scrapers/StasyQVR.yml b/scrapers/StasyQVR.yml new file mode 100644 index 000000000..c87765f2d --- /dev/null +++ b/scrapers/StasyQVR.yml @@ -0,0 +1,33 @@ +name: "StasyQVR" +sceneByURL: + - action: scrapeXPath + url: + - stasyqvr.com/virtualreality + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class='video-title']/h1/text() + Details: //div[@class='video-info']/p/text() + Date: + selector: //div[@class='video-meta-date']/text() + postProcess: + - parseDate: Jan 2, 2006 + Code: + selector: //script[contains(.,'vrPlayerSettings')]/text() + postProcess: + - replace: + - regex: '[\s\S]*videoId: (\d+),[\s\S]*' + with: $1 + Studio: + Name: + fixed: StasyQVR + Performers: + Name: //div[@class='video-info']//a/h2/text() + Image: + selector: //div[@id='webvr']/div[contains(@style,'background-image:')]/@style + postProcess: + - replace: + - regex: '^background-image: url\(|\);$' + with: "" +# Last Updated October 20, 2023 \ No newline at end of file diff --git a/scrapers/Strokies.yml b/scrapers/Strokies.yml new file mode 100644 index 000000000..039334f2b --- /dev/null +++ b/scrapers/Strokies.yml @@ -0,0 +1,42 @@ +name: "Strokies" +sceneByURL: + - action: scrapeXPath + url: + - strokies.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class='video-title'] + Details: + selector: //div[@class='video-description']/p/text() + concat: "\n\n" + Date: + selector: //div/p[contains(text(),"Added on:")] + postProcess: + - replace: + - regex: Added on:\s(.+) + with: $1 + - parseDate: Jan 2, 2006 + Image: + selector: //div[@class="vjs-poster"]/@style + postProcess: + - replace: + - regex: .+url\("(.+)\".+ + with: https:$1 + Studio: + Name: + fixed: Strokies + Tags: + Name: //div[@class='model-tags']//a[contains(@href,"tag")] + Performers: + Name: //div[@class='model-tags']//a[contains(@href,"model")] + URL: + selector: //div[@class='model-tags']//a[contains(@href,"model")]/@href + postProcess: + - replace: + - regex: ^ + with: https://strokies.com +driver: + useCDP: true +# Last Updated January 20, 2023 diff --git a/scrapers/TSPlayground.yml b/scrapers/TSPlayground.yml new file mode 100644 index 000000000..9b7d4e9cb --- /dev/null +++ b/scrapers/TSPlayground.yml @@ -0,0 +1,35 @@ +name: "TS Playground" +sceneByURL: + - action: scrapeXPath + url: + - tsplayground.com/video/ + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - tsplayground.com/models/ + scraper: performerScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class="content-title"]/h1/text() + Details: //div[@class="content-desc more-desc"] + Date: + selector: //div[@class="content-date"]/div[@class="label"]/text() + postProcess: + - replace: + - regex: (\d{2}).(\d{2}).(\d{4}) + with: $3-$2-$1 + Image: //meta[@property="og:image"]/@content + Studio: + Name: + fixed: TS Playground + Tags: + Name: //div[@class="content-tags"]//a/text() + Performers: + Name: //div[@class="content-models"]//a/span/text() + performerScraper: + performer: + Name: 
//div[contains(@class, "title-col")]/h1/text() + Image: //div[@class="model-avatar"]/img/@src +# Last Updated January 13, 2023 diff --git a/scrapers/Algolia_TabooHeat.yml b/scrapers/TabooHeat/TabooHeat.yml similarity index 77% rename from scrapers/Algolia_TabooHeat.yml rename to scrapers/TabooHeat/TabooHeat.yml index e695f5895..455d42d3a 100644 --- a/scrapers/Algolia_TabooHeat.yml +++ b/scrapers/TabooHeat/TabooHeat.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "TabooHeat" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - tabooheat.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - tabooheat.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat - gallery # Last Updated December 22, 2022 diff --git a/scrapers/TadpolexStudio.yml b/scrapers/TadpolexStudio.yml new file mode 100644 index 000000000..42879fdcb --- /dev/null +++ b/scrapers/TadpolexStudio.yml @@ -0,0 +1,42 @@ +name: "TadpolexStudio" +sceneByURL: + - action: scrapeXPath + url: + - tadpolexstudio.com/trailers + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $models: //div[contains(@class, "models-list-thumbs")]//a + scene: + Studio: + Name: + selector: //a[@class="navbar-brand"]/@title + postProcess: + - map: + OFFICIAL Backroom Casting Couch: Backroom Casting Couch + BlackAmbush: Black Ambush + HOTMILFSFUCK: Hot Milfs Fuck + "Exploited College Girls: Excogi": Exploited College Girls + + Title: //div[@class="video-player"]//h2[@class="section-title"]/text() + Performers: + Name: $models/span + URL: $models/@href + Date: + selector: //strong[text()="Released:"]/following-sibling::text() + postProcess: + - parseDate: January 2, 2006 + Tags: + Name: //ul[@class="tags"]//a + Image: + selector: //base/@href|//div[@class="player-thumb"]//img[contains(@class, "update_thumb")]/@src0_1x + concat: "|" + postProcess: + - replace: + - regex: "(^[^|]+)\\|([^\\.]+\\.jpg).*" + with: $1$2 + Details: + selector: //div[@class="update-info-block"]/h3[text()="Description:"]/following-sibling::text() + concat: "\n\n" +# Last Updated August 20, 2023 diff --git a/scrapers/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml similarity index 90% rename from scrapers/Teamskeet.yml rename to scrapers/Teamskeet/Teamskeet.yml index 35eb892ea..dbca726b6 100644 --- a/scrapers/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -1,4 +1,6 @@ name: "TeamskeetAPI" +# requires: py_common + sceneByURL: - url: - teamskeet.com/movies/ diff --git a/scrapers/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py similarity index 92% rename from scrapers/TeamskeetAPI.py rename to scrapers/Teamskeet/TeamskeetAPI.py index 4ceb22be6..eab533466 100644 --- a/scrapers/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -5,6 +5,14 @@ import sys from datetime import datetime +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common 
from there + try: import py_common.log as log except ModuleNotFoundError: diff --git a/scrapers/TeenMegaWorld.yml b/scrapers/TeenMegaWorld.yml index 5e74798f1..45ddc3383 100644 --- a/scrapers/TeenMegaWorld.yml +++ b/scrapers/TeenMegaWorld.yml @@ -3,70 +3,107 @@ name: TeenMegaWorld sceneByURL: - action: scrapeXPath scraper: sceneScraper - url: - - anal-angels.com/trailers/ - - anal-beauty.com/trailers/ - - beauty-angels.com/trailers/ - - beauty4k.com/trailers/ - - creampie-angels.com/trailers/ - - dirty-coach.com/trailers/ - - dirty-doctor.com/trailers/ - - firstbgg.com/trailers/ - - fuckstudies.com/trailers/ - - gag-n-gape.com/trailers/ - - lollyhardcore.com/trailers/ - - nubilegirlshd.com/trailers/ - - old-n-young.com/trailers/ - - rawcouples.com/trailers/ - - soloteengirls.net/trailers/ - - teenmegaworld.net/trailers/ - - teensexmania.com/trailers/ - - teensexmovs.com/trailers/ - - trickymasseur.com/trailers/ - - x-angels.com/trailers/ - # Behind paywall + url: &urls + - anal-angels.com + - anal-beauty.com + - beauty-angels.com + - beauty4k.com + - creampie-angels.com + - dirty-coach.com + - dirty-doctor.com + - firstbgg.com + - fuckstudies.com + - gag-n-gape.com + - noboring.com + - ohmyholes.com + - old-n-young.com + - rawcouples.com + - soloteengirls.net + - teenmegaworld.net + - teensexmania.com + - teensexmovs.com + - tmwpov.com + - tmwvrnet.com + - trickymasseur.com + - x-angels.com + # Behind paywall - Scenes for these sites can be scraped from the public https://teenmegaworld.net/ site # - 18firstsex.com # - aboutgirlslove.com # - atmovs.com # - exgfbox.com # - hometeenvids.com - # - nylonsx.com - # - squirtingvirgin.com - # - watchmefucked.com - # - wow-orgasms.com - # Unsupported sites # - hometoyteens.com + # - lollyhardcore.com + # - nubilegirlshd.com + # - nylonsx.com # - privateteenvideo.com + # - squirtingvirgin.com # - teens3some.com # - teenstarsonly.com + # - watchmefucked.com + # - wow-orgasms.com +performerByURL: + - action: scrapeXPath + scraper: performerScraper + url: *urls xPathScrapers: sceneScraper: common: - $content: //div[@class="video"] + $content: //div[@class="video-block"] scene: Title: $content//h1 Date: - selector: $content//div[@class="date"]/time + selector: $content//span[@title="Video release date"]/text() postProcess: - parseDate: January 2, 2006 Details: - selector: $content//div[@class="text"]//p + selector: $content//p[@class="video-description-text"] concat: "\n\n" Performers: Name: $content//a[contains(@href,"/models/")] + URL: $content//a[contains(@href,"/models/")]/@href Tags: - Name: $content//ul[@class="tag-list"]//a + Name: $content//a[@class="video-tag-link"] Image: - selector: $content//video/@poster + selector: $content//img[@id="video-cover-image"]/@src postProcess: - replace: - regex: ^ - with: https://teenmegaworld.net + with: https://teenmegaworld.net/ Studio: Name: - selector: $content//div[@class="site"]/a[1] + selector: //base/@href postProcess: - replace: - - regex: \.(com|net)$ - with: -# Last Updated September 26, 2021 + - regex: https://(.*?)\.(com|net)/$ + with: $1 + performerScraper: + common: + $model: //section[contains(@class,"model-profile")] + performer: + Name: $model//h1 + Details: $model//p[contains(@class,"model-profile")] + Gender: + # Only female performers are listed on the sites + fixed: Female + CareerLength: + selector: $model//dt[contains(text(), "Debut year")]/following-sibling::text() + postProcess: + - replace: + - regex: $ + with: " - " + Aliases: $model//dt[contains(text(), 
"AKA")]/following-sibling::text() + HairColor: + selector: $model//dt[contains(text(), "Hair")]/following-sibling::text() + postProcess: + - replace: + - regex: ^Fair + with: Blonde + EyeColor: $model//dt[contains(text(), "Eyes")]/following-sibling::text() + Image: + selector: $model//img/@src + postProcess: + - replace: + - regex: ^ + with: https://teenmegaworld.net/ +# Last Updated November 06, 2023 diff --git a/scrapers/TeenyTaboo.yml b/scrapers/TeenyTaboo.yml new file mode 100644 index 000000000..24734f6da --- /dev/null +++ b/scrapers/TeenyTaboo.yml @@ -0,0 +1,40 @@ +name: Teeny Taboo +sceneByURL: + - action: scrapeXPath + url: + - teenytaboo.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class="customhcolor"] + Details: //h2[@class="customhcolor2"] + Date: + selector: //span[@class="date"] + postProcess: + - replace: + - regex: (\w+) (0?[1-9]|[12][0-9]|3[01])(?:st|nd|th) (\d{4}) + with: $1 $2, $3 + - parseDate: January 2, 2006 + Image: + selector: //center/img/@src + postProcess: + - replace: + - regex: ^ + with: "https://teenytaboo.com/" + Studio: + Name: + fixed: Teeny Taboo + Tags: + Name: + selector: //h4[@class="customhcolor" and not(./span)] + split: "," + Performers: + Name: + selector: //h3[@class="customhcolor"] + split: "," + postProcess: + - replace: + - regex: \x{00a0} + with: "" +# Last Updated June 11, 2023 diff --git a/scrapers/Tenshigao.yml b/scrapers/Tenshigao.yml index 04f62fb02..265b53fe6 100644 --- a/scrapers/Tenshigao.yml +++ b/scrapers/Tenshigao.yml @@ -4,8 +4,23 @@ sceneByURL: url: - tenshigao.com scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - tenshigao.com + scraper: performerScraper +sceneByName: + action: scrapeXPath + queryURL: https://tenshigao.com/?s={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $model: //div[@class="model-thumb"] scene: Title: selector: //span[@class="mroundedthumbs"]//@alt @@ -17,12 +32,17 @@ xPathScrapers: with: "$1" - parseDate: January 02, 2006 Performers: - Name: //h5/a - URL: //h5/a/@href + Name: $model//h5 + URL: $model//h5/a/@href Tags: Name: //div[@class="cat"]/a Details: - selector: //h1[@class="none"] + selector: //p[span[@class="readmore"]] + postProcess: + - replace: + - regex: ... 
Read More + with: + URL: //link[@rel="canonical"]/@href Image: selector: //video[@id="videohtml5tour"]/@poster postProcess: @@ -32,4 +52,119 @@ xPathScrapers: Studio: Name: fixed: Tenshigao -# Last Updated March 02, 2022 + performerScraper: + common: + $profile: //div[@class="model-profile"] + performer: + Name: + selector: $profile[contains(strong, "Name:")]//text() + postProcess: + - replace: + - regex: .*Name:\s*(.*)\s*$ + with: $1 + - regex: None + with: + Aliases: + selector: $profile[contains(strong, "Japanese name:")]//text() + postProcess: + - replace: + - regex: .*Japanese name:\s*(.*)\s*$ + with: $1 + - regex: None + with: + Birthdate: + selector: $profile[contains(strong, "Birth date:")]//text() + postProcess: + - replace: + - regex: .*Birth date:\s*(.*)\s*$ + with: $1 + - parseDate: January 2, 2006 + - parseDate: 2006-01-02 + Measurements: + selector: $profile[contains(strong, "Body:")]//text() + postProcess: + - replace: + - regex: .*Body:\s*(.*)\s*$ + with: $1 + - regex: B(\d*)-W(\d*)-H(\d*) + with: $1-$2-$3 + - regex: -- + with: + - regex: None + with: + Weight: + selector: $profile[contains(strong, "Weight:")]//text() + postProcess: + - replace: + - regex: .*Weight:\s*(.*)\s*$ + with: $1 + - regex: (\d+)kg + with: $1 + - regex: None + with: + Height: + selector: $profile[contains(strong, "Height:")]//text() + postProcess: + - replace: + - regex: .*Height:\s*(.*)\s*$ + with: $1 + - regex: (\d+)cm.* + with: $1 + - regex: None + with: + Details: + selector: //p[span[@class="readmore"]] | //div[@class="intro"]/p + postProcess: + - replace: + - regex: ... Read More + with: + Piercings: + selector: $profile[contains(strong, "Piercings:")]//text() + postProcess: + - replace: + - regex: .*Piercings:\s*(.*)\s*$ + with: $1 + - regex: None|^No$|No Piercing.* + with: + Tattoos: + selector: $profile[contains(strong, "Tattoo:")]//text() + postProcess: + - replace: + - regex: .*Tattoo:\s*(.*)\s*$ + with: $1 + - regex: None|^No$|No Tattoo.* + with: + HairColor: + selector: $profile[contains(strong, "Hair color:")]//text() + postProcess: + - replace: + - regex: (?i).*hair color:?\s*(.*)\s*$ + with: $1 + - regex: None + with: + Country: + fixed: Japan + Ethnicity: + fixed: Asian + Gender: + fixed: Female + URL: //link[@rel="canonical"][1]/@href + Image: + selector: //img[@class="smallroundedthumbs"]/@src + postProcess: + - replace: + - regex: ^// + with: https:// + - regex: 160x160 + with: 500x500 + sceneSearch: + common: + $videos: //div[@class="thumb"]/a + scene: + Title: $videos/@title + URL: $videos/@href + Image: $videos/img/@src + Studio: + Name: + fixed: Tenshigao +# Last Updated November 07, 2023 diff --git a/scrapers/TheBroNetwork.yml b/scrapers/TheBroNetwork.yml new file mode 100644 index 000000000..4a90f6ff1 --- /dev/null +++ b/scrapers/TheBroNetwork.yml @@ -0,0 +1,27 @@ +name: The Bro Network +sceneByURL: + - action: scrapeXPath + url: + - thebronetwork.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@class="gallery_info spacer"]/h1 + Performers: + Name: //div[@class="gallery_info spacer"]//span[@class="tour_update_models"]/a + Tags: + Name: //a[@class="tagsVideoPage"] + Details: + selector: //p[@id="textDesc"] + Image: + selector: //video-js[@id="trailervid"]/@poster + Date: + selector: //span[@class='availdate'][1] + postProcess: + - parseDate: Jan 02, 2006 + Studio: + Name: + fixed: The Bro Network +# Last Updated November 02, 2023 \ No newline at end of file diff --git a/scrapers/TheHabibShow.yml b/scrapers/TheHabibShow.yml new 
file mode 100644 index 000000000..b5b377fc4 --- /dev/null +++ b/scrapers/TheHabibShow.yml @@ -0,0 +1,19 @@ +name: The Habib Show +sceneByURL: + - action: scrapeXPath + url: + - thehabibshow.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //header/h1/text() + Details: + selector: //article[@class="article"]/p/text() + Image: //div[@class="player"]/@data-poster + URL: //meta[@property='og:url']/@content + Studio: + Name: + fixed: The Habib Show +# Last Updated September 29, 2023 diff --git a/scrapers/ThePornDB.yml b/scrapers/ThePornDB.yml deleted file mode 100644 index d3378e512..000000000 --- a/scrapers/ThePornDB.yml +++ /dev/null @@ -1,193 +0,0 @@ -name: ThePornDB -performerByName: - action: scrapeJson - queryURL: https://api.metadataapi.net/performers?q={} - scraper: performerSearch -performerByURL: - - action: scrapeJson - url: - - metadataapi.net/performers/ - queryURL: "{url}" - queryURLReplace: - url: - - regex: ^(.+metadataapi.net)/ - with: https://api.metadataapi.net/ - scraper: performerScraper -sceneByURL: - - action: scrapeJson - url: - - metadataapi.net/scenes/ - queryURL: "{url}" - queryURLReplace: - url: - - regex: ^(.+metadataapi.net)/ - with: https://api.metadataapi.net/ - scraper: sceneScraper -sceneByName: - action: scrapeJson - queryURL: https://api.metadataapi.net/scenes?parse={} - scraper: sceneSearch -sceneByQueryFragment: - action: scrapeJson - queryURL: "{url}" - scraper: sceneScraper -sceneByFragment: - action: scrapeJson - queryURL: https://api.metadataapi.net/scenes?parse={filename}&hash={oshash}&limit=1 - scraper: sceneQueryScraper - queryURLReplace: - filename: - - regex: "[^a-zA-Z\\d\\-._~]" # clean filename so that it can construct a valid url - with: "." # "%20" - - regex: \.+ - with: "." 
-movieByURL: - - action: scrapeJson - url: - - metadataapi.net/movies/ - scraper: movieScraper - queryURL: "{url}" - queryURLReplace: - url: - - regex: ^(.+metadataapi.net)/ - with: https://api.metadataapi.net/ -jsonScrapers: - sceneSearch: - scene: - Title: - # Prevent having duplicated title so it don't get ignored by stash - selector: data.#.[title,_id] - postProcess: - - replace: - - regex: \[|\" - with: "" - - regex: \, - with: " [" - Date: data.#.date - URL: - selector: data.#.id - postProcess: - - replace: - - regex: ^ - with: "https://api.metadataapi.net/scenes/" - Image: data.#.background.small - Details: data.#.description - performerSearch: - performer: - Name: data.#.name - URL: - selector: data.#.id - postProcess: - - replace: - - regex: ^ - with: https://api.metadataapi.net/performers/ - - performerScraper: - common: - $extras: data.extras - performer: - Name: data.name - Details: data.bio - Gender: $extras.gender - Birthdate: $extras.birthday - Country: $extras.nationality - Ethnicity: $extras.ethnicity - HairColor: $extras.hair_colour - Weight: - selector: $extras.weight - postProcess: - - replace: - - regex: kg - with: - Height: - selector: $extras.height - postProcess: - - replace: - - regex: cm - with: - Measurements: $extras.measurements - Tattoos: $extras.tattoos - Piercings: $extras.piercings - Aliases: data.aliases - Image: - selector: data.image - postProcess: - - replace: - - regex: .+male\.png$ - with: - - sceneScraper: - common: - $performers: data.performers - scene: - Title: data.title - Details: data.description - Date: data.date - URL: data.url - Image: - selector: data.background.full - postProcess: - - replace: - - regex: .+default\d+\.png$ - with: - Performers: - Name: data.performers.#.parent.name - URL: - selector: data.performers.#.parent.id - postProcess: - - replace: - - regex: ^ - with: "https://api.metadataapi.net/performers/" - Studio: - Name: data.site.name - Tags: - Name: data.tags.#.tag - - sceneQueryScraper: - common: - $data: data.0 - $performers: data.0.performers - scene: - Title: $data.title - Details: $data.description - Date: $data.date - URL: $data.url - Image: $data.background.full - Performers: - Name: $data.performers.#.parent.name - URL: - selector: $data.performers.#.parent.id - postProcess: - - replace: - - regex: ^ - with: "https://api.metadataapi.net/performers/" - Studio: - Name: $data.site.name - Tags: - Name: - selector: $data.id - postProcess: - - replace: - - regex: ^ - with: "https://api.metadataapi.net/scenes/" - - subScraper: - selector: data.tags.#.tag - concat: "|" - split: "|" - movieScraper: - movie: - Name: data.title - Studio: - Name: data.site.name - FrontImage: data.background.full - Synopsis: data.description - Date: data.date - Duration: data.duration - Director: data.director.name -driver: - headers: - - Key: User-Agent - Value: stashjson/1.0.0 - #- Key: Authorization # Uncomment and add a valid API Key after the `Bearer ` part - # Value: Bearer zUotW1dT5ESmpIpMnccUNczf8q4C9Thzn07ZqygE -# Last Updated October 02, 2022 diff --git a/scrapers/ThePornDBJAV.yml b/scrapers/ThePornDBJAV.yml new file mode 100644 index 000000000..daab280b7 --- /dev/null +++ b/scrapers/ThePornDBJAV.yml @@ -0,0 +1,108 @@ +name: ThePornDB for JAV +sceneByURL: + - action: scrapeJson + url: + - metadataapi.net/jav/ + queryURL: "{url}" + queryURLReplace: &redirectToAPI + url: + - regex: ^(.+metadataapi.net)/ + with: https://api.metadataapi.net/ + scraper: sceneScraper +sceneByName: + action: scrapeJson + queryURL: 
https://api.metadataapi.net/jav?parse={}&per_page=10
+  scraper: sceneSearch
+sceneByQueryFragment:
+  action: scrapeJson
+  queryURL: "{url}"
+  scraper: sceneScraper
+sceneByFragment:
+  action: scrapeJson
+  queryURL: https://api.metadataapi.net/jav?parse={filename}&per_page=10
+  scraper: sceneQueryScraper
+  queryURLReplace:
+    filename:
+      - regex: "[^a-zA-Z\\d\\-._~]" # clean filename so that it can construct a valid url
+        with: "." # "%20"
+      - regex: \.+
+        with: "."
+movieByURL:
+  - action: scrapeJson
+    url:
+      - metadataapi.net/jav/
+    scraper: movieScraper
+    queryURL: "{url}"
+    queryURLReplace: *redirectToAPI
+
+jsonScrapers:
+  sceneSearch:
+    scene:
+      Title:
+        # Prevent having a duplicated title so it doesn't get ignored by stash
+        selector: data.#.[title,_id]
+        postProcess:
+          - replace:
+              - regex: \[|\"
+                with: ""
+              - regex: \,
+                with: " ["
+      Date: data.#.date
+      URL:
+        selector: data.#.id
+        postProcess:
+          - replace:
+              - regex: ^
+                with: "https://api.metadataapi.net/jav/"
+      Image: data.#.background.full
+      Details: data.#.description
+
+  sceneScraper:
+    common:
+      $data: data
+      $performers: data.performers
+    scene: &scene
+      Title: $data.external_id
+      Details: $data.description
+      Director: $data.director.name
+      Date: $data.date
+      Code: $data.external_id
+      URL: $data.url
+      Image: $data.background.full
+      Performers:
+        Name: $performers.#.parent.name
+        URL:
+          selector: $performers.#.parent.id
+          postProcess:
+            - replace:
+                - regex: ^
+                  with: "https://metadataapi.net/performers/"
+      Studio:
+        Name: $data.site.name
+      Tags:
+        Name: $data.tags.#.name
+
+  sceneQueryScraper:
+    common:
+      $data: data.0
+      $performers: data.0.performers
+    scene: *scene
+
+  movieScraper:
+    movie:
+      Name: data.external_id
+      Studio:
+        Name: data.site.name
+      FrontImage: data.background.full
+      Synopsis: data.description
+      Date: data.date
+      Duration: data.duration
+      Director: data.director.name
+
+driver:
+  headers:
+    - Key: User-Agent
+      Value: stashjson/1.0.0
+    - Key: Authorization
+      Value: Bearer YOUR_API_KEY_HERE
+# Last Updated October 13, 2023
diff --git a/scrapers/ThePornDBMovies.yml b/scrapers/ThePornDBMovies.yml
new file mode 100644
index 000000000..f26b7b4ed
--- /dev/null
+++ b/scrapers/ThePornDBMovies.yml
@@ -0,0 +1,108 @@
+name: ThePornDB for Movies
+sceneByURL:
+  - action: scrapeJson
+    url:
+      - metadataapi.net/movies/
+    queryURL: "{url}"
+    queryURLReplace: &redirectToAPI
+      url:
+        - regex: ^(.+metadataapi.net)/
+          with: https://api.metadataapi.net/
+    scraper: sceneScraper
+sceneByName:
+  action: scrapeJson
+  queryURL: https://api.metadataapi.net/movies?parse={}&per_page=10
+  scraper: sceneSearch
+sceneByQueryFragment:
+  action: scrapeJson
+  queryURL: "{url}"
+  scraper: sceneScraper
+sceneByFragment:
+  action: scrapeJson
+  queryURL: https://api.metadataapi.net/movies?parse={filename}&per_page=10
+  scraper: sceneQueryScraper
+  queryURLReplace:
+    filename:
+      - regex: "[^a-zA-Z\\d\\-._~]" # clean filename so that it can construct a valid url
+        with: "." # "%20"
+      - regex: \.+
+        with: "."
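For reference, the two "filename" rules in the sceneByFragment block above (identical
in both new ThePornDB YAMLs) reduce a raw filename to a dotted, URL-safe string before
stash substitutes it into the queryURL. A minimal Python rendering of the same two
replacements (illustration only, not part of the scraper):

    import re

    def clean(filename: str) -> str:
        # anything outside the URL-safe set becomes ".", then runs of "." collapse
        cleaned = re.sub(r"[^a-zA-Z\d\-._~]", ".", filename)
        return re.sub(r"\.+", ".", cleaned)

    print(clean("Some Scene (2021) [1080p].mp4"))  # -> Some.Scene.2021.1080p.mp4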
+movieByURL:
+  - action: scrapeJson
+    url:
+      - metadataapi.net/movies/
+    scraper: movieScraper
+    queryURL: "{url}"
+    queryURLReplace: *redirectToAPI
+
+jsonScrapers:
+  sceneSearch:
+    scene:
+      Title:
+        # Prevent having a duplicated title so it doesn't get ignored by stash
+        selector: data.#.[title,_id]
+        postProcess:
+          - replace:
+              - regex: \[|\"
+                with: ""
+              - regex: \,
+                with: " ["
+      Date: data.#.date
+      URL:
+        selector: data.#.id
+        postProcess:
+          - replace:
+              - regex: ^
+                with: "https://api.metadataapi.net/movies/"
+      Image: data.#.background.full
+      Details: data.#.description
+
+  sceneScraper:
+    common:
+      $data: data
+      $performers: data.performers
+    scene: &scene
+      Title: $data.title
+      Details: $data.description
+      Director: $data.director.name
+      Date: $data.date
+      Code: $data.external_id
+      URL: $data.url
+      Image: $data.background.full
+      Performers:
+        Name: $performers.#.parent.name
+        URL:
+          selector: $performers.#.parent.id
+          postProcess:
+            - replace:
+                - regex: ^
+                  with: "https://metadataapi.net/performers/"
+      Studio:
+        Name: $data.site.name
+      Tags:
+        Name: $data.tags.#.name
+
+  sceneQueryScraper:
+    common:
+      $data: data.0
+      $performers: data.0.performers
+    scene: *scene
+
+  movieScraper:
+    movie:
+      Name: data.title
+      Studio:
+        Name: data.site.name
+      FrontImage: data.background.full
+      Synopsis: data.description
+      Date: data.date
+      Duration: data.duration
+      Director: data.director.name
+
+driver:
+  headers:
+    - Key: User-Agent
+      Value: stashjson/1.0.0
+    - Key: Authorization
+      Value: Bearer YOUR_API_KEY_HERE
+# Last Updated October 13, 2023
diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup.yml
deleted file mode 100644
index 524ca4a4f..000000000
--- a/scrapers/TheScoreGroup.yml
+++ /dev/null
@@ -1,59 +0,0 @@
-name: TheScoreGroup
-sceneByURL:
-  - action: scrapeXPath
-    url: &urls
-      - 18eighteen.com/
-      - 40somethingmag.com/
-      - 50plusmilfs.com/
-      - 60plusmilfs.com/
-      - legsex.com/
-      - pornmegaload.com/
-      - scoreland.com/
-      - xlgirls.com/
-    scraper: sceneScraper
-galleryByURL:
-  - action: scrapeXPath
-    url: *urls
-    scraper: galleryScraper
-xPathScrapers:
-  sceneScraper:
-    scene:
-      Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text()
-      Studio: &studioAttr
-        Name:
-          selector: //meta[@property="og:site_name"]/@content
-      Date: &dateAttr
-        selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text()
-        postProcess:
-          - replace:
-              - regex: ..,
-                with:
-          - parseDate: January 2 2006
-      Details:
-        selector: //div[@class="row"]/div/div[@class="p-desc"]/text()
-        concat: "\n"
-      Tags:
-        Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text()
-      Performers: &performersAttr
-        Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text()
-      Image:
-        selector: //meta[@itemprop="image"]/@content
-        # Enable this post process if you want better image quality but sometimes it can fail
-        # postProcess:
-        #   - replace:
-        #       - regex: _lg
-        #         with: _x_800
-  galleryScraper:
-    common:
-      $photopage: //section[@id='photos_page-page']
-    gallery:
-      Title: $photopage/div[@class="row"]/div/h2/text()
-      Studio: *studioAttr
-      Date: *dateAttr
-      Details:
-        selector: $photopage//div[@class="p-desc"]/text()
-        concat: "\n"
-      Tags:
-        Name: $photopage//div[@class='mb-3']/a/text()
-      Performers: *performersAttr
-# Last Updated November 10, 2021
diff --git a/scrapers/TheScoreGroup/TheScoreGroup.py b/scrapers/TheScoreGroup/TheScoreGroup.py
new file mode 100644
index 000000000..cc5dc4513
--- /dev/null
+++ b/scrapers/TheScoreGroup/TheScoreGroup.py
@@ -0,0 +1,145 @@
+import sys
+import argparse
+import json
+import os
+import requests
+import re
+
+# to import from a parent directory we need to add that directory to the system path
+csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
+parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
+sys.path.append(
+    parent
+)  # add parent dir to sys path so that we can import py_common from there
+
+try:
+    from lxml import html
+except ModuleNotFoundError:
+    print(
+        "You need to install the lxml module. (https://lxml.de/installation.html#installation)",
+        file=sys.stderr,
+    )
+    print(
+        "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml",
+        file=sys.stderr,
+    )
+    sys.exit()
+
+try:
+    import py_common.log as log
+except ModuleNotFoundError:
+    print(
+        "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)",
+        file=sys.stderr,
+    )
+    sys.exit()
+
+# Shared client because we're making multiple requests
+client = requests.Session()
+
+
+# Example element:
+# (search-result markup omitted: each result is an element with class "model"
+#  whose <a> carries the model page URL in @href and the model name in @title,
+#  and whose <img> carries the thumbnail in @src)
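The @href values in those result cards are wrapped search-result links; the
fixed_url substitution in map_performer below rewrites them into canonical
https://www.SITE.com/... URLs. A minimal sketch of that rewrite, using a
hypothetical href of the ".../SITE/2/0/PATH" shape the regex expects (real
values come from the scoreland.com search results):

    import re

    href = "https://cdn.example/tour/scoreland/2/0/models/jane-doe/?nats=token"
    fixed = re.sub(r".*?([^/]*(?=/2/0))/2/0/([^?]*)", r"https://www.\1.com/\2", href)
    print(fixed)  # -> https://www.scoreland.com/models/jane-doe/?nats=token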
+
+def map_performer(el):
+    url = el.xpath(".//a/@href")[0]
+    if "no-model" in url:
+        return None
+    name = el.xpath(".//a/@title")[1]
+    image = el.xpath(".//img/@src")[0]
+    fixed_url = re.sub(r".*?([^/]*(?=/2/0))/2/0/([^?]*)", r"https://www.\1.com/\2", url)
+
+    if client.head(fixed_url).status_code != 200:
+        log.debug(f"Performer '{name}' has a broken profile link, skipping")
+        return None
+
+    return {
+        "name": name,
+        "url": fixed_url,
+        "image": image,
+    }
+
+
+def performer_query(query: str):
+    # Form data to be sent as the POST request body
+    payload = {
+        "ci_csrf_token": "",
+        "keywords": query,
+        "s_filters[site]": "all",
+        "s_filters[type]": "models",
+        "m_filters[sort]": "top_rated",
+        "m_filters[gender]": "any",
+        "m_filters[body_type]": "any",
+        "m_filters[race]": "any",
+        "m_filters[hair_color]": "any",
+    }
+    result = client.post("https://www.scoreland.com/search-es/", data=payload)
+    tree = html.fromstring(result.content)
+    performers = [p for x in tree.find_class("model") if (p := map_performer(x))]
+
+    if not performers:
+        log.warning(f"No performers found for '{query}'")
+    return performers
+
+
+def main():
+    parser = argparse.ArgumentParser("ScoreGroup Scraper", argument_default="")
+    subparsers = parser.add_subparsers(
+        dest="operation", help="Operation to perform", required=True
+    )
+    subparsers.add_parser("search", help="Search for performers").add_argument(
+        "name", nargs="?", help="Name to search for"
+    )
+
+    if len(sys.argv) == 1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+
+    args = parser.parse_args()
+    log.debug(f"Arguments from commandline: {args}")
+    # Script is being piped into, probably by Stash
+    if not sys.stdin.isatty():
+        try:
+            frag = json.load(sys.stdin)
+            args.__dict__.update(frag)
+            log.debug(f"With arguments from stdin: {args}")
+        except json.decoder.JSONDecodeError:
+            log.error("Received invalid JSON from stdin")
+            sys.exit(1)
+
+    if args.operation == "search":
+        name: str = args.name
+        if not name:
+            log.error("No query provided")
+            sys.exit(1)
+        log.debug(f"Searching for '{name}'")
+        matches = performer_query(name)
+        print(json.dumps(matches))
+        sys.exit(0)
+
+    # Just in case the above if statement doesn't trigger somehow
+    # Something has gone quite wrong should this ever get hit
+    log.error("An error has occurred")
+    sys.exit(2)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scrapers/TheScoreGroup/TheScoreGroup.yml b/scrapers/TheScoreGroup/TheScoreGroup.yml
new file mode 100644
index 000000000..0d52ce0fa
--- /dev/null
+++ b/scrapers/TheScoreGroup/TheScoreGroup.yml
@@ -0,0 +1,144 @@
+name: TheScoreGroup
+sceneByURL:
+  - action: scrapeXPath
+    url: &urls
+      - 18eighteen.com/
+      - 40somethingmag.com/
+      - 50plusmilfs.com/
+      - 60plusmilfs.com/
+      - bigtithooker.com/
+      - legsex.com/
+      - naughtymag.com
+      - pornmegaload.com/
+      - scoreland.com/
+      - scoreland2.com/
+      - xlgirls.com/
+      - scorevideos.com/
+      - milftugs.com/
+    scraper: sceneScraper
+galleryByURL:
+  - action: scrapeXPath
+    url: *urls
+    scraper: galleryScraper
+performerByURL:
+  - action: scrapeXPath
+    url:
+      - 18eighteen.com/
+      - 40somethingmag.com/
+      - 50plusmilfs.com/
+      - 60plusmilfs.com/
+      - bigtithooker.com/
+      - legsex.com/
+      - naughtymag.com
+      - pornmegaload.com/
+      - scoreland.com/
+      - scoreland2.com/
+      - xlgirls.com/
+      - scorevideos.com/
+      - milftugs.com/
+    scraper: performerScraper
+performerByName:
+  action: script
+  script:
+    - python
+    - TheScoreGroup.py
+    - search
+
+xPathScrapers:
+  sceneScraper:
+    common:
+      $url: //link[@rel="canonical"]/@href
+    scene:
+      Title: &titleAttr
//h1[1]/text() + Studio: &studioAttr + Name: + selector: //meta[@property="og:site_name"]/@content + Date: &dateAttr + selector: //div[@class="stat mb-2"]/span[contains(.,"Date:")]/../span[@class="value"]/text() + postProcess: + - replace: + - regex: .., + with: + - parseDate: January 2 2006 + Details: &details + selector: //div[@class="p-desc p-3"]//text() + postProcess: + - replace: + - regex: Read More » + with: + # Attempt to fix erroneous line breaks where HTML tags existed + - regex: \n\n([0-9a-zA-Z\.]+)\n\n + with: " $1 " + concat: "\n\n" + Tags: + Name: //div[@class="mb-3"]/a/text() + Performers: &performersAttr + Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text() + Image: + # This selector scrapes the canonical scene page cover image + selector: //script[contains(text(), "poster")] + postProcess: + - replace: + - regex: ^.+poster.+'(.+jpg)'.+$ + with: $1 + # This Selector scrapes the image posted on social media sites + # selector: //meta[@itemprop="image"]/@content + # Enable this post process if you want better image quality but sometimes it can fail + # postProcess: + # - replace: + # - regex: _lg + # with: _x_800 + URL: &urlAttr + selector: $url + Code: &codeAttr + selector: $url + postProcess: + - replace: + - regex: .*\/(\d+)\/?$ + with: $1 + galleryScraper: + common: + $photopage: //section[@id='photos_page-page'] + gallery: + Title: $photopage/div[@class="row"]/div/h2/text() + Studio: *studioAttr + Date: *dateAttr + Details: + selector: $photopage//div[contains(@class, 'p-desc')]/text() + concat: "\n" + Tags: + Name: $photopage//div[@class='mb-3']/a/text() + Performers: *performersAttr + + performerScraper: + performer: + Name: //h1[@class="model-title"] + Gender: + fixed: Female + URL: //meta[@property="og:url"]/@content + Ethnicity: + selector: //span[text()="Ethnicity:"]/following-sibling::span + postProcess: + - map: + White: Caucasian + HairColor: //span[text()="Hair Color:"]/following-sibling::span + Height: + selector: //span[text()="Height:"]/following-sibling::span + postProcess: + - feetToCm: true + Weight: + selector: //span[text()="Weight:"]/following-sibling::span + postProcess: + - replace: + - regex: (\d+).* + with: $1 + - lbToKg: true + Measurements: + selector: //span[text()="Bra Size:"]/following-sibling::span|//span[text()="Measurements:"]/following-sibling::span + concat: "-" + postProcess: + - replace: + - regex: (\d+[a-zA-Z]{1,3})-\d+(-\d+-\d+) + with: $1$2 + Image: //section[@id="model-page"]//img[@class="lazyload"]/@src +# Last Updated December 03, 2023 diff --git a/scrapers/ThirdRockEnt.yml b/scrapers/ThirdRockEnt.yml index e92974edb..f027b4390 100644 --- a/scrapers/ThirdRockEnt.yml +++ b/scrapers/ThirdRockEnt.yml @@ -2,29 +2,45 @@ name: ThirdRockEnt sceneByURL: - action: scrapeXPath url: - - abbiemaley.com/trailers + - abbiemaley.com/trailers/ + - analamateur.com/trailers/ - analbbc.com/trailers/ - analized.com/trailers/ - analviolation.com/trailers/ - baddaddypov.com/trailers/ - badfamilypov.com/trailers/ - badmommypov.com/trailers/ + - brokensluts.net/trailers/ + - cumdumpsterteens.com/trailers/ + - daughterjoi.com/trailers/ - downtofuckdating.com/trailers/ - dtfsluts.com/trailers/ + - fullpornnetwork.com/trailers/ - girlfaction.com/trailers/ - hergape.com/trailers/ - homemadeanalwhores.com/trailers/ - jamesdeen.com/trailers/ + - lesbiananalsluts.com/trailers/ + - mommyjoi.com/trailers/ + - mugfucked.com/trailers/ - onlyprince.com/trailers/ - pervertgallery.com/trailers/ - porkvendors.com/trailers/ - 
pornforce.com/trailers/ - povperverts.net/trailers/ - publicsexdate.com/trailers/ + - realfuckingdating.com/trailers/ + - shefucksonthefirstdate.com/trailers/ - slutinspection.com/trailers/ + - slutsbts.com/trailers/ + - slutspov.com/trailers/ + - sluttybbws.com/trailers/ + - teasingandpleasing.com/trailers/ - teenageanalsluts.com/trailers/ - teenagecorruption.com/trailers/ + - teenagetryouts.com/trailers/ - twistedvisual.com/trailers/ + - wifespov.com/trailers/ - yourmomdoesanal.com/trailers/ - yourmomdoesporn.com/trailers/ scraper: sceneScraper @@ -49,29 +65,44 @@ xPathScrapers: postProcess: - map: https://AbbieMaley.com: Abbie Maley + https://AnalAmateur.com: Anal Amateur https://AnalBBC.com: AnalBBC https://ANALIZED.com: Analized https://analviolation.com: Anal Violation https://BadDaddyPOV.com: Bad Daddy POV - https://BadFamilyPov.com: Bad Family POV - https://BadMommyPov.com: Bad Mommy POV + https://BadFamilyPOV.com: Bad Family POV https://BadMommyPOV.com: Bad Mommy POV + https://BrokenSluts.net: Broken Sluts + https://CumDumpsterTeens.com: Cum Dumpster Teens + https://DaughterJOI.com: Daughter JOI https://DownToFuckDating.com: Down To Fuck Dating https://DTFsluts.com: DTF Sluts + https://FullPornNetwork.com: Full Porn Network https://Girlfaction.com: Girlfaction https://HerGape.com: Her Gape https://HomemadeAnalWhores.com: Homemade Anal Whores https://JamesDeen.com: James Deen + https://LesbianAnalSluts.com: Lesbian Anal Sluts + https://MommyJOI.com: Mommy JOI + https://mugfucked.com: Mug Fucked https://OnlyPrince.com: Only Prince https://PervertGallery.com: Pervert Gallery https://PorkVendors.com: Pork Vendors https://PornForce.com: Porn Force https://POVPerverts.net: POV Perverts https://publicsexdate.com: Public Sex Date + https://RealFuckingDating.com: Real Fucking Dating + https://SheFucksOnTheFirstDate.com: She Fucks On The First Date https://SlutInspection.com: Slut Inspection + https://slutsbts.com: Sluts BTS + https://slutspov.com: Sluts POV + https://sluttybbws.com: Slutty BBWs + https://TeasingandPleasing.com: Teasing And Pleasing https://teenageanalsluts.com: Teenage Anal Sluts https://TeenageCorruption.com: Teenage Corruption + https://teenagetryouts.com: Teenage Tryouts https://TwistedVisual.com: Twisted Visual + https://wifespov.com: Wifes POV https://yourmomdoesanal.com: Your Mom Does Anal https://YourMomDoesPorn.com: Your Mom Does Porn -# Last Updated December 20, 2022 +# Last Updated August 26, 2023 diff --git a/scrapers/ThisVid.yml b/scrapers/ThisVid.yml new file mode 100644 index 000000000..bb07f45c8 --- /dev/null +++ b/scrapers/ThisVid.yml @@ -0,0 +1,54 @@ +name: "ThisVid" +sceneByURL: + - action: scrapeXPath + url: + - thisvid.com + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - thisvid.com + scraper: performerScraper +xPathScrapers: + sceneScraper: + common: + $container: //div[@class='container'] + $videowrap: //div[@class='column-centre column-video']//div[@class='wrap'] + $desc: //ul[@class="description"] + scene: + Title: $container//div[@class='headline']//h1/text() + Image: + selector: $videowrap//div[@class='video-holder']//div//img/@src + postProcess: + - replace: + - regex: "//" + with: https:// + Tags: + Name: $desc//li//a[contains(@href,"/tags/")]/text() + Details: $desc//li//p/text() + Code: + selector: //meta[@property="og:video:url"]/@content + postProcess: + - replace: + - regex: .+/(\d+)/?$ + with: $1 + performerScraper: + common: + $profileCaseL: //div[@class="profile"]//div[@class="case"]//div[@class="case-left"] + 
$profileCaseR: //div[@class="profile"]//div[@class="case"]//div[@class="case-right"]
+    performer:
+      Name: $profileCaseL//span[contains(text(),"Name")]/strong|//div[@class="profile-menu"]//div[@class="headline"]//h2/text()
+      Birthdate:
+        selector: $profileCaseL//span[contains(text(),"Birth")]/strong
+        postProcess:
+          - parseDate: 02 January, 2006
+          - parseDate: 2006-01-02
+      Country:
+        selector: $profileCaseL//span[contains(text(),"Country")]/strong
+        postProcess:
+          - map:
+              United States: "USA"
+      Gender: $profileCaseR//span[contains(text(),"Gender")]/strong
+      Image: //div[@class="avatar"]/img[not(contains(@src,"no-avatar"))]/@src
+
+# Last Updated February 26, 2023
diff --git a/scrapers/TmwVRnet.yml b/scrapers/TmwVRnet.yml
deleted file mode 100644
index 6ae21be7f..000000000
--- a/scrapers/TmwVRnet.yml
+++ /dev/null
@@ -1,60 +0,0 @@
-name: "TMWVRNET"
-sceneByURL:
-  - action: scrapeXPath
-    url:
-      - tmwvrnet.com
-    scraper: sceneScraper
-movieByURL:
-  - action: scrapeXPath
-    url:
-      - tmwvrnet.com
-    scraper: movieScraper
-xPathScrapers:
-  sceneScraper:
-    common:
-      $info: &infoSel //div[@class="content__main"]
-      $about: &aboutSel //div[@class="about-video"]
-    scene:
-      Title: &titleSel $about//h1[@class="title"]/text()
-      Date: &dateAttr
-        selector: $info//img[@alt="Release date"]/following-sibling::div/text()
-        postProcess:
-          - parseDate: January 2, 2006
-      Details: &detailsSel $about/p[@class="about"]/text()
-      Tags:
-        Name: $about/div[starts-with( @class,"tags-list")]/a/text()
-      Performers:
-        Name: $about/p[@class="featuring"]/a/text()
-      Studio: &studioSel
-        Name:
-          fixed: TmwVRnet
-      Image: &imageSel
-        selector: $info//dl8-video/@poster
-        postProcess:
-          - replace:
-              - regex: ^
-                with: https://tmwvrnet.com
-  movieScraper:
-    common:
-      $info: *infoSel
-      $about: *aboutSel
-    movie:
-      Name:
-        selector: *titleSel
-        postProcess:
-          - replace:
-              - regex: ^
-                with: "TmwVRnet - "
-      Duration:
-        selector: $info//div[starts-with(@class,"durations")]/div/text()
-        postProcess:
-          - replace:
-              - regex: \shour
-                with: ":"
-              - regex: \smin
-                with: ":00"
-      Date: *dateAttr
-      Studio: *studioSel
-      Synopsis: *detailsSel
-      FrontImage: *imageSel
-# Last Updated September 26, 2021
diff --git a/scrapers/Tokyohot/Tokyohot.py b/scrapers/Tokyohot/Tokyohot.py
new file mode 100644
index 000000000..eb01e79f3
--- /dev/null
+++ b/scrapers/Tokyohot/Tokyohot.py
@@ -0,0 +1,371 @@
+import base64
+import json
+import sys
+import os
+import re
+
+
+# to import from a parent directory we need to add that directory to the system path
+csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
+parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
+sys.path.append(
+    parent
+)  # add parent dir to sys path so that we can import py_common from there
+
+BASE_QUERY_MEDIA_SEARCH = "https://my.tokyo-hot.com/product/?q="
+BASE_DETAIL_URL = "https://my.tokyo-hot.com"
+
+JAP_TO_US_BUST = {
+    "A": "AA",
+    "B": "A",
+    "C": "B",
+    "D": "C",
+    "E": "D",
+    "F": "DD",
+    "G": "DDD",
+    "H": "F",
+    "I": "G",
+    "J": "H",
+    "K": "I",
+}
+
+MEDIA_CONFIGURATIONS = [
+    ## must contain either 1 or 2 capture groups
+    ## group 1 = the code
+    ## group 2 (optional) = the part number if it's a multi-part (split) scene
+    r"(n\d{4})\D*_\D{2}(\d)\S*",  # "multi-part N series"
+    r"(n\d{4})\S*",  # "single part N series"
+    r"(k\d{4})\S*",  # "single part K series"
+    r"(kb\d{4})\S*",  # "single part KB series"
+]
+
+try:
+    from py_common import log
+except ModuleNotFoundError:
+    print(
+        "You need to download the folder 'py_common' from
the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + import requests +except ModuleNotFoundError: + print( + "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", + file=sys.stderr, + ) + sys.exit() + +try: + from bs4 import BeautifulSoup +except ModuleNotFoundError: + print( + "You need to install the Beautiful Soup module. (https://pypi.org/project/beautifulsoup4/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", + file=sys.stderr, + ) + sys.exit() + + +class ScenePage: + def __init__(self, scene_id, multipart, partnum, url): + self.url = url + self.soup = _soup_maker(self.url) + self.scene_id = scene_id + self.multipart = multipart + self.partnum = partnum + self.title = self.get_title() + self.studio = self.get_studio() + self.image = self.get_image() + self.details = self.get_details() + self.performers = self.get_performers() + self.date = self.get_date() + self.tags = self.get_tags() + + def get_title(self): + title = self.scene_id + if self.multipart: + title = title + f" - Part {self.partnum}" + scene_title = self.soup.find("div", {"class": "pagetitle"}) + if scene_title: + title = title + " - " + scene_title.text.strip() + return title + + def get_studio(self): + info = self.soup.find("div", {"class": "infowrapper"}) + info_links = info.find_all("a") + for link in info_links: + if "vendor" in link.get("href"): + return link.text + return None + + def get_image(self): + info = self.soup.find("video") + if info: + return get_image(info.get("poster")) + return None + + def get_performers(self): + performers = [] + info = self.soup.find("div", {"class": "infowrapper"}) + info_links = info.find_all("a") + for link in info_links: + if "cast" in link.get("href"): + perf = TokyoHotModel( + model_url=BASE_DETAIL_URL + link.get("href") + ).get_json() + performers.append(perf) + return performers + + def get_details(self): + details = None + scene_details = self.soup.find("div", {"class": "sentence"}) + if scene_details: + details = scene_details.text.strip() + return details + + def get_date(self): + log.info("Invoking self date") + info_dd = self.soup.find("div", {"class": "infowrapper"}).find_all("dd") + for dd in info_dd: + search = re.search("(\d{4})/(\d{2})/(\d{2})", dd.text) + if search: + date = f"{search[1]}-{search[2]}-{search[3]}" + return date + return None + + def get_tags(self): + potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a") + return [ + {"Name": a.text} for a in potential_tags if "type=play" in a.get("href") + ] + + def get_json(self): + return { + "Title": self.title, + "Details": self.details, + "URL": self.url, + "Date": self.date, + "Performers": self.performers, + "Studio": {"Name": self.studio}, + "Code": self.scene_id, + "Image": self.image, + "Tags": self.tags, + } + + +class TokyoHotModel: + def __init__(self, model_url): + self.url = model_url + self.model_soup = _soup_maker(self.url) + self.model_name = self.get_name() + self.height = self.get_height() + self.weight = self.get_weight() + self.measurements = self.get_measurements() + self.images = self.get_images() + self.gender = "Female" + self.ethnicity = "Asian" + self.country = "JP" + + def get_name(self): + name = 
None + model_name = self.model_soup.find("div", {"class": "pagetitle mb0"}) + if model_name: + name = model_name.text.strip() + return name + + def get_height(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + + if info_dict.get("Height"): + parse_data = re.search("(\d{3})cm\s~\s(\d{3})cm", info_dict.get("Height")) + if parse_data: + data = (int(parse_data[1]) + int(parse_data[2])) / 2 + return str(data) + return None + + def get_weight(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + if info_dict.get("Weight"): + parse_data = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Weight") + ) + if parse_data: + data = (int(parse_data[1]) + int(parse_data[2])) / 2 + return str(data) + return None + + def get_measurements(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + + cup = None + bust = None + waist = None + hip = None + + if info_dict.get("Cup Size"): + parse_cup = re.search("^(\w)", info_dict.get("Cup Size")) + if parse_cup: + cup = JAP_TO_US_BUST.get(parse_cup[1].strip()) + + if info_dict.get("Bust Size"): + parse_bust = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Bust Size") + ) + if parse_bust: + bust = round(((int(parse_bust[1]) + int(parse_bust[2])) / 2) * 0.393701) + + if info_dict.get("Waist Size"): + parse_waist = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Waist Size") + ) + if parse_waist: + waist = round( + ((int(parse_waist[1]) + int(parse_waist[2])) / 2) * 0.393701 + ) + + if info_dict.get("Hip"): + parse_hip = re.search("(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Hip")) + if parse_hip: + hip = round(((int(parse_hip[1]) + int(parse_hip[2])) / 2) * 0.393701) + + if cup and bust and waist and hip: + return f"{bust}{cup}-{waist}-{hip}" + + return None + + def get_images(self): + try: + model_url = ( + self.model_soup.find("div", {"id": "profile"}).find("img").get("src") + ) + return [get_image(model_url)] + except: + return None + + def get_json(self): + return { + "Name": self.model_name, + "Gender": self.gender, + "URL": self.url, + "Ethnicity": self.ethnicity, + "Country": self.country, + "Height": self.height, + "Weight": self.weight, + "Measurements": self.measurements, + "Images": self.images, + } + + +def query(fragment, query_type): + res = None + media_info = None + + if query_type in ("scene"): + name = re.sub(r"\s", "_", fragment["title"]).lower() + media_info = _extract_media_id(name) + + if media_info: + res = scrape_scene( + name=media_info["code"], + multipart=media_info["multipart"], + partnum=media_info["partnum"], + ) + + return res + + +def _soup_maker(url: str): + requests.packages.urllib3.disable_warnings() + try: + html = requests.get(url, verify=False) + soup = BeautifulSoup(html.text, "html.parser") + except Exception as e: + log.error("Error retrieving specified URL") + raise e + return soup + + +def _parse_media_search(soup): + detail_page_url = None + detail_object = soup.find("a", {"class": "rm"}) + if detail_object: + detail_page_url = BASE_DETAIL_URL + detail_object.get("href") + 
log.info(f"Scene URL found: {detail_page_url}") + return detail_page_url + + +def _extract_media_id(media_title: str, configuration: dict = MEDIA_CONFIGURATIONS): + log.info(f"Extracting Media ID for {media_title}") + + def _extract_multi_part(search_results): + if len(search_results.groups()) > 1: + return (True, search_results[2]) + return (False, False) + + for config in configuration: + search = re.search(pattern=config, string=media_title) + if search: + scene_info = { + "code": search[1], + "multipart": _extract_multi_part(search)[0], + "partnum": _extract_multi_part(search)[1], + } + log.info(f"Regex matched. Details {scene_info}") + return scene_info + return None + + +def scrape_scene(name, multipart, partnum): + search_soup = _soup_maker(BASE_QUERY_MEDIA_SEARCH + name) + scene_url = _parse_media_search(soup=search_soup) + if scene_url is None: + log.info(f"Scene not found: {name}. Try another server region, e.g. Hong Kong") + return None + scene_page = ScenePage( + scene_id=name, multipart=multipart, partnum=partnum, url=scene_url + ) + response = scene_page.get_json() + return response + + +def get_image(image_url): + try: + response = requests.get(image_url, verify=False, timeout=(3, 6)) + except requests.exceptions.RequestException: + log.error(f"Error fetching URL {image_url}") + + if response.status_code < 400: + mime = "image/jpeg" + encoded = base64.b64encode(response.content).decode("utf-8") + return f"data:{mime};base64,{encoded}" + + log.info(f"Fetching {image_url} resulted in error: {response.status_code}") + return None + + +def main(): + scraper_input = sys.stdin.read() + i = json.loads(scraper_input) + ret = {} + if sys.argv[1] == "query": + ret = query(i, sys.argv[2]) + output = json.dumps(ret) + print(output) + + +main() diff --git a/scrapers/Tokyohot.yml b/scrapers/Tokyohot/Tokyohot.yml similarity index 80% rename from scrapers/Tokyohot.yml rename to scrapers/Tokyohot/Tokyohot.yml index 6e1935766..1f976b3da 100644 --- a/scrapers/Tokyohot.yml +++ b/scrapers/Tokyohot/Tokyohot.yml @@ -1,4 +1,6 @@ name: tokyohot +# requires: py_common + sceneByURL: - action: scrapeXPath url: @@ -19,5 +21,12 @@ xPathScrapers: Performers: Name: $movieinfo/dd[1]/a/text() Image: //li[@class="package"]/a[1]/@href +sceneByFragment: + action: script + script: + - python3 + - Tokyohot.py + - query + - scene -# Last Updated November 08, 2020 +# Last Updated July 20, 2023 diff --git a/scrapers/Tonightsgirlfriend.yml b/scrapers/Tonightsgirlfriend.yml index 608ee33f0..fc96aba99 100644 --- a/scrapers/Tonightsgirlfriend.yml +++ b/scrapers/Tonightsgirlfriend.yml @@ -9,6 +9,12 @@ xPathScrapers: scene: Title: //h1 Details: //p[@class="scene-description"] + Date: + selector: //script[contains(text(),"uploadDate")]/text() + postProcess: + - replace: + - regex: '.+(\d{4}-\d{2}-\d{2}).+' + with: "$1" Performers: Name: selector: //p[@class="grey-performers"]//text() @@ -28,4 +34,4 @@ xPathScrapers: URL: //link[@rel='canonical']/@href Tags: Name: //a[@class="cat-tag"] -# Last Updated April 22, 2022 +# Last Updated August 03, 2023 diff --git a/scrapers/TopWebModels.py b/scrapers/TopWebModels.py deleted file mode 100644 index 5b982b2f9..000000000 --- a/scrapers/TopWebModels.py +++ /dev/null @@ -1,119 +0,0 @@ -import json -import os -import re -import sys - -try: - import py_common.log as log -except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) - sys.exit() -# make sure to install below modules if needed -try: - import requests -except ModuleNotFoundError: - print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr) - sys.exit() - -try: - from bs4 import BeautifulSoup -except ModuleNotFoundError: - print("You need to install the BeautifulSoup module. (https://pypi.org/project/beautifulsoup4/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", file=sys.stderr) - sys.exit() - -def get_from_url(url_to_parse): - m = re.match(r'https?://tour\.((\w+)\.com)/scenes/(\d+)/([a-z0-9-]+)', url_to_parse) - if m is None: - return None, None, None, None - return m.groups() - - -def make_request(request_url, origin_site): - requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += 'HIGH:!DH:!aNULL' - requests.packages.urllib3.disable_warnings() - - try: - requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST += 'HIGH:!DH:!aNULL' - except AttributeError: - # no pyopenssl support used / needed / available - pass - - try: - r = requests.get(request_url, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0', - 'Origin': origin_site, - 'Referer': request_url - }, timeout=(3, 6), verify=False) - except requests.exceptions.RequestException as e: - return None, e - - if r.status_code == 200: - return r.text, None - return None, f"HTTP Error: {r.status_code}" - -def fetch_page_json(page_html): - matches = re.findall(r'window\.__DATA__ = (.+)$', page_html, re.MULTILINE) - return json.loads(matches[0]) if matches else None - -def main(): - stdin = sys.stdin.read() - log.debug(stdin) - fragment = json.loads(stdin) - - if not fragment['url']: - log.error('No URL entered.') - sys.exit(1) - url = fragment['url'].strip() - site, studio, sid, slug = get_from_url(url) - if site is None: - log.error('The URL could not be parsed') - sys.exit(1) - response, err = make_request(url, f"https://{site}") - if err is not None: - log.error('Could not fetch page HTML', err) - sys.exit(1) - j = fetch_page_json(response) - if j is None: - log.error('Could not find JSON on page') - sys.exit(1) - if 'video' not in j['data']: - log.error('Could not locate scene within JSON') - sys.exit(1) - - scene = j["data"]["video"] - - if scene.get('id'): - if str(scene['id']) != sid: - log.error('Wrong scene within JSON') - sys.exit(1) - log.info(f"Scene {sid} found") - scrape = {} - if scene.get('title'): - scrape['title'] = scene['title'] - if scene.get('release_date'): - scrape['date'] = scene['release_date'][:10] - if scene.get('description'): - details = BeautifulSoup(scene['description'], "html.parser").get_text() - scrape['details'] = details - if scene.get('sites'): - scene_studio = scene['sites'][0]['name'] - scrape['studio'] = {'name': scene_studio} - if scene.get('models'): - models = [] - for m in scene['models']: - models.extend([x.strip() for x in m['name'].split("&") ]) - scrape['performers'] = [{'name': x} for x in models] - if scene.get('tags'): - scrape['tags'] = [{'name': x['name']} for x in scene['tags']] - if j['data'].get('file_poster'): - scrape['image'] = j['data']['file_poster'] - print(json.dumps(scrape)) - - -if __name__ == '__main__': - 
try:
-        main()
-    except Exception as e:
-        log.error(e)
diff --git a/scrapers/TopWebModels/TopWebModels.py b/scrapers/TopWebModels/TopWebModels.py
new file mode 100644
index 000000000..2066e6905
--- /dev/null
+++ b/scrapers/TopWebModels/TopWebModels.py
@@ -0,0 +1,123 @@
+import html
+import json
+import os
+import re
+import sys
+
+# to import from a parent directory we need to add that directory to the system path
+csd = os.path.dirname(os.path.realpath(__file__))  # get current script directory
+parent = os.path.dirname(csd)  # parent directory (should be the scrapers one)
+sys.path.append(
+    parent
+)  # add parent dir to sys path so that we can import py_common from there
+
+try:
+    import py_common.log as log
+except ModuleNotFoundError:
+    print(
+        "You need to download the folder 'py_common' from the community repo!"
+        " (CommunityScrapers/tree/master/scrapers/py_common)",
+        file=sys.stderr,
+    )
+    sys.exit(1)
+
+# make sure to install below modules if needed
+try:
+    import requests
+except ModuleNotFoundError:
+    log.error(
+        "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)"
+    )
+    log.error("Run this command in a terminal (cmd): python -m pip install requests")
+    sys.exit(1)
+
+try:
+    from bs4 import BeautifulSoup
+except ModuleNotFoundError:
+    log.error(
+        "You need to install the BeautifulSoup module. (https://pypi.org/project/beautifulsoup4/)"
+    )
+    log.error(
+        "Run this command in a terminal (cmd): python -m pip install beautifulsoup4"
+    )
+    sys.exit(1)
+
+
+def parse_url(url):
+    if m := re.match(r"https?://tour\.((\w+)\.com)/scenes/(\d+)/([a-z0-9-]+)", url):
+        return m.groups()
+    log.error("The URL could not be parsed")
+    sys.exit(1)
+
+
+def make_request(request_url):
+    try:
+        r = requests.get(
+            request_url,
+            headers={
+                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0",
+                "Referer": request_url,
+            },
+            timeout=(3, 6),
+        )
+    except requests.exceptions.RequestException as e:
+        log.error(f"Request to '{request_url}' failed: {e}")
+        exit(1)
+
+    if r.status_code != 200:
+        log.error(f"HTTP Error: {r.status_code}")
+        exit(1)
+    return BeautifulSoup(r.text, "html.parser")
+
+
+if __name__ == "__main__":
+    fragment = json.loads(sys.stdin.read())
+
+    if not (url := fragment["url"]):
+        log.error("No URL entered.")
+        sys.exit(1)
+    log.debug(f"Scraping URL: {url}")
+
+    soup = make_request(url)
+    props = soup.find("script", {"type": "application/json"})
+    if not props:
+        log.error("Could not find JSON in page")
+        sys.exit(1)
+
+    props = json.loads(props.text)
+    content = props["props"]["pageProps"]["content"]
+
+    # debug dump of the scraped JSON into the working directory
+    with open("debug.json", "w", encoding="utf-8") as f:
+        f.write(json.dumps(content, indent=2))
+
+    if not (scene_id := content.get("id")):
+        log.error("Could not find scene ID")
+        sys.exit(1)
+    log.info(f"Scene {scene_id} found")
+
+    scene = {
+        "code": str(scene_id),
+    }
+
+    if title := content.get("title"):
+        scene["title"] = html.unescape(title)
+    if date := content.get("publish_datedate"):
+        from datetime import datetime
+
+        scene["date"] = datetime.strptime(date[:10], "%Y/%m/%d").strftime("%Y-%m-%d")
+    if description := content.get("description"):
+        scene["details"] = html.unescape(description).replace("\u00a0", " ")
+    if sites := content.get("sites"):
+        scene_studio = sites[0]["name"]
+        scene["studio"] = {"name": scene_studio}
+    if models := content.get("models"):
+        scene["performers"] = [{"name": x} for x in models]
+    if tags := content.get("tags"):
+        scene["tags"] = [{"name": x} for x in tags]
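# Note on the cover selection just below: the elif runs only when "thumb" is
# missing altogether, so a scene whose "thumb" is a .gif ends up with no image
# at all, even when non-gif alternatives exist under "thumbs".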
+ if (scene_cover := content.get("thumb")) and not scene_cover.endswith(".gif"): + scene["image"] = scene_cover + elif alternative_covers := content.get("thumbs"): + # We don't want gifs + scene["image"] = alternative_covers[0] + print(json.dumps(scene)) diff --git a/scrapers/TopWebModels.yml b/scrapers/TopWebModels/TopWebModels.yml similarity index 88% rename from scrapers/TopWebModels.yml rename to scrapers/TopWebModels/TopWebModels.yml index 0fb0532a3..e8140ac3c 100644 --- a/scrapers/TopWebModels.yml +++ b/scrapers/TopWebModels/TopWebModels.yml @@ -1,4 +1,6 @@ name: "Top Web Models" +# requires: py_common + sceneByURL: - url: - tour.2girls1camera.com/scenes @@ -13,4 +15,4 @@ sceneByURL: script: - python3 - TopWebModels.py -# Last Updated November 20, 2021 +# Last Updated December 27, 2023 diff --git a/scrapers/Trans500.yml b/scrapers/Trans500.yml index 13250a28a..4f3b4a995 100644 --- a/scrapers/Trans500.yml +++ b/scrapers/Trans500.yml @@ -7,7 +7,12 @@ sceneByURL: scraper: sceneScraper - action: scrapeXPath url: + - behindtrans500.com/tour/ + - ikillitts.com/tour/ + - superramon.com/tour/ - trans500.com/tourespanol + - transatplay.com/tour/ + - tsgirlfriendexperience.com/tour/ scraper: sceneScraperEspanol xPathScrapers: @@ -66,4 +71,4 @@ xPathScrapers: with: "Behind Trans500" Tags: # Either //meta[@name="keywords"]/@content OR: //div[@class="scene-infobrick"][contains(text(), "Categories:")]/a/text() Name: //div[@class="scene-infobrick"][contains(text(), "Categories:")]/a/text() -# Last Updated November 10, 2020 +# Last Updated January 03, 2023 diff --git a/scrapers/Transerotica.yml b/scrapers/Transerotica.yml index ac7e0b2a3..4a1545c1a 100644 --- a/scrapers/Transerotica.yml +++ b/scrapers/Transerotica.yml @@ -12,9 +12,14 @@ xPathScrapers: Title: //h1[@class='title_bar'] Image: //div[@id="player"]/video/@poster Date: - selector: $update//span[@class="upddate"][1] + selector: $update/p/span/preceding-sibling::comment() postProcess: + - replace: + - regex: .*(\d{2}/\d{2}/\d{4}).* + with: $1 - parseDate: "01/02/2006" + - map: + 1970-01-01: "" Details: selector: $update/p[starts-with(text(),"Description:")] postProcess: @@ -30,5 +35,5 @@ xPathScrapers: split: "," Studio: Name: - fixed: Trans Erotica -# Last Updated August 07, 2021 + fixed: TransErotica +# Last Updated July 07, 2023 diff --git a/scrapers/Traxxx.py b/scrapers/Traxxx/Traxxx.py similarity index 100% rename from scrapers/Traxxx.py rename to scrapers/Traxxx/Traxxx.py diff --git a/scrapers/Traxxx.yml b/scrapers/Traxxx/Traxxx.yml similarity index 93% rename from scrapers/Traxxx.yml rename to scrapers/Traxxx/Traxxx.yml index c63407e7b..f3d6a851b 100644 --- a/scrapers/Traxxx.yml +++ b/scrapers/Traxxx/Traxxx.yml @@ -1,4 +1,6 @@ name: "Traxxx" +# requires: py_common + sceneByURL: - url: - traxxx.me/scene/ @@ -46,4 +48,4 @@ performerByURL: - Traxxx.py - performer_url -# Last Updated December 14, 2021 +# Last Updated April 24, 2023 diff --git a/scrapers/traxxx_interface.py b/scrapers/Traxxx/traxxx_interface.py similarity index 89% rename from scrapers/traxxx_interface.py rename to scrapers/Traxxx/traxxx_interface.py index f3c9d1421..b86daa92b 100644 --- a/scrapers/traxxx_interface.py +++ b/scrapers/Traxxx/traxxx_interface.py @@ -1,4 +1,4 @@ -import re, sys, copy, json +import re, sys # local modules try: @@ -29,9 +29,9 @@ def parse_response(json_input): def transform_type(value): if value.get("__typename") == "Media": - if value.get("isS3") == True: - return f'https://cdn.traxxx.me/{value.get("path")}' - if value.get("isS3") == False: +
if value.get("isS3"): + return f'https://cdndev.traxxx.me/{value.get("path")}' + else: return f'https://traxxx.me/media/{value.get("path")}' return value @@ -47,7 +47,7 @@ class TraxxxInterface: def __init__(self, fragments={}): scheme = "https" - domain = 'www.traxxx.me' + domain = 'traxxx.me' if self.port: domain = f'{domain}:{self.port}' @@ -161,7 +161,7 @@ def search_performers(self, search, numResults=20): results = self.__callGraphQL(query, variables).get("actors") log.info(f'performer search "{search}" returned {len(results)} results') - return [p for p in results] + return results # shootID refers to a media sources uniqueID e.x. a LegalPorno shootID might be "GIO0001" def get_scene_by_shootID(self, shootId): @@ -181,10 +181,8 @@ def get_scene_by_shootID(self, shootId): log.info(f'scene shootID lookup "{shootId}" returned {len(response)} results') - if len(response) > 0: - return response[0] - else: - return None + return next(iter(response), None) + def get_scene(self, traxxx_scene_id): query = """ @@ -205,10 +203,7 @@ def get_scene(self, traxxx_scene_id): log.info(f'scene traxxxID lookup "{traxxx_scene_id}" returned {len(response)} results') - if len(response) > 0: - return response[0] - else: - return None + return next(iter(response), None) def get_performer(self, traxxx_performer_id): query = """ @@ -229,10 +224,7 @@ def get_performer(self, traxxx_performer_id): log.info(f'performer traxxxID lookup "{traxxx_performer_id}" returned {len(response)} results') - if len(response) > 0: - return response[0] - else: - return None + return next(iter(response), None) def parse_to_stash_scene_search(self, s): fragment = {} @@ -245,6 +237,9 @@ def parse_to_stash_scene_search(self, s): if s.get("slug"): fragment["url"] = f'https://traxxx.me/scene/{s["id"]}/{s["slug"]}/' + if s.get("shootId"): + fragment["code"] = s["shootId"] + if s.get("date"): fragment["date"] = s["date"].split("T")[0] @@ -260,29 +255,19 @@ def parse_to_stash_scene_search(self, s): # #tags take too much space in the results page #if s.get("tags"): - # tags = [] - # for t in s["tags"]: - # if t.get("tag"): - # if t["tag"].get("name"): - # tags.append({ - # "name": t["tag"]["name"] - # }) - # fragment["tags"] = tags + # fragment["tags"] = [{"name": t["tag"]["name"]} for t in s.get("tags",{}) if t["tag"] and t["tag"].get("name")] if s.get("actors"): - performers = [] - for a in s["actors"]: - if a["actor"].get("name"): - performers.append({ - "name": a["actor"]["name"] - }) - fragment["performers"] = performers + fragment["performers"] = [{"name": a["actor"]["name"]} for a in s["actors"] if a["actor"] and a["actor"].get("name")] return fragment def parse_to_stash_scene(self, s): fragment = {} + if s.get("shootId"): + fragment["code"] = s["shootId"] + if s.get("title"): fragment["title"] = s["title"] @@ -301,45 +286,33 @@ def parse_to_stash_scene(self, s): if s.get("tags"): - tags = [] - for t in s["tags"]: - if t.get("tag"): - if t["tag"].get("name"): - tags.append({ - "name": t["tag"]["name"] - }) - fragment["tags"] = tags + fragment["tags"] = [{"name": t["tag"]["name"]} for t in s.get("tags",{}) if t["tag"] and t["tag"].get("name")] if s.get("actors"): - performers = [] - for a in s["actors"]: - if a["actor"].get("name"): - performers.append({ - "name": a["actor"]["name"] - }) - fragment["performers"] = performers + fragment["performers"] = [{"name": a["actor"]["name"]} for a in s["actors"] if a["actor"] and a["actor"].get("name")] if s.get("movies"): movies = [] for m in s["movies"]: - m = m.movie + m = m["movie"] if 
m.get("title"): movie = { "name": m["title"] } - if m.get["date"]: + if m.get("date"): movie["date"] = m["date"] if m.get("url"): movie["url"] = m["url"] if m.get("description"): movie["synopsis"] = m["description"] - if m.get["covers"]: - if len(m.covers) >= 1: - movie["frontImage"] = m["covers"][0] - if len(m.covers) >= 2: - movie["backImage"] = m["covers"][1] + if m.get("covers"): + covers = m["covers"] + if len(covers) >= 1: + movie["front_image"] = covers[0]["media"] + if len(covers) >= 2: + movie["back_image"] = covers[1]["media"] movies.append(movie) fragment["movies"] = movies @@ -409,9 +382,9 @@ def parse_to_stash_performer(self, p): if p.get("piercings"): fragment["piercings"] = p["piercings"] - if p["naturalBoobs"] == False: + if p["naturalBoobs"] is False: fragment["fake_tits"] = "Augmented" - if p["naturalBoobs"] == True: + if p["naturalBoobs"] is True: fragment["fake_tits"] = "Natural" if all( k in p for k in ['cup','bust','waist','hip'] ): diff --git a/scrapers/TugPass.yml b/scrapers/TugPass.yml new file mode 100644 index 000000000..04b5ef451 --- /dev/null +++ b/scrapers/TugPass.yml @@ -0,0 +1,70 @@ +name: "Tug Pass" +sceneByURL: + - action: scrapeXPath + url: + - tugpass.com/videos + scraper: sceneScraper_network + - action: scrapeXPath + url: + - familylust.com/videos + - petite18.com/videos + - seemomsuck.com/videos + - clubtug.com/videos + - teentugs.com/videos + - teasepov.com/videos + - over40handjobs.com/videos + - meanmassage.com/videos + - finishhim.com/videos + - ebonytugs.com/videos + - cumblastcity.com/videos + scraper: sceneScraper_sites +xPathScrapers: + sceneScraper_network: + scene: + Details: &details //meta[@name="description"]/@content + Performers: &performers + Name: //div[@class="featuringWrapper"]/a + Title: &title + selector: //title/text() + postProcess: + - replace: + - regex: \s\|.+$ + with: + - regex: \s\-.+$ + with: + # Scrape sanitized URL + URL: &url //link[@rel="canonical"]/@href + Image: + selector: //div[@class="player"]/img/@src + postProcess: + - replace: + - regex: ^ + with: https://www.tugpass.com/ + Studio: + Name: + selector: //div[@class="player"]/img/@src + postProcess: + - replace: + - regex: ^\/[A-Za-z_-]+\/([A-Za-z0-9-]+)\/.+ + with: $1 + sceneScraper_sites: + scene: + Details: *details + Performers: *performers + Title: *title + URL: *url + Image: + selector: //img[@id="playerImagePreview"]/@src|//script/text()[contains(.,"posterImage")]|//link[@rel="canonical"]/@href + concat: "|" + postProcess: + - replace: + - regex: .*(\/.+\/.+\.(jpe?g|gif)).*(https?\:\/\/.+\.com).* + with: $3$1 + Studio: + Name: + selector: //meta[@name="copyright"]/@content + postProcess: + - replace: + - regex: Copyright\s(.+)\s\d{4} + with: $1 +# Last Updated July 18, 2023 diff --git a/scrapers/UnderHentai.yml b/scrapers/UnderHentai.yml new file mode 100644 index 000000000..f9fd2f26b --- /dev/null +++ b/scrapers/UnderHentai.yml @@ -0,0 +1,63 @@ +name: UnderHentai +################################################################################################################ +# HOW TO SET UP # +# Store this file in the ~/stash/scrapers/UnderHentai.yml # +# (If the scrapers directory is not there it needs to be created) # +################################################################################################################ +# HOW TO USE # +# SCENES: # +# The scene Scraper by Fragment is the best option in case the file name is the name of the anime # +# Scenes that were not found can easily be found by the name scraper # +# Don't put the episode 
number otherwise it won't find it # +# It is also possible to scrape individually with the anime URL # +# The scraper doesn't recognize the episode number, I recommend changing it manually at the end # +# THAT'S IT, ENJOY! # +# Made by @escargotbuffed # +################################################################################################################ +sceneByFragment: + action: scrapeXPath + queryURL: https://www.underhentai.net/{filename} + queryURLReplace: + filename: + - regex: \..+$|\d+ + with: "" + - regex: \s+ + with: "-" + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - underhentai.net/ + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.underhentai.net/?s={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + scene: + Title: //article[@class="data-block"]//h2/a + URL: + selector: //article[@class="data-block"]//h2/a/@href + postProcess: + - replace: + - regex: ^ + with: https://www.underhentai.net/ + Image: //article[@class="data-block"]//img/@src + sceneScraper: + scene: + Title: //h1[@class="content-box content-head sidebar-light"] + Details: //p[contains(text(),"Official Title")]/following-sibling::span + URL: //link[@rel="canonical"]/@href + Tags: + Name: //p[contains(text(),"Genres")]/following-sibling::a + Studio: + Name: + selector: //p[contains(text(),"Brand")]/following-sibling::a + Image: //div[@class="loading"]/img/@src +# Last Updated June 23, 2023 diff --git a/scrapers/UpskirtJerk.yml b/scrapers/UpskirtJerk.yml deleted file mode 100644 index 9f668053a..000000000 --- a/scrapers/UpskirtJerk.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Upskirt Jerk -sceneByURL: - - action: scrapeXPath - url: - - upskirtjerk.com/videoentry/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //h1[@class='page_title']/text() - Date: - selector: //span[@class="day"]/span/text()|//span[@class="day"]/text() - concat: "/" - postProcess: - - replace: - - regex: ^(\w+)/(\d+)$ # for dates with current year - with: $1/22/$2 # the year part needs to be updated every year - - parseDate: Jan/06/02 - Performers: - Name: //span[@class="meta_modelcategory meta_category"]//a - Details: //div[@class="post_excerpt"]/p - Tags: - Name: //span[@class="meta_videotag meta_category"]//a - Image: //meta[@property="og:image:secure_url"]/@content - Studio: - Name: - fixed: Upskirt Jerk -# Last Updated March 06, 2022 diff --git a/scrapers/VRBangers.yml b/scrapers/VRBangers.yml index 4bf55690b..791f87ed5 100644 --- a/scrapers/VRBangers.yml +++ b/scrapers/VRBangers.yml @@ -35,7 +35,7 @@ sceneByFragment: xPathScrapers: sceneScraper: common: - $info: &info //div[starts-with(@class,"video-item__info ")] + $info: &info //div[starts-with(@class,"video-item__info ")]|//div[@class="single-video-info"] scene: Title: &titleSel //h1 Date: &dateAttr @@ -43,21 +43,26 @@ xPathScrapers: postProcess: - parseDate: Jan 2, 2006 Details: &detailsAttr - selector: //div[contains(@class,"second-text")]/div//text() + selector: //div[contains(@class,"second-text")]/div//text()|//div[contains(@class,"single-video-description")]/div//text() concat: " " Tags: - Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text() + Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text()|//div[@class="single-video-categories"]//a[contains(@href,"category/")]/text() Performers: - Name: 
//div[starts-with(@class, 'video-item__info-starring')]//a/text() + Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text()|//div[contains(@class, "single-video-info__starring")]//a/text() Studio: Name: &studioName - selector: //meta[@name="dl8-customization-brand-name"]/@content + selector: &studioURLSel //meta[@name="dl8-customization-brand-url"]/@content postProcess: - replace: - - regex: \# + - regex: ^// with: "" + - map: + vrbangers.com: VR Bangers + vrbgay.com: VRB Gay + vrbtrans.com: VRB Trans + vrconk.com: VR Conk URL: &studioURL - selector: //meta[@name="dl8-customization-brand-url"]/@content + selector: *studioURLSel postProcess: - replace: - regex: ^ @@ -87,4 +92,4 @@ xPathScrapers: Name: *studioName URL: *studioURL FrontImage: *imageSel -# Last Updated November 27, 2022 +# Last Updated February 27, 2023 diff --git a/scrapers/VRHush.yml b/scrapers/VRHush.yml index 6f754c486..990dc9635 100644 --- a/scrapers/VRHush.yml +++ b/scrapers/VRHush.yml @@ -30,7 +30,7 @@ xPathScrapers: Performers: Name: $info//h5[@class="latest-scene-subtitle"]//a/text() Image: &imageAttr - selector: $info//deo-video[1]/@cover-image + selector: //web-vr-video-player/@coverimage postProcess: - replace: - regex: ^ diff --git a/scrapers/VRPorn.yml b/scrapers/VRPorn.yml new file mode 100644 index 000000000..4887cee80 --- /dev/null +++ b/scrapers/VRPorn.yml @@ -0,0 +1,36 @@ +name: "VRPorn" + +sceneByURL: + - action: scrapeXPath + url: + - vrporn.com/ + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + common: + $header: //header[@class="entry-header"] + $footer: //footer[@class="entry-footer"] + $videoarea: //footer[@class="entry-footer"]/div[@class="layout_area_video"] + scene: + Title: $header//h1[@class="content-title"]/text() + Date: + selector: $videoarea/div[2]/span[@class="footer-titles"]/text() + postProcess: + - replace: + - regex: Posted on (?:Premium on )?(.*) + with: $1 + - parseDate: January 02, 2006 + Details: + selector: //main/article/div//p/text() + concat: "\n\n" + Tags: + Name: $footer//a[@rel="tag"]/text() + Performers: + Name: $header//div[@class="name_pornstar"]/text() + Studio: + Name: $header//a[@id="studio-logo"]//span[@class="footer-titles"]/text() + Image: //main/article/header//dl8-video/@poster + URL: &sceneUrl //link[@rel="canonical"]/@href + +# Last Updated April 20, 2023 diff --git a/scrapers/Vip4K.yml b/scrapers/Vip4K.yml index c63805696..e09d62e93 100644 --- a/scrapers/Vip4K.yml +++ b/scrapers/Vip4K.yml @@ -1,51 +1,60 @@ -name: vip4k +name: 'Vip4K' sceneByURL: - action: scrapeXPath url: - black4k.com + - bride4k.com + - cuck4k.com - daddy4k.com + - debt4k.com + - dyke4k.com - fist4k.com - hunt4k.com + - ignore4k.com + - loan4k.com - mature4k.com + - mommy4k.com - old4k.com + - pie4k.com - rim4k.com + - serve4k.com + - shame4k.com - sis.porn - stuck4k.com - tutor4k.com + - vip4k.com scraper: sceneScraper + queryURL: '{url}' + queryURLReplace: + url: + - regex: ^.+(/\w+/videos/\d{1,4}) + with: https://vip4k.com$1 xPathScrapers: sceneScraper: scene: - Title: //div[@class="title_player" or @class="title"]| //h1[contains(@class,"player")][contains(@class,"title")] + Title: //h1[@class='player-description__title'] Performers: - Name: - selector: - //a[@class="link_player"]|//div[@class="player-item__row"][contains(div,"Name:")]//span| - //div[contains(@class,"player-item")]//*[local-name()="svg"][@class="ico ico--star"]/../following-sibling::div - Details: - selector: - 
//div[@class="player-item__text"]|//span[@class="player-info__text-area"]|//span[@class="episode-about__text text"]|//div[@class='desc_frame']/p| - //div[@class='wrap_player_desc']/p|//div[@class='wrap_post']/p|//div[@class="player-item__text text text--sm"] - Image: //div[@class="player_watch"]/img[@src]/@src | //img[@class="player-item__inner"]/@data-src + Name: //div[@class='model__name'] + Details: //div[@class="player-description__text"] + Code: + selector: //link[@rel='canonical']/@href + postProcess: + - replace: + - regex: https.+/(\w+)/?$ + with: $1 + Image: + selector: //picture[@class='player-item__inner']/img/@data-src + postProcess: + - replace: + - regex: ^// + with: https:// + Date: //span[@class='player-additional__text'][1] Tags: - Name: - selector: //div[@class="tag_line"]//a + Name: //div[@class='tags']/a Studio: Name: - selector: //head/title/text() + selector: //ul[@class='player-additional']//a postProcess: - - replace: - - regex: ^([^:\s]+).* - with: $1 - map: - Black4k.Com: Black4k - Daddy4k.Com: Daddy4k - Fist4k.Com: Fist4k - Hunt4k.Com: Hunt4k - Mature4k.Com: Mature4k - Old4k.Com: Old4k - Rim4k.Com: Rim4k - Sis.porn: Sis Porn - Stuck4k.Com: Struck4k - Tutor4k.Com: Tutor4k -# Last Updated April 05, 2021 + Sis: Sis.Porn +# Last Updated December 11, 2023 \ No newline at end of file diff --git a/scrapers/VirtualTaboo.yml b/scrapers/VirtualTaboo.yml index c7794c0b5..4d9978800 100644 --- a/scrapers/VirtualTaboo.yml +++ b/scrapers/VirtualTaboo.yml @@ -20,7 +20,7 @@ xPathScrapers: selector: $genInfo/span[@class="bullet"]/following-sibling::text() postProcess: - parseDate: 02 January, 2006 - Details: &detailsSel $info//div[@class="description"]/text() + Details: &detailsSel $info//*[@class="description"] Tags: Name: $info//div[starts-with( @class,"tag-list")]/a/text() Performers: @@ -54,5 +54,5 @@ xPathScrapers: fixed: VirtualTaboo Synopsis: *detailsSel FrontImage: *imageSel -# Last Updated August 14, 2022 +# Last Updated August 04, 2023 diff --git a/scrapers/Visit-X.yml b/scrapers/Visit-X.yml new file mode 100644 index 000000000..3fcf546f6 --- /dev/null +++ b/scrapers/Visit-X.yml @@ -0,0 +1,26 @@ +name: Visit-X +sceneByURL: + - action: scrapeXPath + url: + - visit-x.net + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class="profile-video-details"]//h2 + Tags: + Name: //a[@class="profile-video-details--description-tags-list-item-tag"]/text() + Performers: + Name: //a[contains(@class, 'profile-video-details--description-complex-actors-link')]/text() + Details: //div[contains(@class, 'profile-video-details--description-text')]/text() + Date: + selector: //span[contains(@class, 'profile-video-details--description-complex-meta-item')][1]/text() + postProcess: + - parseDate: 02.01.2006 + Image: //div[contains(@class, 'gallery__preview-image-container')]/img/@src + Studio: + Name: + fixed: Visit-X +driver: + useCDP: true +# Last Updated April 24, 2023 diff --git a/scrapers/VlogXXX.yml b/scrapers/VlogXXX.yml deleted file mode 100644 index 3810b0860..000000000 --- a/scrapers/VlogXXX.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: VlogXXX -sceneByURL: - - action: scrapeXPath - url: - - vlogxxx.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="row"]/h1 - Performers: - Name: //h3[contains(.,"pornstars")]/following-sibling::a - Date: - selector: //p[@class="date"] - postProcess: - - parseDate: 2006-01-02 - Tags: - Name: //h3[contains(.,"Categories")]/following-sibling::a - Details: - selector: 
//div[h3[text()="Description"]]/p - Image: - selector: //div[@id="noMore"]/img/@src - Studio: - Name: - fixed: VlogXXX -# Last Updated May 21, 2021 diff --git a/scrapers/WAPdB.py b/scrapers/WAPdB/WAPdB.py similarity index 100% rename from scrapers/WAPdB.py rename to scrapers/WAPdB/WAPdB.py diff --git a/scrapers/WAPdB.yml b/scrapers/WAPdB/WAPdB.yml similarity index 100% rename from scrapers/WAPdB.yml rename to scrapers/WAPdB/WAPdB.yml diff --git a/scrapers/WankItNow.yml b/scrapers/WankItNow.yml new file mode 100644 index 000000000..7324c8def --- /dev/null +++ b/scrapers/WankItNow.yml @@ -0,0 +1,52 @@ +name: Wank It Now +sceneByURL: + - action: scrapeXPath + url: + - wankitnow.com/videos/ + - upskirtjerk.com/videos/ + - realbikinigirls.com/videos/ + - boppingbabes.com/videos/ + - lingerietales.com/videos/ + - downblousejerk.com/videos/ + + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //h1[contains(@class, "scene-title")]/text() + postProcess: + - replace: + - regex: .*"(.*). + with: $1 + Studio: + Name: + selector: //meta[@property="og:site_name"]/@content + postProcess: + - map: + WankItNow: Wank It Now + UpSkirtJerk: Upskirt Jerk + LingerieTales: Lingerie Tales + RealBikiniGirls: Real Bikini Girls + BoppingBabes: Bopping Babes + DownBlouseJerk: Downblouse Jerk + Performers: + Name: //div[@class="model-name"]/text() + Date: + selector: //span[@class="meta" and not(following-sibling::*)] + postProcess: + - replace: + - regex: ^(\d+)(st|nd|rd|th)\s+(\w+\s+\d{4})$ + with: $1 $3 + - parseDate: 2 Jan 2006 + Details: //meta[@property="og:description"]/@content + Image: //meta[@property="og:image"]/@content + Tags: + Name: //div[@class="tags"]//a +#TODO: Requested by Maista +# WankItNow and the sites covered by this scraper possibly are part of the Radical Cash Network (used to be Radical Cash). +# Sites that appear to be associated with Radical Cash are here: https://nats.radicalcash.com/external.php?page=sites +# Stash has WankItNow as the Parent for: Bopping Babes, Downblouse Jerk, Lingerie Tales, Upskirt Jerk, Wank It Now VR. +# Please note, this scraper does not cover Wank It Now VR. +# Last Updated September 12, 2023 \ No newline at end of file diff --git a/scrapers/Wankitnow.yml b/scrapers/Wankitnow.yml deleted file mode 100644 index 827b08cbb..000000000 --- a/scrapers/Wankitnow.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: Wank It Now -sceneByURL: - - action: scrapeXPath - url: - - wankitnow.com/videoentry/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //h1[@class='page_title']/text() - postProcess: - - replace: - - regex: .*\x{201C}(.*). 
- with: $1 - URL: //link[@rel="canonical"]/@href - Performers: - Name: //span[@class="meta_modelcategory meta_category"]//a - Details: //div[@class="post_excerpt"]/p - Tags: - Name: //span[@class="meta_videotag meta_category"]//a - Image: //meta[@property="og:image:secure_url"]/@content - Studio: - Name: - fixed: Wank It Now - Date: - selector: //span[@class="day"]/span/text()|//span[@class="day"]/text() - concat: "/" - postProcess: - - replace: - - regex: ^(\w+)/(\d+)$ - with: $1/22/$2 # Needs to be updated every year - - parseDate: Jan/06/02 -# Last Updated June 13, 2022 diff --git a/scrapers/WankzVR.yml b/scrapers/WankzVR.yml deleted file mode 100644 index 19f05aa20..000000000 --- a/scrapers/WankzVR.yml +++ /dev/null @@ -1,53 +0,0 @@ -name: "WankzVR" -sceneByURL: - - action: scrapeXPath - url: - - wankzvr.com - scraper: sceneScraper -movieByURL: - - action: scrapeXPath - url: - - wankzvr.com - scraper: movieScraper -xPathScrapers: - sceneScraper: - common: - $info: &infoSel //div[@class="detail"] - scene: - Title: &titleSel //div[@class="detail__header detail__header-lg"]/h1 - Date: &dateAttr - selector: $info//span[@class="detail__date"]/text() - postProcess: - - parseDate: 2 January, 2006 - Details: &detailsAttr - selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text() - concat: " " - Tags: - Name: $info//div[@class="tag-list__body"]//a/text() - Performers: - Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() -# Blocked by website's anti-scraping. -# Image: &imageSel //meta[@property="og:image"]/@content - movieScraper: - common: - $info: *infoSel - movie: - Name: - selector: *titleSel - postProcess: - - replace: - - regex: ^ - with: "WankzVR - " - Duration: - selector: $info//span[@class="time"]/text() - postProcess: - - replace: - - regex: \smin - with: ":00" - Date: *dateAttr - Studio: - Name: - fixed: WankzVR - Synopsis: *detailsAttr -# FrontImage: *imageSel -# Last Updated September 28, 2021 diff --git a/scrapers/WatchingMyDaughterGoBlack.yml b/scrapers/WatchingMyDaughterGoBlack.yml deleted file mode 100644 index f8a760ce1..000000000 --- a/scrapers/WatchingMyDaughterGoBlack.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "watchingmydaughtergoblack" -sceneByURL: - - action: scrapeXPath - url: - - watchingmydaughtergoblack.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //h3[@class="scene-title col-xs-12"]/text() - Date: - selector: //meta[@itemprop="uploadDate"]/@content - postProcess: - - replace: - - regex: ([\d-]*).+ - with: $1 - - parseDate: 2006-01-02 - Details: - selector: //p[@itemprop="description"]/text() - Tags: - Name: - selector: //p[@class="categories"]/text() - postProcess: - - replace: - - regex: ',(\x{00A0}|\s)+' - with: "," - split: "," - Performers: - Name: //span[@class="starring-list"]/a/text() - Image: - selector: //div[@class="trailer-container"]//@data-img|//meta[@itemprop="thumbnailUrl"]/@content - postProcess: - - replace: - - regex: ^ - with: "https:" - Studio: - Name: - selector: //meta[@itemprop="thumbnailUrl"]/@content - postProcess: - - replace: - - regex: '.*/([^/]+)\.com/[^/]+$' - with: $1 - - map: - barbcummings: Barb Cummings - blackmeatwhitefeet: Black Meat White Feet - blacksonblondes: Blacks On Blondes - blacksoncougars: Blacks On Cougars - candymonroe: Candy Monroe - cuckoldsessions: Cuckold Sessions - cumbang: Cum Bang - dogfartbehindthescenes: Dogfart Behind The Scenes - gloryhole-initiations: Gloryhole-Initiations - gloryhole: 
Glory Hole - interracialblowbang: Interracial Blow Bang - interracialpickups: Interracial Pickups - katiethomas: Katie Thomas - ruthblackwell: Ruth Blackwell - springthomas: Spring Thomas - theminion: The Minion - watchingmydaughtergoblack: Watching My Daughter Go Black - watchingmymomgoblack: Watching My Mom Go Black - wefuckblackgirls: We Fuck Black Girls - wifewriting: Wife Writing - zebragirls: Zebra Girls - URL: //link[@rel='canonical']/@href -# Last Updated May 03, 2021 diff --git a/scrapers/WeAreHairy.yml b/scrapers/WeAreHairy.yml new file mode 100644 index 000000000..273973527 --- /dev/null +++ b/scrapers/WeAreHairy.yml @@ -0,0 +1,68 @@ +name: "We Are Hairy" +sceneByURL: + - action: scrapeXPath + url: + - wearehairy.com + scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - wearehairy.com/models + scraper: galleryScraper +performerByURL: + - action: scrapeXPath + url: + - wearehairy.com/models + scraper: performerScraper +xPathScrapers: + sceneScraper: + scene: + Title: &title //title + Date: &date + selector: //time/@datetime + postProcess: + - parseDate: 2006-01-02 + Details: &details //meta[@name="description"]/@content + Tags: &tags + Name: //div[@class="tagline"]//a + Image: + selector: //meta[@itemprop="thumbnailUrl"]/@content + postProcess: + - replace: + - regex: \/\/(.*) + with: https://$1 + Performers: &performers + Name: //div[@class="meet"]//a + Director: //h3[contains(text(),"Shot by")]/following-sibling::p + Studio: &studio + Name: + fixed: We Are Hairy + galleryScraper: + gallery: + Title: *title + Date: *date + Studio: *studio + Performers: *performers + Tags: *tags + Details: *details + performerScraper: + performer: + Name: //span/span[@itemprop="name"]/text() + Gender: + fixed: Female +# Uncomment to scrape performer image +# Image: +# selector: //div[@class="lhs"]/img/@src + Height: + selector: //p[@id="height_both"]/text() + postProcess: + - replace: + - regex: .*(\d{3}) cm + with: $1 + Weight: + selector: //p[@id="weight_both"]/text() + postProcess: + - replace: + - regex: .*(\d{2,}) kg + with: $1 +# Last Updated July 27, 2023 diff --git a/scrapers/Algolia_Wicked.yml b/scrapers/Wicked/Wicked.yml similarity index 77% rename from scrapers/Algolia_Wicked.yml rename to scrapers/Wicked/Wicked.yml index 236c3bf70..390020a40 100644 --- a/scrapers/Algolia_Wicked.yml +++ b/scrapers/Wicked/Wicked.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Wicked" sceneByURL: - action: script @@ -5,7 +6,7 @@ sceneByURL: - wicked.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked movieByURL: - action: script @@ -13,27 +14,27 @@ movieByURL: - wicked.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - movie sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - validName galleryByURL: @@ -42,7 +43,7 @@ galleryByURL: - wicked.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - gallery # Last Updated December 22, 2022 diff --git a/scrapers/WoodmancastingX.yml b/scrapers/WoodmancastingX.yml index 65e4a3615..dfa4b2937 100644 --- a/scrapers/WoodmancastingX.yml +++ b/scrapers/WoodmancastingX.yml @@ -35,7 +35,13 @@ xPathScrapers: with: "https://www.woodmancastingx.com/" Studio: Name: - fixed: Woodman Casting X + selector: 
//div[@id="breadcrumb"] + postProcess: + - replace: + - regex: ^[^>]*> (Casting|Sthuf).*$ + with: "Woodman Casting X" + - regex: ^[^>]*> (Scenes|Backstage|Live).*$ + with: "Pierre Woodman" Tags: Name: //a[@class="tag"] Image: //meta[@property="og:image"]/@content @@ -52,4 +58,4 @@ xPathScrapers: URL: selector: $res/@href postProcess: *pp -# Last Updated September 28, 2022 +# Last Updated May 30, 2023 diff --git a/scrapers/WowNetworkVenus/WowNetworkVenus.yml b/scrapers/WowNetworkVenus/WowNetworkVenus.yml index c8fa2aa40..5f4a86727 100644 --- a/scrapers/WowNetworkVenus/WowNetworkVenus.yml +++ b/scrapers/WowNetworkVenus/WowNetworkVenus.yml @@ -1,4 +1,6 @@ name: WowNetworkVenus +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/XXXJobInterviews.yml b/scrapers/XXXJobInterviews.yml new file mode 100644 index 000000000..84ba9ab98 --- /dev/null +++ b/scrapers/XXXJobInterviews.yml @@ -0,0 +1,35 @@ +name: XXX Job Interviews +sceneByURL: + - action: scrapeXPath + url: + - xxxjobinterviews.com/video/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $scene: //div[contains(@class,"custom-80")][1] + scene: + Title: //meta[@property="og:title"]/@content + Details: $scene//div[@class="row profile"]/div[contains(@class, "description")] + Date: + selector: $scene//div[@class="item-inner"][2]/span + postProcess: + - replace: + - regex: (\w+)\s*(\d+).*?(\d+) + with: $1 $2 $3 + - parseDate: January 2 2006 + Image: + selector: $scene//script[contains(., "posterImage")] + postProcess: + - replace: + - regex: .*posterImage\s*:\s*['"]([^'"]+).* + with: https:$1 + URL: //meta[@property="og:url"]/@content + Studio: + Name: + fixed: XXX Job Interviews + Tags: + Name: //div[@class="tags"]//a + Performers: + Name: $scene//a[contains(@href, "pornstars")] +# Last Updated October 21, 2023 diff --git a/scrapers/Xrares.yml b/scrapers/Xrares.yml new file mode 100644 index 000000000..a834d3b4f --- /dev/null +++ b/scrapers/Xrares.yml @@ -0,0 +1,29 @@ +name: "Xrares" +sceneByURL: + - action: scrapeXPath + url: + - xrares.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + URL: //meta[@property="og:url"]/@content + Image: //meta[@property="og:image"]/@content + Tags: + Name: //meta[@property="video:tag"]/@content + Details: //meta[@property="og:description"]/@content + Date: + selector: //div[@class="pull-right big-views hidden-xs"]//span[@class="text-white"]/text() + postProcess: + - replace: + - regex: (\d+)\sdays\sago + with: $1 + - subtractDays: true + Code: + selector: //meta[@property="og:url"]/@content + postProcess: + - replace: + - regex: .+/video/(\d+)/.+?$ + with: $1 +# Last Updated June 10, 2023 diff --git a/scrapers/Xvideos.yml b/scrapers/Xvideos.yml index 71a331786..2fef8a494 100644 --- a/scrapers/Xvideos.yml +++ b/scrapers/Xvideos.yml @@ -1,24 +1,56 @@ -name: xvideos +name: Xvideos sceneByURL: - action: scrapeXPath url: - xvideos.com + - xvideos2.com + #- xvideos.es - xvideos.red scraper: sceneScraper + +sceneByFragment: + action: scrapeXPath + scraper: sceneScraper + queryURL: https://www.xvideos.com/video{filename}/x + queryURLReplace: + filename: + # expects an id in square brackets before extension, as saved by yt-dlp by default + - regex: '.*\[([0-9a-zA-Z]{4,})\]\.[^\.]+' + with: $1 + # or expects an id + - regex: '(?i)^.+-(\d+)\..+$' + with: $1 + # if no id is found in the filename + - regex: .*\.[^\.]+$ + with: # clear the filename so that it doesn't leak + xPathScrapers: sceneScraper: scene: 
Title: //h2[@class="page-title"]/text()[1] Tags: - Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text() + Name: //li/a[@class="is-keyword btn btn-default"]/text() Performers: - Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text() + Name: //li[@class="model"]/a/span[1]/text() Studio: - Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text() + Name: //li[@class="main-uploader"]/a[ + starts-with(@href,'/channels') or + starts-with(@href,'/amateur-channels') or + starts-with(@href,'/model-channels') or + starts-with(@href,'/pornstar-channels') + ]/span[@class="name"]/text() Image: selector: //script[contains(text(), "setThumbUrl169")]/text()|//div[@id="html5video"][span[@class="premium-log-overlay"]]/div[@class="embed-responsive-item"]/@style postProcess: - replace: - - regex: ^.+(?:setThumbUrl169|url)\('(.+?\.jpg).+$ - with: $1 -# Last Updated November 11, 2020 + - regex: ^.+(?:setThumbUrl169|url)\('(.+?\.jpg).+$ + with: $1 + URL: //link[@rel="alternate" and @hreflang="x-default"]/@href + Date: + selector: //script[@type="application/ld+json"]/text() + postProcess: + - replace: + - regex: '[\S\s]+"uploadDate"\s*:\s*"(\d+-\d{2}-\d{2})[^"]+"[\S\s]+' + with: $1 + - parseDate: 2006-01-02 +# Last Updated February 15, 2023 diff --git a/scrapers/Yanks.yml b/scrapers/Yanks.yml index 8a051786d..165be887f 100644 --- a/scrapers/Yanks.yml +++ b/scrapers/Yanks.yml @@ -6,23 +6,30 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $scene: //div[contains(@class,"update-info ")] scene: Title: - selector: //div[@class='blocktitle clear']/p/../h2/text() + selector: $scene//h1 Performers: Name: - selector: //div[@class='blocktitle clear']/p/a/text() + selector: $scene//strong[contains(text(),'Added')]/following-sibling::a[contains(@href, '/models/')]/text() Date: - selector: //div[@class='blocktitle clear']/p/text() + selector: ($scene//strong[contains(text(),'Added')]/following-sibling::text())[1] postProcess: - - replace: - - regex: .+(\d{4}-\d{2}-\d{2}).+ - with: $1 - - parseDate: 2006-01-02 + - parseDate: January 2, 2006 Image: - selector: //div[@class='mainvideo_here']/a/img/@src + # No point going for 0_2/3/4x, they're poorly upscaled versions of 0_1x + selector: >- + //div[@id='fakeplayer']//img/@src0_1x | + //div[@id='fakeplayer']//img/@src + postProcess: + - replace: + - regex: ^ + with: https://yanks.com + Tags: + Name: $scene//ul[@class='tags']/li/a/text() Studio: Name: fixed: Yanks - -# Last Updated October 30, 2020 +# Last Updated October 11, 2023 diff --git a/scrapers/YesGirlz.yml b/scrapers/YesGirlz.yml new file mode 100644 index 000000000..2376cef3d --- /dev/null +++ b/scrapers/YesGirlz.yml @@ -0,0 +1,30 @@ +name: Yes Girlz +sceneByURL: + - action: scrapeXPath + url: + - yesgirlz.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $siteContent: //div[@class="site-content"] + scene: + Title: $siteContent//h2 + Performers: + Name: + selector: $siteContent//h2[contains(text(), "Starring")] + postProcess: + - replace: + - regex: ^\s*Starring:\s* + with: "" + - regex: \s*$ + with: "" + split: " & " + Details: + selector: $siteContent//div[@class="elementor-widget-container"]/p + Image: + selector: $siteContent//video/@data-poster + Studio: + Name: + fixed: Yes Girlz +# Last Updated May 29, 2023 diff --git a/scrapers/Zishy.yml b/scrapers/Zishy.yml new file mode 100644 index 000000000..d6c696dda --- /dev/null +++ b/scrapers/Zishy.yml @@ -0,0 +1,54 @@ +name: Zishy 
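+# Scrapes Zishy album pages as both scenes and galleries; the galleryScraper below reuses the sceneScraper fields via YAML anchors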
+sceneByURL: + - action: scrapeXPath + url: + - zishy.com/albums + scraper: sceneScraper + +galleryByURL: + - action: scrapeXPath + url: + - zishy.com/albums + scraper: galleryScraper +xPathScrapers: + sceneScraper: + scene: + Title: &title + selector: //head/title + postProcess: + - replace: + - regex: ([\w ]*) - Zishy + with: $1 + Details: &details + selector: //div[@id='descrip'] + Date: &date + selector: //div[@id='headline']/span[last()] + postProcess: + - replace: + - regex: added on (\w{3}) (\d{2}). (\d{4}) + with: $3-$1-$2 + - parseDate: 2006-Jan-02 + Image: + selector: //div[@class="player"]//@style[contains(.,"background")] + postProcess: + - replace: + - regex: .+url\((/uploads/.+)\);.+ + with: https://www.zishy.com/$1 + Studio: &studio + Name: + fixed: Zishy + Performers: &performers + Name: + selector: //span[@class='moreof']/a[text()] + postProcess: + - replace: + - regex: \#([\w\s]+) + with: $1 + galleryScraper: + gallery: + Title: *title + Details: *details + Date: *date + Studio: *studio + Performers: *performers +# Last Updated February 26, 2023 diff --git a/scrapers/data18.yml b/scrapers/data18.yml index ec2eef975..0b533c24a 100644 --- a/scrapers/data18.yml +++ b/scrapers/data18.yml @@ -1,4 +1,15 @@ name: data18 +driver: + cookies: + - CookieURL: "https://data18.com" + Cookies: + - Name: "data_user_captcha" + Domain: ".data18.com" + Value: "1" + Path: "/" + headers: + - Key: User-Agent + Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) movieByURL: - action: scrapeXPath url: @@ -49,7 +60,12 @@ xPathScrapers: $movieInfo: //div[@id="body2div_b"] $studio: //b[text()='Studio']/following-sibling::b/a movie: - Name: //div[@id="topmedia"]//a/text() + Name: + selector: //title + postProcess: + - replace: + - regex: (.+?)(?:\s\(\d{4}\)\sPorn\sMovie\s\|\sDATA18) + with: $1 Duration: selector: $movieInfo//b[contains(text(),"Length")]/following-sibling::span|$movieInfo//b[contains(text(),"Length")]/following-sibling::text() postProcess: @@ -76,6 +92,6 @@ xPathScrapers: - replace: - regex: '^Description\s*-\s*' with: - FrontImage: //a[@id='enlargecover']/@href + FrontImage: //a[@id='enlargecover']/@data-featherlight BackImage: //a[text()='+Back']/@href -# Last Updated June 30, 2022 \ No newline at end of file +# Last Updated July 18, 2023 diff --git a/scrapers/dc-onlyfans.py b/scrapers/dc-onlyfans/dc-onlyfans.py similarity index 95% rename from scrapers/dc-onlyfans.py rename to scrapers/dc-onlyfans/dc-onlyfans.py index 71f8a1aa3..e2d177a44 100644 --- a/scrapers/dc-onlyfans.py +++ b/scrapers/dc-onlyfans/dc-onlyfans.py @@ -50,7 +50,7 @@ def lookup_scene(file,db,parent): performer={"name":parent.name} image=findPerformerImage(parent) if image is not None: - performer['image']=make_image_data_url(image) + performer['images']=[make_image_data_url(image)] res['performers']=[performer] @@ -86,7 +86,7 @@ def lookup_gallery(file,db,parent): performer={"name":parent.name} image=findPerformerImage(parent) if image is not None: - performer['image']=make_image_data_url(image) + performer['images']=[make_image_data_url(image)] res['performers']=[performer] @@ -96,7 +96,7 @@ def findFilePath(id): scene=graphql.getScene(id) if scene: return scene["path"] - log.error(f"Error connecting to api") + log.error("Error connecting to api") print("{}") sys.exit() @@ -128,9 +128,8 @@ def make_image_data_url(image_path): file=Path(findFilePath(id)) elif sys.argv[1] == "querygallery": lookup = lookup_gallery - gallery = graphql.getGalleryPath(id) - if gallery: - gallery_path 
= gallery.get("path") + gallery_path = graphql.getGalleryPath(id) + if gallery_path: file = Path(gallery_path) if file: @@ -149,4 +148,4 @@ def make_image_data_url(image_path): print(json.dumps(scene)) sys.exit() # not found return an empty map - print("{}") \ No newline at end of file + print("{}") diff --git a/scrapers/dc-onlyfans.yml b/scrapers/dc-onlyfans/dc-onlyfans.yml similarity index 93% rename from scrapers/dc-onlyfans.yml rename to scrapers/dc-onlyfans/dc-onlyfans.yml index 3e9edb526..16df8e81a 100644 --- a/scrapers/dc-onlyfans.yml +++ b/scrapers/dc-onlyfans/dc-onlyfans.yml @@ -1,4 +1,6 @@ name: "DC Onlyfans" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/dc_onlyfans_fansdb/README.md b/scrapers/dc_onlyfans_fansdb/README.md new file mode 100644 index 000000000..007a49c80 --- /dev/null +++ b/scrapers/dc_onlyfans_fansdb/README.md @@ -0,0 +1,154 @@ +# DC OnlyFans (FansDB) + +This script is a companion to the OnlyFans/Fansly data scrapers by DIGITALCRIMINAL and derivatives.\ +These tools download posts from OnlyFans/Fansly and save metadata to 'user_data.db' SQLite files. + +> [!NOTE]\ +> This script requires python3, stashapp-tools, and sqlite3. + +## Scenes + +The post information for scenes will be scraped from the metadata database based on file name. + +Currently the scraper returns the following information for scenes: + +- Title +- Details +- Date +- Code +- Studio +- URLs +- Performers +- Tags + +Please refer to [Post Metadata](#post-metadata) for more information. + +## Galleries + +The post information for galleries will be scraped from the metadata database based on directory. + +> [!IMPORTANT]\ +> Since galleries are matched on directory, each post should be contained in a separate directory. + +Currently the scraper returns the following information for galleries: + +- Title +- Details +- Date +- Studio +- URLs +- Performers +- Tags + +Please refer to [Post Metadata](#post-metadata) for more information. + +## Post Metadata + +### Title + +In all cases, the title will be truncated on word boundaries (if possible) up to the configured `max_title_length` in `config.json` (default 64 characters). + +- When the post contains no text: `<username> - <date> [(<scene_index>)]`\ + Example: `jonsnow - 2023-10-16 (2)` +- When the first line of post text contains fewer than six (6) characters: `<title> - <date> [(<scene_index>)]`\ + Example: `Hi! - 2023-10-16` +- When the first line of post text does not contain alpha-numeric characters: `<title> - <date> [(<scene_index>)]`\ + Example: `❤️❤️❤️❤️❤️❤️❤️❤️ - 2023-10-16 (4)` +- Else: `<title> [(<scene_index>)]`\ + Example: `Lorem ipsum dolor sit amet, consectetur adipiscing elit.` + +The bracketed `(<scene_index>)` suffix is only added when a post contains more than one video. + +### Details + +The details will contain the entirety of the post text. + +### Date + +The date will contain the date on which the post was created. + +### Code + +The code will contain the `post_id` of the post as stored in the database. This may be the same across multiple scenes if they originate from the same OnlyFans post. + +### Studio + +The creator studio name will be set to the following: `<username> (<network>)` e.g. `jonsnow (OnlyFans)`\ +The creator studio URL will be set to the following: + +- OnlyFans: `https://onlyfans.com/<username>` +- Fansly: `https://fansly.com/<username>` + +The parent studio name will be set to the following: `<network> (network)` e.g. `Fansly (network)`\ +The parent studio URL will be set to the following: + +- OnlyFans: `https://onlyfans.com/` +- Fansly: `https://fansly.com/` + +### URLs + +For scenes and galleries, the URL will be set to the following: + +- OnlyFans: `https://onlyfans.com/<post_id>/<username>` +- Fansly: `https://fansly.com/post/<post_id>` + +### Performers + +The performer username is taken from the name of the folder immediately following "OnlyFans" or "Fansly". + +Example:\ +`D:\stash-library\of-scraper\OnlyFans\<username>\...` + +> [!NOTE]\ +> The only performer that is being matched is the "owner" of the profile. + +The scraper will try to resolve performer names by searching for performers with an alias matching the username. + +By default, the scraper will search recursively from the performer directory for `.jpg` and `.png` files and base64 encode up to three (3) images for use as a performer image. These files are (by default) cached for 5 minutes by saving the base64 encoded images to disk to speed up bulk scraping. + +If desired, this behavior can be tweaked by changing these values in `config.json`: + +``` + "max_performer_images": 3 # Maximum performer images to generate. + "cache_time": 300 # Image expiration time (in seconds). + "cache_dir": "cache" # Directory to store cached base64 encoded images. + "cache_file": "cache.json" # File to store cache information in. +``` + +### Tags + +By default, the scraper will tag scenes and galleries sent as messages with the tag `[DC: Messages]`. + +This behaviour is configurable by changing these values in `config.json`: + +``` + "tag_messages": true, # Whether to tag messages. + "tag_messages_name": "[DC: Messages]", # Name of tag for messages. +``` + +## Configuration + +> [!IMPORTANT]\ +> If you have enabled password protection on your Stash instance, filling in the `apikey` is required. + +On first run, the scraper will write a default `config.json` file if it does not already exist. + +Additionally, the `cache_dir` and `cache_file` will be created if they do not yet exist. + +The values in the default config are as follows: + +``` +{ + "stash_connection": { + "scheme": "http", + "host": "localhost", + "port": 9999, + "apikey": "" + }, + "max_title_length": 64, # Maximum length for scene/gallery titles. + "tag_messages": true, # Whether to tag messages. + "tag_messages_name": "[DC: Messages]", # Name of tag for messages. + "max_performer_images": 3, # Maximum performer images to generate. + "cache_time": 300, # Image expiration time (in seconds). + "cache_dir": "cache", # Directory to store cached base64 encoded images. + "cache_file": "cache.json" # File to store cache information in. +} +``` diff --git a/scrapers/dc_onlyfans_fansdb/dc_onlyfans_fansdb.py b/scrapers/dc_onlyfans_fansdb/dc_onlyfans_fansdb.py new file mode 100644 index 000000000..84d7b616a --- /dev/null +++ b/scrapers/dc_onlyfans_fansdb/dc_onlyfans_fansdb.py @@ -0,0 +1,506 @@ +""" +This script is a companion to the OnlyFans/Fansly data scrapers by DIGITALCRIMINAL and derivatives. +These tools download posts from OnlyFans/Fansly and save metadata to 'user_data.db' SQLite files. + +This script requires python3, stashapp-tools, and sqlite3. +""" +import json +import sys +import re +import sqlite3 +from pathlib import Path +import time +import random +import uuid +from typing import Dict + +try: + from stashapi import log + from stashapi.tools import file_to_base64 + from stashapi.stashapp import StashInterface +except ModuleNotFoundError: + print( + "You need to install the stashapp-tools (stashapi) python module.
(cmd): " + "pip install stashapp-tools", + file=sys.stderr + ) + sys.exit() + +# CONFIG ########################################################################################### + +# Default config +default_config = { + "stash_connection": { + "scheme": "http", + "host": "localhost", + "port": 9999, + "apikey": "" + }, + "max_title_length": 64, # Maximum length for scene/gallery titles. + "tag_messages": True, # Whether to tag messages. + "tag_messages_name": "[DC: Messages]", # Name of tag for messages. + "max_performer_images": 3, # Maximum performer images to generate. + "cache_time": 300, # Image expiration time (in seconds). + "cache_dir": "cache", # Directory to store cached base64 encoded images. + "cache_file": "cache.json" # File to store cache information in. +} + +# Read config file +try: + with open('config.json', 'r', encoding="utf-8") as config_file: + config = json.load(config_file) +except FileNotFoundError: + # If the file doesn't exist, use the default configuration + config = default_config + +# Update config with missing keys +config.update((k, v) for k, v in default_config.items() if k not in config) + +# Write config file +with open('config.json', 'w', encoding="utf-8") as config_file: + json.dump(config, config_file, indent=2) + +STASH_CONNECTION = config['stash_connection'] +MAX_TITLE_LENGTH = config['max_title_length'] +TAG_MESSAGES = config['tag_messages'] +TAG_MESSAGES_NAME = config['tag_messages_name'] +MAX_PERFORMER_IMAGES = config['max_performer_images'] +CACHE_TIME = config['cache_time'] +CACHE_DIR = config['cache_dir'] +CACHE_FILE = config['cache_file'] + +# STASH ############################################################################################ +try: + stash = StashInterface(STASH_CONNECTION) +except SystemExit: + log.error("Unable to connect to Stash, please verify your config.") + print('null') + sys.exit() + +# CACHE ########################################################################################### +# Create cache directory +Path(CACHE_DIR).mkdir(parents=True, exist_ok=True) + + +def load_cache(): + """ + Load and update cache data, removing stale entries and associated files when necessary. + """ + try: + with open(CACHE_FILE, 'r', encoding="utf-8") as file: + cache = json.load(file) + current_time = time.time() + updated_cache = {} + for path, (timestamp, image_filenames) in cache.items(): + if current_time - timestamp <= CACHE_TIME: + updated_cache[path] = (timestamp, image_filenames) + else: + log.info(f'[CACHE PURGE] Purging stale image(s) for path: {path}') + for image_filename in image_filenames: + image_path = Path(CACHE_DIR) / image_filename + log.debug(f'[CACHE PURGE] Deleting stale image from disk: {image_path}') + if Path(image_path).exists() and Path(image_path).is_file(): + Path(image_path).unlink() + return updated_cache + except (FileNotFoundError, json.JSONDecodeError): + return {} + + +def save_cache(cache): + """ + Save cache data and log update. + """ + with open(CACHE_FILE, 'w', encoding="utf-8") as file: + json.dump(cache, file, indent=2) + log.info('[CACHE UPDATED]') + + +# SCENES ########################################################################################### +def lookup_scene(file, db, media_dir, username, network): + """ + Query database for scene metadata and create a structured scrape result. 
+ """ + log.info(f"Using database: {db} for {file}") + conn = sqlite3.connect( + db, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES + ) + c = conn.cursor() + + c.execute(""" + SELECT medias.filename, medias.post_id, match.api_type + FROM medias + JOIN ( + SELECT api_type, post_id + FROM medias + WHERE medias.filename = ? + ) AS match + ON medias.post_id = match.post_id + WHERE medias.media_type = 'Videos' + ORDER BY medias.id ASC + """, (file.name,)) + + result = c.fetchall() + + if not result: + log.error(f'Could not find metadata for scene: {file}') + print('null') + sys.exit() + + api_type = result[0][2] + post_id = result[0][1] + + if api_type in ("Posts", "Stories", "Messages", "Products", "Others"): + query = f""" + SELECT posts.post_id, posts.text, posts.created_at + FROM {api_type.lower()} AS posts, medias + WHERE posts.post_id = medias.post_id + AND medias.filename = ? + """ + c.execute(query, (file.name,)) + else: + log.error(f"Unknown api_type {api_type} for post: {post_id}") + print('null') + sys.exit() + + log.debug(f'Found {len(result)} video(s) in post {post_id}') + if len(result) > 1: + scene_index = [item[0] for item in result].index(file.name) + 1 + log.debug(f'Video is {scene_index} of {len(result)} in post') + else: + scene_index = 0 + + scene = process_row(c.fetchone(), username, network, scene_index) + + scrape = { + "title": scene["title"], "details": scene["details"], "date": scene["date"], + "code": scene["code"], "urls": scene["urls"], + "studio": get_studio_info(username, network), + "performers": get_performer_info(username, media_dir) + } + if api_type == "Messages" and TAG_MESSAGES: + scrape["tags"] = [{"name": TAG_MESSAGES_NAME}] + + return scrape + + +# GALLERIES ######################################################################################## +def lookup_gallery(file, db, media_dir, username, network): + """ + Query database for gallery metadata and create a structured scrape result. + """ + log.info(f"Using database: {db} for {file}") + conn = sqlite3.connect( + db, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES + ) + c = conn.cursor() + # which media type should we look up for our file? + c.execute(""" + SELECT DISTINCT api_type, post_id + FROM medias + WHERE medias.directory = ? + """, (file.as_posix(),)) + row = c.fetchone() + if not row: + log.error(f'Could not find metadata for gallery: {file}') + print('null') + sys.exit() + # check for each api_type the right tables + api_type = str(row[0]) + post_id = str(row[1]) + if api_type in ("Posts", "Stories", "Messages", "Products", "Others"): + query = f""" + SELECT posts.post_id, posts.text, posts.created_at + FROM {api_type.lower()} AS posts + WHERE posts.post_id = ? + """ + c.execute(query, (post_id,)) + else: + log.error(f"Unknown api_type {api_type} for post: {post_id}") + print('null') + sys.exit() + + gallery = process_row(c.fetchone(), username, network) + + scrape = { + "title": gallery["title"], "details": gallery["details"], "date": gallery["date"], + "urls": gallery["urls"], + "studio": get_studio_info(username, network), + "performers": get_performer_info(username, media_dir) + } + if api_type == "Messages" and TAG_MESSAGES: + scrape["tags"] = [{"name": TAG_MESSAGES_NAME}] + + return scrape + + +# UTILS ############################################################################################ +def get_scene_path(scene_id): + """ + Find and return the path for a scene by its ID. 
+ """ + scene = stash.find_scene(scene_id) + # log.debug(scene) + if scene: + return scene["files"][0]["path"] + + log.error(f'Path for scene {scene_id} could not be found') + print('null') + sys.exit() + + +def get_gallery_path(gallery_id): + """ + Find and return the path for a gallery by its ID. + """ + gallery = stash.find_gallery(gallery_id) + # log.debug(gallery) + if gallery: + return gallery["folder"]["path"] + + log.error(f'Path for gallery {gallery_id} could not be found') + print('null') + sys.exit() + + +def get_performer_info(username, media_dir): + """ + Resolve performer based on username + """ + req = stash.find_performers( + f={ + "name": {"value": username, "modifier": "EQUALS"}, + "OR":{ + "aliases": {"value": username, "modifier": "EQUALS"} + } + }, + filter={"page": 1, "per_page": 5}, + fragment="id, name, aliases", # type: ignore + ) + log.debug(f'found performer(s): {req}') + res: Dict = {} + if len(req) == 1: + log.debug(f"Found performer id: {req[0]['id']}") + res['stored_id'] = req[0]['id'] + res['name'] = username + + images = get_performer_images(media_dir) + if images is not None: + res["images"] = images + + return [res] + + +def get_studio_info(studio_name, studio_network): + """ + Resolve studio based on name and network + """ + req = stash.find_studios( + f={ + "name": {"value": f"{studio_name} ({studio_network})", "modifier": "EQUALS"}, + "OR":{ + "aliases": {"value": f"{studio_name} ({studio_network})", "modifier": "EQUALS"} + } + }, + filter={"page": 1, "per_page": 5}, + fragment="id, name, aliases" + ) + log.debug(f'found studio(s): {req}') + res: Dict = {'parent': {}} + if len(req) == 1: + log.debug(f"Found studio id: {req[0]['id']}") + res['stored_id'] = req[0]['id'] + res['name'] = f'{studio_name} ({studio_network})' + res['parent']['name'] = f'{studio_network} (network)' + if studio_network == 'OnlyFans': + res['url'] = f'https://onlyfans.com/{studio_name}' + res['parent']['url'] = 'https://onlyfans.com/' + elif studio_network == 'Fansly': + res['url'] = f'https://fansly.com/{studio_name}' + res['parent']['url'] = 'https://fansly.com/' + return res + + +def get_performer_images(path): + """ + Find and encode performer images to base64. 
+ """ + log.debug(f'Finding image(s) for path: {path}') + + cache = load_cache() + + if str(path) in cache: # check if the images are cached + log.debug(f'[CACHE HIT] Using cached image(s) for path: {path}') + image_filenames = cache[f'{path}'][1] + log.debug(image_filenames) + cached_images = [] + for image_filename in image_filenames: + with open(Path(CACHE_DIR) / image_filename, 'r', encoding="utf-8") as f: + base64_data = f.read() + cached_images.append(base64_data) + return cached_images + + image_types = ["*.jpg", "*.png"] + image_list = [] + for image_type in image_types: # get jpg and png files in provided path + type_result = list(path.rglob(image_type)) + image_list += type_result + + if len(image_list) == 0: # if no images found + log.warning(f'No image(s) found for path: {path}') + return None + + # if images found, encode up to `max_images` to base64 + log.debug(f'[CACHE MISS] Generating image(s) for path: {path}') + selected_images = random.choices(image_list, k=min(MAX_PERFORMER_IMAGES, len(image_list))) + + encoded_images = [] + cache_filenames = [] + + for index, image in enumerate(selected_images): + log.debug(f""" + [CACHE MISS] Encoding {index + 1} of {len(selected_images)} image(s) to base64: {image}' + """) + base64_data = file_to_base64(image) + if base64_data is None: + log.error(f"Error converting image to base64: {image}") + print('null') + sys.exit() + + encoded_images.append(base64_data) + + # Store the base64 image data on disk + image_filename = f'{uuid.uuid4().hex}.b64' + with open(Path(CACHE_DIR) / image_filename, 'w', encoding="utf-8") as f: + f.write(base64_data) + + cache_filenames.append(image_filename) + + # Store the file name and timestamp in the cache + cache[f'{path}'] = (time.time(), cache_filenames) + save_cache(cache) + + return encoded_images + + +def truncate_title(title, max_length): + """ + Truncate title to provided maximum length while preserving word boundaries. + """ + # Check if the title is already within the desired length + if len(title) <= max_length: + return title + + # Find the last space character before the max length + last_space_index = title.rfind(" ", 0, max_length) + # If there's no space before the max length, simply truncate the string + if last_space_index == -1: + return title[:max_length] + # Otherwise, truncate at the last space character + return title[:last_space_index] + + +def format_title(title, username, date, scene_index): + """ + Format a post title based on various conditions. + """ + if len(title) == 0: + scene_info = f' ({scene_index})' if scene_index > 0 else '' + return f'{username} - {date}{scene_info}' + + f_title = truncate_title(title.split("\n")[0].strip().replace("
", ""), MAX_TITLE_LENGTH) + scene_info = f' ({scene_index})' if scene_index > 0 else '' + + if len(f_title) <= 5: + return f'{f_title} - {date}{scene_info}' + + if not bool(re.search("[A-Za-z0-9]", f_title)): + if scene_index == 0: + title_max_len = MAX_TITLE_LENGTH - 13 + else: + title_max_len = MAX_TITLE_LENGTH - 16 - len(str(scene_index)) + t_title = truncate_title(f_title, title_max_len) + scene_info = f' ({scene_index})' if scene_index > 0 else '' + return f'{t_title} - {date}{scene_info}' + + scene_info = f' ({scene_index})' if scene_index > 0 else '' + return f'{f_title}{scene_info}' + + +def process_row(row, username, network, scene_index=0): + """ + Process a database row and format post details. + """ + res = {} + res['date'] = row[2].strftime("%Y-%m-%d") + res['title'] = format_title(row[1], username, res['date'], scene_index) + res['details'] = row[1] + res['code'] = str(row[0]) + if network == 'OnlyFans': + res['urls'] = [f"https://onlyfans.com/{res['code']}/{username}"] + elif network == 'Fansly': + res['urls'] = [f"https://fansly.com/post/{res['code']}"] + return res + + +def get_metadata_db(search_path, username, network): + """ + Recursively search for 'user_data.db' file starting from 'search_path' + """ + search_path = Path(search_path).resolve() + + while search_path != search_path.parent: + db_files = list(search_path.rglob(f"{network}/**/{username}/**/user_data.db")) + db_files = [db for db in db_files if db.is_file()] + if db_files: + return db_files[0] + + search_path = search_path.parent + + return None + + +def get_path_info(path): + """ + Extract the username and network from a given path + """ + network = 'Fansly' if 'Fansly' in str(path) else 'OnlyFans' + try: + index = path.parts.index(network) + if index + 1 < len(path.parts): + return path.parts[index + 1], network, Path(*path.parts[:index + 2]) + raise ValueError + except ValueError: + log.error(f'Could not find username or network in path: {path}') + print('null') + sys.exit(1) + + +# MAIN ############################################################################################# +def main(): + """ + Execute scene or gallery lookup and print the result as JSON to stdout + """ + fragment = json.loads(sys.stdin.read()) + scrape_id = fragment["id"] + + if sys.argv[1] == "queryScene": + lookup = lookup_scene + path = Path(get_scene_path(scrape_id)) + elif sys.argv[1] == "queryGallery": + lookup = lookup_gallery + path = Path(get_gallery_path(scrape_id)) + else: + log.error('Invalid argument(s) provided: ' + str(sys.argv)) + print('null') + sys.exit() + + username, network, media_dir = get_path_info(path) + db = get_metadata_db(path, username, network) + + media = lookup(path, db, media_dir, username, network) + print(json.dumps(media)) + sys.exit() + + +if __name__ == "__main__": + main() diff --git a/scrapers/dc_onlyfans_fansdb/dc_onlyfans_fansdb.yml b/scrapers/dc_onlyfans_fansdb/dc_onlyfans_fansdb.yml new file mode 100644 index 000000000..5af390548 --- /dev/null +++ b/scrapers/dc_onlyfans_fansdb/dc_onlyfans_fansdb.yml @@ -0,0 +1,17 @@ +name: "DC OnlyFans (FansDB)" +sceneByFragment: + action: script + script: + - python + # use python3 instead if needed + - dc_onlyfans_fansdb.py + - queryScene +galleryByFragment: + action: script + script: + - python + # use python3 instead if needed + - dc_onlyfans_fansdb.py + - queryGallery + +# Last Updated October 21, 2023 \ No newline at end of file diff --git a/scrapers/Algolia_disruptivefilms.yml b/scrapers/disruptivefilms/disruptivefilms.yml similarity index 78% 
rename from scrapers/Algolia_disruptivefilms.yml rename to scrapers/disruptivefilms/disruptivefilms.yml index f857fb8e5..47373a0d5 100644 --- a/scrapers/Algolia_disruptivefilms.yml +++ b/scrapers/disruptivefilms/disruptivefilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "DisruptiveFilms" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - disruptivefilms.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - disruptivefilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Algolia_filthykings.yml b/scrapers/filthykings/filthykings.yml similarity index 77% rename from scrapers/Algolia_filthykings.yml rename to scrapers/filthykings/filthykings.yml index 55dee88d3..8c0941b7a 100644 --- a/scrapers/Algolia_filthykings.yml +++ b/scrapers/filthykings/filthykings.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "FilthyKings" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - filthykings.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - filthykings.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings - gallery # Last Updated December 22, 2022 diff --git a/scrapers/hanime.yml b/scrapers/hanime.yml index 0610201c2..20b479e88 100644 --- a/scrapers/hanime.yml +++ b/scrapers/hanime.yml @@ -1,4 +1,12 @@ name: hanime +sceneByFragment: + action: scrapeXPath + queryURL: https://hanime.tv/videos/hentai/{filename} + queryURLReplace: + filename: + - regex: ^([a-z\-0-9]+)(\-[0-9]{3,4}p).* + with: $1 + scraper: sceneScraper sceneByURL: - action: scrapeXPath url: @@ -35,4 +43,4 @@ xPathScrapers: - regex: "https://" with: "https://i1.wp.com/" -# Last Updated August 18, 2020 +# Last Updated May 22, 2023 diff --git a/scrapers/hussiemodels.yml b/scrapers/hussiemodels.yml index 2115ed8ab..cb0118581 100644 --- a/scrapers/hussiemodels.yml +++ b/scrapers/hussiemodels.yml @@ -8,7 +8,6 @@ sceneByURL: - seehimfuck.com/trailers - seehimsolo.com/trailers - povpornstars.com - # Removes all emoticons from Title, Description, Performers and Tags. 
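# (The hunk below deduplicates the emoticon-stripping postProcess with a YAML
#  anchor: &stripEmojis defines the replace block once and *stripEmojis reuses
#  it. The regex drops every non-ASCII character; illustratively, in Python:
#      re.sub(r"[^\x00-\x7F]", "", "Anna 😘 xoxo")  ->  'Anna  xoxo'
#  where the input string is hypothetical.)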
scraper: sceneScraper xPathScrapers: sceneScraper: @@ -22,24 +21,16 @@ xPathScrapers: Performers: Name: selector: //li[@class="update_models"]//a - postProcess: + postProcess: &stripEmojis - replace: - regex: "[^\x00-\x7F]" with: - Details: - selector: //meta[@property="og:description"]/@content - postProcess: - - replace: - - regex: "[^\x00-\x7F]" - with: + Details: //meta[@property="og:description"]/@content Image: //meta[@property="og:image"]/@content Tags: Name: selector: //li[@class="label" and contains(text(), "Tags:")]/following-sibling::text() - postProcess: - - replace: - - regex: "[^\x00-\x7F]" - with: + postProcess: *stripEmojis Studio: Name: selector: //head/base/@href @@ -54,4 +45,4 @@ xPathScrapers: seehimfuck: See Him Fuck seehimsolo: See Him Solo povpornstars: POV Pornstars -# Last Updated July 13, 2022 +# Last Updated August 25, 2023 diff --git a/scrapers/insex.yml b/scrapers/insex.yml index d795c3f19..afb291ca7 100644 --- a/scrapers/insex.yml +++ b/scrapers/insex.yml @@ -73,4 +73,43 @@ xPathScrapers: Name: $studio Image: *imageSelector -# Last Updated January 18, 2021 +driver: + cookies: + - CookieURL: "https://hardtied.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "hardtied.com" + Path: "/" + - CookieURL: "https://infernalrestraints.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "infernalrestraints.com" + Path: "/" + - CookieURL: "https://insex.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "insex.com" + Path: "/" + - CookieURL: "https://insexondemand.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "insexondemand.com" + Path: "/" + - CookieURL: "https://realtimebondage.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "realtimebondage.com" + Path: "/" + - CookieURL: "https://sexuallybroken.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "sexuallybroken.com" + Path: "/" + +# Last Updated August 02, 2023 diff --git a/scrapers/javdb.yml b/scrapers/javdb.yml index f9d7606e1..326cbbe4e 100644 --- a/scrapers/javdb.yml +++ b/scrapers/javdb.yml @@ -1,7 +1,7 @@ name: JavDB sceneByFragment: action: scrapeXPath - queryURL: https://javdb36.com/search?q={filename}&f=all + queryURL: https://javdb.com/search?q={filename}&f=all queryURLReplace: filename: - regex: \..+$ @@ -17,10 +17,10 @@ sceneByURL: queryURLReplace: url: - regex: javdb\.com - with: "javdb36.com" + with: "javdb.com" sceneByName: action: scrapeXPath - queryURL: https://javdb36.com/search?q={} + queryURL: https://javdb.com/search?q={} scraper: sceneSearch sceneByQueryFragment: action: scrapeXPath @@ -37,7 +37,7 @@ movieByURL: queryURLReplace: url: - regex: javdb\.com - with: "javdb36.com" + with: "javdb.com" xPathScrapers: sceneSearch: common: @@ -49,7 +49,7 @@ xPathScrapers: postProcess: - replace: - regex: ^ - with: "https://javdb36.com" + with: "https://javdb.com" Image: selector: $videoItem//img/@src postProcess: @@ -68,7 +68,7 @@ xPathScrapers: postProcess: - replace: - regex: ^ - with: "https://javdb36.com" + with: "https://javdb.com" # If you don't support cookie you can use this regex. 
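# It appends the locale as a query parameter instead, e.g. (URL illustrative):
#   https://javdb.com/v/8aK7w  ->  https://javdb.com/v/8aK7w?locale=en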
# - regex: $ # with: "?locale=en" @@ -135,6 +135,16 @@ driver: Domain: "javdb.com" Value: "en" Path: "/" + # Access to certain titles requires a javdb account + # Uncomment the below replacing the Value part + #- Name: "_jdb_session" + # Value: "Add here the actual value from your cookies" + # Path: "/" + # Domain: "javdb.com" + #- Name: "remember_me_token" + # Value: "Add here the actual value from your cookies" + # Path: "/" + # Domain: "javdb.com" - CookieURL: "https://javdb36.com" Cookies: - Name: "locale" @@ -151,4 +161,4 @@ driver: # Value: "Add here the actual value from your cookies" # Path: "/" # Domain: "javdb36.com" -# Last Updated June 02, 2022 +# Last Updated July 24, 2023 diff --git a/scrapers/jellyfin.py b/scrapers/jellyfin/jellyfin.py similarity index 100% rename from scrapers/jellyfin.py rename to scrapers/jellyfin/jellyfin.py diff --git a/scrapers/jellyfin.yml b/scrapers/jellyfin/jellyfin.yml similarity index 97% rename from scrapers/jellyfin.yml rename to scrapers/jellyfin/jellyfin.yml index 9017d10e8..0efe6754a 100644 --- a/scrapers/jellyfin.yml +++ b/scrapers/jellyfin/jellyfin.yml @@ -1,4 +1,6 @@ name: Jellyfin +# requires: py_common + performerByURL: - url: #Replace this with you local jellyfin host diff --git a/scrapers/lethalpass.yml b/scrapers/lethalpass.yml index e00210ae9..1183ddb62 100644 --- a/scrapers/lethalpass.yml +++ b/scrapers/lethalpass.yml @@ -17,7 +17,7 @@ xPathScrapers: with: "" - parseDate: 2 January 2006 Performers: - Name: //a[@class="model"]//a + Name: //a[@class="model"]//text() Details: //div[@class="description"]/p Movies: Name: @@ -29,4 +29,4 @@ xPathScrapers: Studio: Name: selector: //div[@class="pdSRC"]//img/@alt -# Last Updated March 04, 2021 +# Last Updated October 31, 2023 diff --git a/scrapers/multiscrape.py b/scrapers/multiscrape/multiscrape.py similarity index 100% rename from scrapers/multiscrape.py rename to scrapers/multiscrape/multiscrape.py diff --git a/scrapers/multiscrape.yml b/scrapers/multiscrape/multiscrape.yml similarity index 100% rename from scrapers/multiscrape.yml rename to scrapers/multiscrape/multiscrape.yml diff --git a/scrapers/Algolia_mypervyfamily.yml b/scrapers/mypervyfamily/mypervyfamily.yml similarity index 78% rename from scrapers/Algolia_mypervyfamily.yml rename to scrapers/mypervyfamily/mypervyfamily.yml index 2474ce329..74e353a1c 100644 --- a/scrapers/Algolia_mypervyfamily.yml +++ b/scrapers/mypervyfamily/mypervyfamily.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "MyPervyFamily" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - mypervyfamily.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - mypervyfamily.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily - gallery # Last Updated December 22, 2022 diff --git a/scrapers/performer-image-by-scene.py b/scrapers/performer-image-by-scene/performer-image-by-scene.py similarity index 95% rename from scrapers/performer-image-by-scene.py rename to scrapers/performer-image-by-scene/performer-image-by-scene.py index f4ab52c7d..65eafd696 100644 --- a/scrapers/performer-image-by-scene.py +++ 
b/scrapers/performer-image-by-scene/performer-image-by-scene.py @@ -63,10 +63,10 @@ def announce_result_to_stash(result): performer_name = parsed_name.group(1) scene_id = parsed_name.group(2) log.debug(f"Using scene {scene_id} to get performer image") - performer_scene = graphql.getSceneScreenshot(scene_id) + screenshot = graphql.getSceneScreenshot(scene_id) performer = {'Name': performer_name, - 'Image': performer_scene['paths']['screenshot'], - 'Images': [performer_scene['paths']['screenshot']]} + 'Image': screenshot, + 'Images': [screenshot]} announce_result_to_stash(performer) else: # search for scenes with the performer diff --git a/scrapers/performer-image-by-scene.yml b/scrapers/performer-image-by-scene/performer-image-by-scene.yml similarity index 92% rename from scrapers/performer-image-by-scene.yml rename to scrapers/performer-image-by-scene/performer-image-by-scene.yml index fbbacc265..c046fb2ea 100644 --- a/scrapers/performer-image-by-scene.yml +++ b/scrapers/performer-image-by-scene/performer-image-by-scene.yml @@ -1,4 +1,5 @@ name: Performer Image by scene cover +# requires: py_common performerByFragment: action: script diff --git a/scrapers/performer-image-dir.py b/scrapers/performer-image-dir/performer-image-dir.py similarity index 100% rename from scrapers/performer-image-dir.py rename to scrapers/performer-image-dir/performer-image-dir.py diff --git a/scrapers/performer-image-dir.yml b/scrapers/performer-image-dir/performer-image-dir.yml similarity index 100% rename from scrapers/performer-image-dir.yml rename to scrapers/performer-image-dir/performer-image-dir.yml diff --git a/scrapers/pornworld.yml b/scrapers/pornworld.yml index 24e3baa83..9bd4a92d9 100644 --- a/scrapers/pornworld.yml +++ b/scrapers/pornworld.yml @@ -1,31 +1,43 @@ +# yaml-language-server: $schema=../validator/scraper.schema.json + name: Pornworld + sceneByURL: - action: scrapeXPath url: - pornworld.com/watch/ scraper: sceneScraper + sceneByFragment: action: scrapeJson queryURL: https://pornworld.com/autocomplete?query={filename} queryURLReplace: filename: - - regex: .+(GP\d+).* + - regex: .*(GP\d+).* with: $1 scraper: sceneQueryScraper + jsonScrapers: sceneQueryScraper: scene: Title: terms.Scene.0.name URL: terms.Scene.0.url + xPathScrapers: sceneScraper: scene: - Title: + Title: selector: //h1 postProcess: - replace: - - regex: featuring.+ - with: "" + - regex: featuring.+ + with: "" + Code: + selector: //h1 + postProcess: + - replace: + - regex: .*(GP\d+).* + with: $1 Date: //i[contains(@class,"calendar3")]/text() Details: selector: //meta[@name="description"]/@content @@ -41,5 +53,5 @@ xPathScrapers: Name: fixed: Porn World Image: //video/@data-poster - -# Last Updated August 10, 2021 + URL: //div[@class="pagination"]/a[1]/@href +# Last Updated March 03, 2023 diff --git a/scrapers/py_common/graphql.py b/scrapers/py_common/graphql.py index 09f619adf..3fa7eb93b 100644 --- a/scrapers/py_common/graphql.py +++ b/scrapers/py_common/graphql.py @@ -1,66 +1,83 @@ import sys -from typing import Union try: import requests except ModuleNotFoundError: - print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr) + print( + "You need to install the requests module. 
(https://docs.python-requests.org/en/latest/user/install/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", + file=sys.stderr, + ) sys.exit() try: import py_common.config as config import py_common.log as log + from py_common.util import dig except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) sys.exit() -def callGraphQL(query, variables=None): - api_key = "" - if config.STASH.get("api_key"): - api_key = config.STASH["api_key"] - - if config.STASH.get("url") is None: +def callGraphQL(query: str, variables: dict | None = None): + api_key = config.STASH.get("api_key", "") + url = config.STASH.get("url", "") + if not url: log.error("You need to set the URL in 'config.py'") return None + elif "stashdb.org" in url: + log.error("You need to set the URL in 'config.py' to your own stash server") + return None stash_url = config.STASH["url"] + "/graphql" headers = { - "Accept-Encoding": "gzip, deflate, br", + "Accept-Encoding": "gzip, deflate", "Content-Type": "application/json", "Accept": "application/json", "Connection": "keep-alive", "DNT": "1", - "ApiKey": api_key - } - json = { - 'query': query + "ApiKey": api_key, } + json = {"query": query} if variables is not None: - json['variables'] = variables - try: - response = requests.post(stash_url, json=json, headers=headers) - if response.status_code == 200: - result = response.json() - if result.get("error"): - for error in result["error"]["errors"]: - raise Exception("GraphQL error: {}".format(error)) - if result.get("data"): - return result.get("data") - elif response.status_code == 401: - log.error( - "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder") + json["variables"] = variables # type: ignore + response = requests.post(stash_url, json=json, headers=headers) + if response.status_code == 200: + result = response.json() + if errors := result.get("error"): + errors = "\n".join(errors) + log.error(f"[ERROR][GraphQL] {errors}") return None + if result.get("data"): + return result.get("data") + elif response.status_code == 401: + log.error( + "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder" + ) + return None + elif response.status_code == 404: + if "localhost:9999" in url: + log.error( + "[ERROR][GraphQL] HTTP Error 404, Not Found. Your local stash server is your endpoint, but port 9999 did not respond. Did you change stash's port? Edit 'config.py' in the 'py_common' folder to point at the correct port for stash!" + ) else: - raise ConnectionError( - "GraphQL query failed:{} - {}".format(response.status_code, response.content)) - except Exception as err: - log.error(err) + log.error( + "[ERROR][GraphQL] HTTP Error 404, Not Found. Make sure 'config.py' in the 'py_common' folder points at the correct address and port!" 
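+ # (e.g. when stash no longer listens on the default localhost:9999)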
+ ) return None + raise ConnectionError( + f"GraphQL query failed: {response.status_code} - {response.content}" + ) -def configuration(): + +def configuration() -> dict | None: query = """ query Configuration { configuration { @@ -110,7 +127,6 @@ def configuration(): username password maxSessionAge - trustedProxies logFile logOut logLevel @@ -122,9 +138,6 @@ def configuration(): excludes imageExcludes customPerformerImageLocation - scraperUserAgent - scraperCertCheck - scraperCDPPath stashBoxes { name endpoint @@ -145,8 +158,15 @@ def configuration(): css cssEnabled language - slideshowDelay - disabledDropdownCreate { + imageLightbox { + slideshowDelay + displayMode + scaleUp + resetZoomOnNav + scrollMode + scrollAttemptsBeforeChange + } + disableDropdownCreate { performer tag studio @@ -168,8 +188,6 @@ def configuration(): } fragment ConfigDefaultSettingsData on ConfigDefaultSettingsResult { scan { - useFileMetadata - stripFileExtension scanGeneratePreviews scanGenerateImagePreviews scanGenerateSprites @@ -216,7 +234,6 @@ def configuration(): deleteGenerated } fragment ScraperSourceData on ScraperSource { - stash_box_index stash_box_endpoint scraper_id } @@ -234,13 +251,11 @@ def configuration(): createMissing } """ - result = callGraphQL(query) - if result: - return result.get("configuration") - return None + result = callGraphQL(query) or {} + return dig(result, "configuration") -def getScene(scene_id): +def getScene(scene_id: str | int) -> dict | None: query = """ query FindScene($id: ID!, $checksum: String) { findScene(id: $id, checksum: $checksum) { @@ -249,27 +264,24 @@ def getScene(scene_id): } fragment SceneData on Scene { id - checksum - oshash title details - url + urls date - rating + rating100 o_counter organized - path - phash interactive - file { + files { + path size duration video_codec audio_codec width height - framerate - bitrate + frame_rate + bit_rate } paths { screenshot @@ -329,21 +341,14 @@ def getScene(scene_id): } fragment SlimGalleryData on Gallery { id - checksum - path title date - url + urls details - rating + rating100 organized image_count cover { - file { - size - width - height - } paths { thumbnail } @@ -367,7 +372,10 @@ def getScene(scene_id): scenes { id title - path + files { + path + basename + } } } fragment SlimStudioData on Studio { @@ -382,17 +390,16 @@ def getScene(scene_id): id } details - rating + rating100 aliases } fragment MovieData on Movie { id - checksum name aliases duration date - rating + rating100 director studio { ...SlimStudioData @@ -405,7 +412,9 @@ def getScene(scene_id): scenes { id title - path + files { + path + } } } fragment SlimTagData on Tag { @@ -416,7 +425,6 @@ def getScene(scene_id): } fragment PerformerData on Performer { id - checksum name url gender @@ -426,13 +434,13 @@ def getScene(scene_id): ethnicity country eye_color - height + height_cm measurements fake_tits career_length tattoos piercings - aliases + alias_list favorite image_path scene_count @@ -446,24 +454,19 @@ def getScene(scene_id): stash_id endpoint } - rating + rating100 details death_date hair_color weight } """ - - variables = { - "id": scene_id - } - result = callGraphQL(query, variables) - if result: - return result.get('findScene') - return None + variables = {"id": str(scene_id)} + result = callGraphQL(query, variables) or {} + return dig(result, "findScene") -def getSceneScreenshot(scene_id): +def getSceneScreenshot(scene_id: str | int) -> str | None: query = """ query FindScene($id: ID!, $checksum: String) { findScene(id: $id, checksum: 
$checksum) { @@ -474,18 +477,14 @@ def getSceneScreenshot(scene_id): } } """ - variables = { - "id": scene_id - } - result = callGraphQL(query, variables) - if result: - return result.get('findScene') - return None + variables = {"id": str(scene_id)} + result = callGraphQL(query, variables) or {} + return dig(result, "findScene", "paths", "screenshot") -def getSceneByPerformerId(performer_id): +def getSceneByPerformerId(performer_id: str | int) -> dict | None: query = """ - query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { +query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { findScenes(filter: $filter, scene_filter: $scene_filter, scene_ids: $scene_ids) { count filesize @@ -500,17 +499,25 @@ def getSceneByPerformerId(performer_id): fragment SceneData on Scene { id - checksum - oshash title details - url + urls date - rating + rating100 o_counter organized - path - phash + files { + path + size + duration + video_codec + audio_codec + width + height + frame_rate + bit_rate + __typename + } interactive interactive_speed captions { @@ -520,17 +527,6 @@ def getSceneByPerformerId(performer_id): } created_at updated_at - file { - size - duration - video_codec - audio_codec - width - height - framerate - bitrate - __typename - } paths { screenshot preview @@ -613,22 +609,14 @@ def getSceneByPerformerId(performer_id): fragment SlimGalleryData on Gallery { id - checksum - path title date - url + urls details - rating + rating100 organized image_count cover { - file { - size - width - height - __typename - } paths { thumbnail __typename @@ -657,7 +645,9 @@ def getSceneByPerformerId(performer_id): scenes { id title - path + files { + path + } __typename } __typename @@ -677,19 +667,18 @@ def getSceneByPerformerId(performer_id): __typename } details - rating + rating100 aliases __typename } fragment MovieData on Movie { id - checksum name aliases duration date - rating + rating100 director studio { ...SlimStudioData @@ -703,7 +692,9 @@ def getSceneByPerformerId(performer_id): scenes { id title - path + files { + path + } __typename } __typename @@ -719,7 +710,6 @@ def getSceneByPerformerId(performer_id): fragment PerformerData on Performer { id - checksum name url gender @@ -729,13 +719,13 @@ def getSceneByPerformerId(performer_id): ethnicity country eye_color - height + height_cm measurements fake_tits career_length tattoos piercings - aliases + alias_list favorite ignore_auto_tag image_path @@ -752,7 +742,7 @@ def getSceneByPerformerId(performer_id): endpoint __typename } - rating + rating100 details death_date hair_color @@ -761,34 +751,25 @@ def getSceneByPerformerId(performer_id): } """ variables = { - "filter": { - "page": 1, - "per_page": 20, - "sort": "title", - "direction": "ASC" - }, + "filter": {"page": 1, "per_page": 20, "sort": "title", "direction": "ASC"}, "scene_filter": { - "performers": { - "value": [str(performer_id)], - "modifier": "INCLUDES_ALL" - } - } + "performers": {"value": [str(performer_id)], "modifier": "INCLUDES_ALL"} + }, } + result = callGraphQL(query, variables) or {} + return dig(result, "findScenes") - result = callGraphQL(query, variables) - if result: - return result.get('findScenes') - return None - -def getSceneIdByPerformerId(performer_id): +def getSceneIdByPerformerId(performer_id: str | int) -> dict | None: query = """ query FindScenes($filter: FindFilterType, $scene_filter: SceneFilterType, $scene_ids: [Int!]) { findScenes(filter: $filter, scene_filter: $scene_filter, 
scene_ids: $scene_ids) { scenes { id title - path + files { + path + } paths { screenshot } @@ -797,27 +778,16 @@ def getSceneIdByPerformerId(performer_id): } """ variables = { - "filter": { - "page": 1, - "per_page": 20, - "sort": "id", - "direction": "DESC" - }, + "filter": {"page": 1, "per_page": 20, "sort": "id", "direction": "DESC"}, "scene_filter": { - "performers": { - "value": [str(performer_id)], - "modifier": "INCLUDES_ALL" - } - } + "performers": {"value": [str(performer_id)], "modifier": "INCLUDES_ALL"} + }, } - - result = callGraphQL(query, variables) - if result: - return result.get('findScenes') - return None + result = callGraphQL(query, variables) or {} + return dig(result, "findScenes") -def getPerformersByName(performer_name): +def getPerformersByName(performer_name: str) -> dict | None: query = """ query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { findPerformers(filter: $filter, performer_filter: $performer_filter) { @@ -832,7 +802,6 @@ def getPerformersByName(performer_name): fragment PerformerData on Performer { id - checksum name url gender @@ -842,13 +811,13 @@ def getPerformersByName(performer_name): ethnicity country eye_color - height + height_cm measurements fake_tits career_length tattoos piercings - aliases + alias_list favorite ignore_auto_tag image_path @@ -865,7 +834,7 @@ def getPerformersByName(performer_name): endpoint __typename } - rating + rating100 details death_date hair_color @@ -888,18 +857,15 @@ def getPerformersByName(performer_name): "page": 1, "per_page": 20, "sort": "name", - "direction": "ASC" + "direction": "ASC", }, - "performer_filter": {} + "performer_filter": {}, } + result = callGraphQL(query, variables) or {} + return dig(result, "findPerformers") - result = callGraphQL(query, variables) - if result: - return result.get('findPerformers') - return None - -def getPerformersIdByName(performer_name): +def getPerformersIdByName(performer_name: str) -> dict | None: query = """ query FindPerformers($filter: FindFilterType, $performer_filter: PerformerFilterType) { findPerformers(filter: $filter, performer_filter: $performer_filter) { @@ -913,7 +879,7 @@ def getPerformersIdByName(performer_name): fragment PerformerData on Performer { id name - aliases + alias_list } """ @@ -923,18 +889,16 @@ def getPerformersIdByName(performer_name): "page": 1, "per_page": 20, "sort": "name", - "direction": "ASC" + "direction": "ASC", }, - "performer_filter": {} + "performer_filter": {}, } - result = callGraphQL(query, variables) - if result: - return result.get('findPerformers') - return None + result = callGraphQL(query, variables) or {} + return dig(result, "findPerformers") -def getGallery(gallery_id): +def getGallery(gallery_id: str | int) -> dict | None: query = """ query FindGallery($id: ID!) { findGallery(id: $id) { @@ -943,18 +907,16 @@ def getGallery(gallery_id): } fragment GalleryData on Gallery { id - checksum - path created_at updated_at title date - url + urls details - rating + rating100 organized - images { - ...SlimImageData + folder { + path } cover { ...SlimImageData @@ -965,7 +927,6 @@ def getGallery(gallery_id): tags { ...SlimTagData } - performers { ...PerformerData } @@ -975,18 +936,18 @@ def getGallery(gallery_id): } fragment SlimImageData on Image { id - checksum title - rating + rating100 organized o_counter - path - - file { - size - width - height + visual_files { + ... 
on ImageFile { + path + size + height + width } + } paths { thumbnail @@ -995,7 +956,9 @@ def getGallery(gallery_id): galleries { id - path + files { + path + } title } @@ -1030,7 +993,7 @@ def getGallery(gallery_id): id } details - rating + rating100 aliases } fragment SlimTagData on Tag { @@ -1041,7 +1004,6 @@ def getGallery(gallery_id): } fragment PerformerData on Performer { id - checksum name url gender @@ -1051,13 +1013,13 @@ def getGallery(gallery_id): ethnicity country eye_color - height + height_cm measurements fake_tits career_length tattoos piercings - aliases + alias_list favorite image_path scene_count @@ -1073,7 +1035,7 @@ def getGallery(gallery_id): stash_id endpoint } - rating + rating100 details death_date hair_color @@ -1081,28 +1043,25 @@ def getGallery(gallery_id): } fragment SlimSceneData on Scene { id - checksum - oshash title details - url + urls date - rating + rating100 o_counter organized - path - phash interactive - file { + files { + path size duration video_codec audio_codec width height - framerate - bitrate + frame_rate + bit_rate } paths { @@ -1111,7 +1070,6 @@ def getGallery(gallery_id): stream webp vtt - chapters_vtt sprite funscript } @@ -1124,8 +1082,10 @@ def getGallery(gallery_id): galleries { id - path title + files { + path + } } studio { @@ -1161,30 +1121,27 @@ def getGallery(gallery_id): stash_id } } - - """ - variables = { - "id": gallery_id - } - result = callGraphQL(query, variables) - if result: - return result.get('findGallery') - return None + variables = {"id": gallery_id} + result = callGraphQL(query, variables) or {} + return dig(result, "findGallery") -def getGalleryPath(gallery_id): +def getGalleryPath(gallery_id: str | int) -> str | None: query = """ query FindGallery($id: ID!) { findGallery(id: $id) { - path + folder { + path + } + files { + path + } } } """ - variables = { - "id": gallery_id - } - result = callGraphQL(query, variables) - if result: - return result.get('findGallery') - return None + variables = {"id": gallery_id} + result = callGraphQL(query, variables) or {} + # Galleries can either be a folder full of files or a zip file + return dig(result, "findGallery", "folder", "path") \ + or dig(result, "findGallery", "files", 0, "path") diff --git a/scrapers/py_common/package b/scrapers/py_common/package new file mode 100644 index 000000000..6c171ab07 --- /dev/null +++ b/scrapers/py_common/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: py_common diff --git a/scrapers/py_common/types.py b/scrapers/py_common/types.py new file mode 100644 index 000000000..de08ed577 --- /dev/null +++ b/scrapers/py_common/types.py @@ -0,0 +1,116 @@ +from typing import Literal, Required, TypedDict + +""" +Types for outputs that scrapers can produce and that Stash will accept +""" + +class ScrapedTag(TypedDict): + name: str + "Name is the only required field" + +class ScrapedPerformer(TypedDict, total=False): + name: Required[str] + "Name is the only required field" + disambiguation: str + "This is only added through Tagger view" + gender: Literal["MALE", "FEMALE", "TRANSGENDER_MALE", "TRANSGENDER_FEMALE", "INTERSEX", "NON_BINARY"] + url: str + twitter: str + instagram: str + birthdate: str + "Must be in the format YYYY-MM-DD" + death_date: str + "Must be in the format YYYY-MM-DD" + ethnicity: Literal["CAUCASIAN", "BLACK", "ASIAN", "INDIAN", "LATIN", "MIDDLE_EASTERN", "MIXED", "OTHER"] + country: str + "Not validated" + eye_color: Literal["BLUE", "BROWN", "GREEN", "GREY", "HAZEL", "RED"] + hair_color: Literal["BLONDE", "BRUNETTE", 
"BLACK", "RED", "AUBURN", "GREY", "BALD", "VARIOUS", "OTHER"] + "Hair color, can be 'VARIOUS' or 'OTHER' if the performer has multiple hair colors" + height: str + "Height in centimeters" + weight: str + "Weight in kilograms" + measurements: str + "bust-waist-hip measurements in centimeters, with optional cupsize for bust (e.g. 90-60-90, 90C-60-90)" + fake_tits: str + penis_length: str + circumcised: str + career_length: str + tattoos: str + piercings: str + aliases: str + "Must be comma-delimited in order to be parsed correctly" + tags: list[ScrapedTag] + image: str + images: list[str] + "Images can be URLs or base64-encoded images" + details: str + +class ScrapedStudio(TypedDict, total=False): + name: Required[str] + "Name is the only required field" + url: str + parent: 'ScrapedStudio' + image: str + +class ScrapedMovie(TypedDict, total=False): + name: Required[str] + date: str + "Must be in the format YYYY-MM-DD" + duration: str + "Duration in seconds" + director: str + synopsis: str + studio: ScrapedStudio + rating: str + front_image: str + back_image: str + URL: str + aliases: str + +class ScrapedGallery(TypedDict, total=False): + title: Required[str] + details: str + url: str + urls: list[str] + date: str + "Must be in the format YYYY-MM-DD" + studio: ScrapedStudio + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + +class ScrapedScene(TypedDict, total=False): + title: str + details: str + url: str + urls: list[str] + date: str + image: str + studio: ScrapedStudio + movies: list[ScrapedMovie] + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + code: str + director: str + +# Technically we can return a full ScrapedPerformer but the current UI only +# shows the name. The URL is absolutely necesserary for the result to be used +# in the next step: actually scraping the performer +class PerformerSearchResult(TypedDict): + name: str + url: str + +# Technically we can return a full ScrapedScene but the current UI only +# shows the name, image, studio, tags and performers. 
The URL is absolutely +# necesserary for the result to be used in the next step: actually scraping the scene +class SceneSearchResult(TypedDict, total=False): + title: Required[str] + url: Required[str] + date: str + "Must be in the format YYYY-MM-DD" + image: str + "Image can be a URL or base64-encoded image" + tags: list[ScrapedTag] + performers: list[ScrapedPerformer] + studio: ScrapedStudio diff --git a/scrapers/py_common/util.py b/scrapers/py_common/util.py new file mode 100644 index 000000000..846e2c7ef --- /dev/null +++ b/scrapers/py_common/util.py @@ -0,0 +1,35 @@ +from functools import reduce +from typing import Any + + +def dig(c: dict | list, *keys: str | int | tuple, default=None) -> Any: + """ + Helper function to get a value from a nested dict or list + + If a key is a tuple the items will be tried in order until a value is found + + :param c: dict or list to search + :param keys: keys to search for + :param default: default value to return if not found + :return: value if found, None otherwise + + >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}} + >>> dig(obj, "a", "b", 1) + 'd' + >>> dig(obj, "a", ("e", "f"), "g") + 'h' + """ + + def inner(d: dict | list, key: str | int | tuple): + if isinstance(d, dict): + if isinstance(key, tuple): + for k in key: + if k in d: + return d[k] + return d.get(key) + elif isinstance(d, list) and isinstance(key, int) and key < len(d): + return d[key] + else: + return default + + return reduce(inner, keys, c) # type: ignore diff --git a/scrapers/rb_common/package b/scrapers/rb_common/package new file mode 100644 index 000000000..363e2f4ff --- /dev/null +++ b/scrapers/rb_common/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: rb_common diff --git a/scrapers/sexuria.yml b/scrapers/sexuria.yml new file mode 100644 index 000000000..30a82264c --- /dev/null +++ b/scrapers/sexuria.yml @@ -0,0 +1,51 @@ +name: Sexuria +sceneByURL: + - action: scrapeXPath + url: + - sexuria.net + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: "https://sexuria.net/f/j.x.name-released;p.title={}/sort=date/order=desc/" + scraper: searchScraper +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //*[@id="dle-content"]/article/h1 + Details: + selector: //h4[text()='Description']/following-sibling::div/text() + concat: " " + Date: + selector: //div[text()='Release Date:']/following-sibling::div/text() + postProcess: + - parseDate: Jan 02 2006 + Image: //*[@id="dle-content"]/article/div[1]/div[1]/a/@href + Studio: + Name: //div[text()="Production:"]/following-sibling::div/a/text() + Movies: + Name: //*[@id="dle-content"]/article/h1 + URL: //link[@rel="canonical"]/@href + Tags: + Name: //div[text()="Tags:"]/following-sibling::div/a/text() + Performers: + Name: //div[@class="stars-porn"]/a/text() + URL: //meta[@name='og:url']/@content + searchScraper: + common: + $result: //article[contains(@class, "short")] + scene: + Title: $result//h2 + URL: ($result//a)[1]/@href + Details: $result//div[@class="author-cat"] + Date: $result//div[@class="date"] + Image: + selector: $result//img/@data-src + postProcess: + - replace: + - regex: ^ + with: https://sexuria.net +# Last Updated December 25, 2023 diff --git a/scrapers/stash-sqlite.py b/scrapers/stash-sqlite/stash-sqlite.py similarity index 100% rename from scrapers/stash-sqlite.py rename to scrapers/stash-sqlite/stash-sqlite.py diff --git a/scrapers/stash-sqlite.yml b/scrapers/stash-sqlite/stash-sqlite.yml 
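As an illustration of the dig helper added in py_common/util.py above, and of
the folder-or-zip fallback used by getGalleryPath, here is a short sketch with
hypothetical data:

    from py_common.util import dig

    result = {"findGallery": {"folder": None, "files": [{"path": "/data/set.zip"}]}}

    # dig returns None instead of raising when a key along the way is missing,
    # so `or` can chain the folder path with the zip-file fallback
    path = dig(result, "findGallery", "folder", "path") \
        or dig(result, "findGallery", "files", 0, "path")
    print(path)  # -> /data/set.zip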
similarity index 100% rename from scrapers/stash-sqlite.yml rename to scrapers/stash-sqlite/stash-sqlite.yml diff --git a/scrapers/torrent.py b/scrapers/torrent.py deleted file mode 100644 index 2e5e283d2..000000000 --- a/scrapers/torrent.py +++ /dev/null @@ -1,66 +0,0 @@ -import os -import sys -import json - -try: - import torrent_parser as tp -except ModuleNotFoundError: - print("You need to download the file 'torrent_parser.py' from the community repo! (CommunityScrapers/tree/master/scrapers/torrent_parser.py)", file=sys.stderr) - sys.exit() - -''' This script parses all torrent files in the specified directory for embedded metadata. - The title can either be a filename or the filename of the .torrent file - - This requires python3. - This uses the torrent_parser library to parse torrent files from: https://github.com/7sDream/torrent_parser - This library is under the MIT Licence. - - ''' - -path='./torrents/' - -def query(title): -# print(f"Test",file=sys.stderr) - for root,dirs,files in os.walk(path): - for name in files: - if '.torrent' in name: - query_torrent(title,os.path.join(root,name)) - -def query_torrent(title,path,found=False): - data=tp.parse_torrent_file(path) - # does the torrent contain more than one file and check if the file name we want is in the list - if not found and 'files' in data['info']: - for d in data['info']['files']: - for f in d['path']: - if title in f: - found=True - elif title in data['info']['name']: - found=True - if found: - res={'title':title} - if 'metadata' in data: - if 'title' in data['metadata']: - res['title']=data['metadata']['title'] - if 'cover url' in data['metadata']: - res['image']=data['metadata']['cover url'] - if 'description' in data['metadata']: - res['details']=data['metadata']['description'] - if 'taglist' in data['metadata']: - res['tags']=[{"name":x} for x in data['metadata']['taglist']] - - print(json.dumps(res)) - exit(0) -def lookup_torrent(title): - for root,dirs,files in os.walk(path): - if title in files: - query_torrent(title,os.path.join(root,title),found=True) - -if sys.argv[1] == "query": - fragment = json.loads(sys.stdin.read()) - title=fragment['title'] - if '.torrent' in title: - lookup_torrent(title) - else: - query(title) - print(json.dumps(fragment)) -# Last Updated February 18, 2021 diff --git a/scrapers/torrent.yml b/scrapers/torrent.yml deleted file mode 100644 index 3eca5cbc8..000000000 --- a/scrapers/torrent.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: "Torrent" -sceneByFragment: - action: script - script: - - python - # use python3 instead if needed - - torrent.py - - query - -# Last Updated February 04, 2021 diff --git a/scrapers/torrent/torrent.py b/scrapers/torrent/torrent.py new file mode 100644 index 000000000..2ee32ac71 --- /dev/null +++ b/scrapers/torrent/torrent.py @@ -0,0 +1,150 @@ +import sys +import json +import os +from pathlib import Path +import re +from datetime import datetime +import difflib + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + from bencoder import bdecode +except ModuleNotFoundError: + print("You need to install the 'bencoder.pyx' module. 
(https://pypi.org/project/bencoder.pyx/)", file=sys.stderr) + sys.exit() + +try: + from py_common import graphql +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! " + "(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() + +TORRENTS_PATH = Path("torrents") + + +def get_scene_data(fragment_data): + scene_id = fragment_data["id"] + scene_title = fragment_data["title"] + scene_files = [] + + response = graphql.callGraphQL(""" + query FileInfoBySceneId($id: ID) { + findScene(id: $id) { + files { + path + size + } + } + }""", {"id": scene_id}) + + if response and response["findScene"]: + for f in response["findScene"]["files"]: + scene_files.append({"filename": os.path.basename(f["path"]), "size": f["size"]}) + return {"id": scene_id, "title": scene_title, "files": scene_files} + return {} + +def process_tags_performers(tagList): + return map(lambda tag: decode_bytes(tag).replace('.', ' '), tagList) + +def process_description_bbcode(description): + #Remove image tags + res = re.sub(r'\[img\]([^\[]*)\[\/img\]',r"", description ) + + #Remove bbcode & replace with the contained text + res = re.sub(r'\[.*?\]([^\[]*)\[\/(?:b|i|u|s|url|quote)\]',r"\1", res ) + + #Cleanup any bbcode tags that may have been left behind + res = re.sub(r'\[.*?\]',r'',res) + + #Remove excessive newlines + res = re.sub(r'[\r|\n]{3,}', '\r\n\r\n', res) + return res.strip() + +def get_torrent_metadata(torrent_data): + res = {} + + if b"metadata" in torrent_data: + if b"title" in torrent_data[b"metadata"]: + res["title"] = decode_bytes(torrent_data[b"metadata"][b"title"]) + if b"cover url" in torrent_data[b"metadata"]: + res["image"] = decode_bytes(torrent_data[b"metadata"][b"cover url"]) + if b"description" in torrent_data[b"metadata"]: + res["details"] = process_description_bbcode(decode_bytes(torrent_data[b"metadata"][b"description"])) + if b"taglist" in torrent_data[b"metadata"]: + res["tags"] = [{"name": decode_bytes(t)} for t in torrent_data[b"metadata"][b"taglist"]] + if b"taglist" in torrent_data[b"metadata"]: + res["performers"]=[{"name":x} for x in process_tags_performers(torrent_data[b"metadata"][b"taglist"])] + if b"comment" in torrent_data: + res["url"] = decode_bytes(torrent_data[b"comment"]) + if b"creation date" in torrent_data: + res["date"] = datetime.fromtimestamp(torrent_data[b"creation date"]).strftime("%Y-%m-%d") + return res + + +def decode_bytes(s, encodings=("utf-8", "latin-1")): + for enc in encodings: + try: + return s.decode(enc) + except UnicodeDecodeError: + pass + return s.decode("utf-8", "ignore") + + +def scene_in_torrent(scene_data, torrent_data): + for scene in scene_data["files"]: + if b"length" in torrent_data[b"info"]: + if scene["filename"] in decode_bytes(torrent_data[b"info"][b"name"]) and torrent_data[b"info"][b"length"] == scene["size"]: + return True + elif b"files" in torrent_data[b"info"]: + for file in torrent_data[b"info"][b"files"]: + if scene["filename"] in decode_bytes(file[b"path"][-1]) and file[b"length"] == scene["size"]: + return True + + +def process_torrents(scene_data): + if scene_data: + for name in TORRENTS_PATH.glob("*.torrent"): + with open(name, "rb") as f: + torrent_data = bdecode(f.read()) + if scene_in_torrent(scene_data, torrent_data): + return get_torrent_metadata(torrent_data) + return {} + +def similarity_file_name(search, fileName): + result = difflib.SequenceMatcher(a=search.lower(), b=fileName.lower()) + return result.ratio() + +def cleanup_name(name): + ret = 
str(name) + ret = ret.removeprefix("torrents\\").removesuffix(".torrent") + return ret + +if sys.argv[1] == "query": + fragment = json.loads(sys.stdin.read()) + print(json.dumps(process_torrents(get_scene_data(fragment)))) +elif sys.argv[1] == "fragment": + filename = json.loads(sys.stdin.read()).get('url') + with open(filename, 'rb') as f: + torrent_data = bdecode(f.read()) + print(json.dumps(get_torrent_metadata(torrent_data))) +elif sys.argv[1] == "search": + search = json.loads(sys.stdin.read()).get('name') + torrents = list(TORRENTS_PATH.rglob('*.torrent')) + ratios = {} + for t in torrents: + clean_t = cleanup_name(t) + ratios[round(10000*(1-similarity_file_name(search, clean_t)))] = {'url': str(t.absolute()), 'title': clean_t} + + # Order ratios + ratios_sorted = dict(sorted(ratios.items())[:5]) + + print(json.dumps(list(ratios_sorted.values()))) + +# Last Updated June 12, 2023 diff --git a/scrapers/torrent/torrent.yml b/scrapers/torrent/torrent.yml new file mode 100644 index 000000000..ab8f49a7d --- /dev/null +++ b/scrapers/torrent/torrent.yml @@ -0,0 +1,25 @@ +name: Torrent +# requires: py_common + +sceneByFragment: + action: script + script: + - python3 + - torrent.py + - query + +sceneByName: + action: script + script: + - python3 + - torrent.py + - search + +sceneByQueryFragment: + action: script + script: + - python3 + - torrent.py + - fragment + +# Last Updated December 16, 2022 \ No newline at end of file diff --git a/scrapers/torrent_parser.py b/scrapers/torrent_parser.py deleted file mode 100644 index bc6e0e7b7..000000000 --- a/scrapers/torrent_parser.py +++ /dev/null @@ -1,646 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -""" -A .torrent file parser for both Python 2 and 3 - -Usage: - - data = parse_torrent_file(filename) - - # or - - with open(filename, 'rb') as f: # the binary mode 'b' is necessary - data = TorrentFileParser(f).parse() - - # then you can edit the data - - data['announce-list'].append(['http://127.0.0.1:8080']) - - # and create a new torrent file from data - - create_torrent_file('new.torrent', data) - - # or - - with open('new.torrent', 'wb') as f: - f.write(TorrentFileCreator(data).encode()) - - # or you don't deal with file, just object in memory - - data = decode(b'i12345e') # data = 12345 - content = encode(data) # content = b'i12345e' - -""" - -from __future__ import print_function, unicode_literals - -import argparse -import binascii -import collections -import io -import json -import sys -import warnings - -try: - FileNotFoundError -except NameError: - # Python 2 do not have FileNotFoundError, use IOError instead - # noinspection PyShadowingBuiltins - FileNotFoundError = IOError - -try: - # noinspection PyPackageRequirements - from chardet import detect as _detect -except ImportError: - def _detect(_): - warnings.warn("No chardet module installed, encoding will be utf-8") - return {'encoding': 'utf-8', 'confidence': 1} - -try: - # noinspection PyUnresolvedReferences - # For Python 2 - str_type = unicode -except NameError: - # For Python 3 - str_type = str - -__all__ = [ - 'InvalidTorrentDataException', - 'BEncoder', - 'BDecoder', - 'encode', - 'decode', - 'TorrentFileParser', - 'create_torrent_file', - 'parse_torrent_file', -] - -__version__ = '0.3.0' - - -def detect(content): - return _detect(content)['encoding'] - - -class InvalidTorrentDataException(Exception): - def __init__(self, pos, msg=None): - msg = msg or "Invalid torrent format when read at pos {pos}" - msg = msg.format(pos=pos) - super(InvalidTorrentDataException, 
self).__init__(msg) - - -class __EndCls(object): - pass - - -_END = __EndCls() - - -def _check_hash_field_params(name, value): - return isinstance(name, str_type) \ - and isinstance(value, tuple) and len(value) == 2 \ - and isinstance(value[0], int) and isinstance(value[1], bool) - - -class TorrentFileParser(object): - - TYPE_LIST = 'list' - TYPE_DICT = 'dict' - TYPE_INT = 'int' - TYPE_STRING = 'string' - TYPE_END = 'end' - - LIST_INDICATOR = b'l' - DICT_INDICATOR = b'd' - INT_INDICATOR = b'i' - END_INDICATOR = b'e' - STRING_INDICATOR = b'' - STRING_DELIMITER = b':' - - HASH_FIELD_PARAMS = { - # field length need_list - 'pieces': (20, True), - 'ed2k': (16, False), - 'filehash': (20, False), - } - - TYPES = [ - (TYPE_LIST, LIST_INDICATOR), - (TYPE_DICT, DICT_INDICATOR), - (TYPE_INT, INT_INDICATOR), - (TYPE_END, END_INDICATOR), - (TYPE_STRING, STRING_INDICATOR), - ] - - def __init__( - self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, - ): - """ - :param fp: a **binary** file-like object to parse, - which means need 'b' mode when use built-in open function - :param bool use_ordered_dict: Use collections.OrderedDict as dict - container default False, which mean use built-in dict - :param str encoding: file content encoding, default utf-8, use 'auto' - to enable charset auto detection (need 'chardet' package installed) - :param str errors: how to deal with encoding error when try to parse - string from content with ``encoding`` - :param Dict[str, Tuple[int, bool]] hash_fields: extra fields should - be treated as hash value. dict key is the field name, value is a - two-element tuple of (hash_block_length, as_a_list). - See :any:`hash_field` for detail - """ - if getattr(fp, 'read', ) is None \ - or getattr(fp, 'seek') is None: - raise ValueError('Parameter fp needs a file like object') - - self._pos = 0 - self._encoding = encoding - self._content = fp - self._use_ordered_dict = use_ordered_dict - self._error_handler = errors - self._hash_fields = dict(TorrentFileParser.HASH_FIELD_PARAMS) - if hash_fields is not None: - for k, v in hash_fields.items(): - if _check_hash_field_params(k, v): - self._hash_fields[k] = v - else: - raise ValueError( - "Invalid hash field parameter, it should be type of " - "Dict[str, Tuple[int, bool]]" - ) - self._hash_raw = bool(hash_raw) - - def hash_field(self, name, block_length=20, need_list=False): - """ - Let field with the `name` to be treated as hash value, don't decode it - as a string. 
- - :param str name: field name - :param int block_length: hash block length for split - :param bool need_list: if True, when the field only has one block( - or even empty) its parse result will be a one-element list( - or empty list); If False, will be a string in 0 or 1 block condition - :return: return self, so you can chained call - """ - v = (block_length, need_list) - if _check_hash_field_params(name, v): - self._hash_fields[name] = v - else: - raise ValueError("Invalid hash field parameter") - return self - - def parse(self): - """ - :rtype: dict|list|int|str|bytes - :raise: :any:`InvalidTorrentDataException` when parse failed or error - happened when decode string using specified encoding - """ - self._restart() - data = self._next_element() - - try: - c = self._read_byte(1, True) - raise InvalidTorrentDataException( - 0, 'Expect EOF, but get [{}] at pos {}'.format(c, self._pos) - ) - except EOFError: # expect EOF - pass - - return data - - def _read_byte(self, count=1, raise_eof=False): - assert count >= 0 - gotten = self._content.read(count) - if count != 0 and len(gotten) == 0: - if raise_eof: - raise EOFError() - raise InvalidTorrentDataException( - self._pos, - 'Unexpected EOF when reading torrent file' - ) - self._pos += count - return gotten - - def _seek_back(self, count): - self._content.seek(-count, 1) - self._pos = self._pos - count - - def _restart(self): - self._content.seek(0, 0) - self._pos = 0 - - def _dict_items_generator(self): - while True: - k = self._next_element() - if k is _END: - return - if not isinstance(k, str_type): - raise InvalidTorrentDataException( - self._pos, "Type of dict key can't be " + type(k).__name__ - ) - if k in self._hash_fields: - v = self._next_hash(*self._hash_fields[k]) - else: - v = self._next_element(k) - if k == 'encoding': - self._encoding = v - yield k, v - - def _next_dict(self): - data = collections.OrderedDict() if self._use_ordered_dict else dict() - for key, element in self._dict_items_generator(): - data[key] = element - return data - - def _list_items_generator(self): - while True: - element = self._next_element() - if element is _END: - return - yield element - - def _next_list(self): - return [element for element in self._list_items_generator()] - - def _next_int(self, end=END_INDICATOR): - value = 0 - char = self._read_byte(1) - neg = False - while char != end: - if not neg and char == b'-': - neg = True - elif not b'0' <= char <= b'9': - raise InvalidTorrentDataException(self._pos - 1) - else: - value = value * 10 + int(char) - int(b'0') - char = self._read_byte(1) - return -value if neg else value - - def _next_string(self, need_decode=True, field=None): - length = self._next_int(self.STRING_DELIMITER) - raw = self._read_byte(length) - if need_decode: - encoding = self._encoding - if encoding == 'auto': - self.encoding = encoding = detect(raw) - try: - string = raw.decode(encoding, self._error_handler) - except UnicodeDecodeError as e: - msg = [ - "Fail to decode string at pos {pos} using encoding ", - e.encoding - ] - if field: - msg.extend([ - ' when parser field "', field, '"' - ', maybe it is an hash field. 
', - 'You can use self.hash_field("', field, '") ', - 'to let it be treated as hash value, ', - 'so this error may disappear' - ]) - raise InvalidTorrentDataException( - self._pos - length + e.start, - ''.join(msg) - ) - return string - return raw - - def _next_hash(self, p_len, need_list): - raw = self._next_string(need_decode=False) - if len(raw) % p_len != 0: - raise InvalidTorrentDataException( - self._pos - len(raw), "Hash bit length not match at pos {pos}" - ) - if self._hash_raw: - return raw - res = [ - binascii.hexlify(chunk).decode('ascii') - for chunk in (raw[x:x+p_len] for x in range(0, len(raw), p_len)) - ] - if len(res) == 0 and not need_list: - return '' - if len(res) == 1 and not need_list: - return res[0] - return res - - @staticmethod - def _next_end(): - return _END - - def _next_type(self): - for (element_type, indicator) in self.TYPES: - indicator_length = len(indicator) - char = self._read_byte(indicator_length) - if indicator == char: - return element_type - self._seek_back(indicator_length) - raise InvalidTorrentDataException(self._pos) - - def _type_to_func(self, t): - return getattr(self, '_next_' + t) - - def _next_element(self, field=None): - element_type = self._next_type() - if element_type is TorrentFileParser.TYPE_STRING and field is not None: - element = self._type_to_func(element_type)(field=field) - else: - element = self._type_to_func(element_type)() - return element - - -class BEncoder(object): - - TYPES = { - (dict,): TorrentFileParser.TYPE_DICT, - (list,): TorrentFileParser.TYPE_LIST, - (int,): TorrentFileParser.TYPE_INT, - (str_type, bytes): TorrentFileParser.TYPE_STRING, - } - - def __init__(self, data, encoding='utf-8', hash_fields=None): - """ - :param dict|list|int|str data: data will be encoded - :param str encoding: string field output encoding - :param List[str] hash_fields: see - :any:`TorrentFileParser.__init__` - """ - self._data = data - self._encoding = encoding - self._hash_fields = list(TorrentFileParser.HASH_FIELD_PARAMS.keys()) - if hash_fields is not None: - self._hash_fields.extend(str_type(hash_fields)) - - def hash_field(self, name): - """ - see :any:`TorrentFileParser.hash_field` - - :param str name: - :return: return self, so you can chained call - """ - return self._hash_fields.append(str_type(name)) - - def encode(self): - """ - Encode to bytes - - :rtype: bytes - """ - return b''.join(self._output_element(self._data)) - - def encode_to_filelike(self): - """ - Encode to a file-like(BytesIO) object - - :rtype: BytesIO - """ - return io.BytesIO(self.encode()) - - def _output_string(self, data): - if isinstance(data, str_type): - data = data.encode(self._encoding) - yield str(len(data)).encode('ascii') - yield TorrentFileParser.STRING_DELIMITER - yield data - - @staticmethod - def _output_int(data): - yield TorrentFileParser.INT_INDICATOR - yield str(data).encode('ascii') - yield TorrentFileParser.END_INDICATOR - - def _output_decode_hash(self, data): - if isinstance(data, str_type): - data = [data] - result = [] - for hash_line in data: - if not isinstance(hash_line, str_type): - raise InvalidTorrentDataException( - None, - "Hash must be " + str_type.__name__ + " not " + - type(hash_line).__name__, - ) - if len(hash_line) % 2 != 0: - raise InvalidTorrentDataException( - None, - "Hash(" + hash_line + ") length(" + str(len(hash_line)) + - ") is a not even number", - ) - try: - raw = binascii.unhexlify(hash_line) - except binascii.Error as e: - raise InvalidTorrentDataException( - None, str(e), - ) - result.append(raw) - for x in 
self._output_string(b''.join(result)): - yield x - - def _output_dict(self, data): - yield TorrentFileParser.DICT_INDICATOR - for k, v in data.items(): - if not isinstance(k, str_type): - raise InvalidTorrentDataException( - None, "Dict key must be " + str_type.__name__, - ) - for x in self._output_element(k): - yield x - if k in self._hash_fields: - for x in self._output_decode_hash(v): - yield x - else: - for x in self._output_element(v): - yield x - yield TorrentFileParser.END_INDICATOR - - def _output_list(self, data): - yield TorrentFileParser.LIST_INDICATOR - for v in data: - for x in self._output_element(v): - yield x - yield TorrentFileParser.END_INDICATOR - - def _type_to_func(self, t): - return getattr(self, '_output_' + t) - - def _output_element(self, data): - for types, t in self.TYPES.items(): - if isinstance(data, types): - # noinspection PyCallingNonCallable - return self._type_to_func(t)(data) - raise InvalidTorrentDataException( - None, - "Invalid type for torrent file: " + type(data).__name__, - ) - - -class BDecoder(object): - def __init__( - self, data, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, - ): - """ - See :any:`TorrentFileParser.__init__` for parameter description. - - :param bytes data: raw data to be decoded - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - """ - self._parser = TorrentFileParser( - io.BytesIO(bytes(data)), - use_ordered_dict, - encoding, - errors, - hash_fields, - hash_raw, - ) - - def hash_field(self, name, block_length=20, need_dict=False): - """ - See :any:`TorrentFileParser.hash_field` for parameter description - - :param name: - :param block_length: - :param need_dict: - :return: return self, so you can chained call - """ - self._parser.hash_field(name, block_length, need_dict) - return self - - def decode(self): - return self._parser.parse() - - -def encode(data, encoding='utf-8', hash_fields=None): - """ - Shortcut function for encode python object to torrent file format(bencode) - - See :any:`BEncoder.__init__` for parameter description - - :param dict|list|int|str|bytes data: data to be encoded - :param str encoding: - :param List[str] hash_fields: - :rtype: bytes - """ - return BEncoder(data, encoding, hash_fields).encode() - - -def decode( - data, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, -): - """ - Shortcut function for decode bytes as torrent file format(bencode) to python - object - - See :any:`BDecoder.__init__` for parameter description - - :param bytes data: raw data to be decoded - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - :rtype: dict|list|int|str|bytes|bytes - """ - return BDecoder( - data, use_ordered_dict, encoding, errors, hash_fields, hash_raw, - ).decode() - - -def parse_torrent_file( - filename, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, -): - """ - Shortcut function for parse torrent object using TorrentFileParser - - See :any:`TorrentFileParser.__init__` for parameter description - - :param str filename: torrent filename - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - :rtype: dict|list|int|str|bytes - """ - with open(filename, 'rb') as f: - return 
TorrentFileParser( - f, use_ordered_dict, encoding, errors, hash_fields, hash_raw, - ).parse() - - -def create_torrent_file(filename, data, encoding='utf-8', hash_fields=None): - """ - Shortcut function for create a torrent file using BEncoder - - see :any:`BDecoder.__init__` for parameter description - - :param str filename: output torrent filename - :param dict|list|int|str|bytes data: - :param str encoding: - :param List[str] hash_fields: - """ - with open(filename, 'wb') as f: - f.write(BEncoder(data, encoding, hash_fields).encode()) - - -def __main(): - parser = argparse.ArgumentParser() - parser.add_argument('file', nargs='?', default='', - help='input file, will read form stdin if empty') - parser.add_argument('--dict', '-d', action='store_true', default=False, - help='use built-in dict, default will be OrderedDict') - parser.add_argument('--sort', '-s', action='store_true', default=False, - help='sort output json item by key') - parser.add_argument('--indent', '-i', type=int, default=None, - help='json output indent for every inner level') - parser.add_argument('--ascii', '-a', action='store_true', default=False, - help='ensure output json use ascii char, ' - 'escape other char use \\u') - parser.add_argument('--coding', '-c', default='utf-8', - help='string encoding, default "utf-8"') - parser.add_argument('--errors', '-e', default='strict', - help='decoding error handler, default "strict", you can' - ' use "ignore" or "replace" to avoid exception') - parser.add_argument('--version', '-v', action='store_true', default=False, - help='print version and exit') - args = parser.parse_args() - - if args.version: - print(__version__) - exit(0) - - try: - if args.file == '': - target_file = io.BytesIO( - getattr(sys.stdin, 'buffer', sys.stdin).read() - ) - else: - target_file = open(args.file, 'rb') - except FileNotFoundError: - sys.stderr.write('File "{}" not exist\n'.format(args.file)) - exit(1) - - # noinspection PyUnboundLocalVariable - data = TorrentFileParser( - target_file, not args.dict, args.coding, args.errors - ).parse() - - data = json.dumps( - data, ensure_ascii=args.ascii, - sort_keys=args.sort, indent=args.indent - ) - - print(data) - - -if __name__ == '__main__': - __main() -# Last Updated February 18, 2021 diff --git a/scrapers/Algolia_touchmywife.yml b/scrapers/touchmywife/touchmywife.yml similarity index 77% rename from scrapers/Algolia_touchmywife.yml rename to scrapers/touchmywife/touchmywife.yml index bf1ac810c..20081577e 100644 --- a/scrapers/Algolia_touchmywife.yml +++ b/scrapers/touchmywife/touchmywife.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "TouchMyWife" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - touchmywife.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - touchmywife.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife - gallery # Last Updated December 22, 2022 diff --git a/scrapers/vTubie.yml b/scrapers/vTubie.yml new file mode 100644 index 000000000..562c40800 --- /dev/null +++ b/scrapers/vTubie.yml @@ -0,0 +1,48 @@ +name: "vTubie" +performerByName: + action: scrapeXPath + queryURL: 
https://vtubie.com/wp-admin/admin-ajax.php?s={}&action=ma_s_ajax + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - vtubie.com + scraper: performerScraper + +xPathScrapers: + performerSearch: + performer: + Name: //div[div/text()="VTubers"]/following-sibling::a/@title + URL: //div[div/text()="VTubers"]/following-sibling::a/@href + Image: + selector: //div[div/text()="VTubers"]/following-sibling::a//div[@class="ma-s-ajax-result-item-image"]/@style + postProcess: + - replace: + - regex: ^background\-image:url\((.*)\);$ + with: $1 + + performerScraper: + performer: + Name: //meta[@property="og:title"]/@content + Aliases: + selector: //div[@class="ma-section-content ma-aname-content"]//a/text() + concat: ", " + URL: + selector: //a[@class="ma-pag-next"]/@href + postProcess: + - replace: + - regex: \?pg=\d + with: + Gender: //div[@class="ma-section-title" and span/text()="Gender"]/following-sibling::div[1]//a/text() + Twitter: //div[@class="ma-section-title" and text()="Twitter "]/following-sibling::div[1]//a/@href + Tags: + Name: //div[@class="ma-section-title" and text()="Type "]/following-sibling::div[1]//a/text() | //div[@class="ma-section-title" and text()="Group"]/following-sibling::div[1]//a/text() + CareerLength: + selector: //div[@class="ma-section-title" and text()="Debut "]/following-sibling::div[1]/div/text() + postProcess: + - replace: + - regex: .*(\d{4}).* + with: $1- + Image: //link[@rel="image_src"]/@href + +# Last Updated January 22, 2023 diff --git a/scrapers/vixenNetwork.py b/scrapers/vixenNetwork/vixenNetwork.py similarity index 85% rename from scrapers/vixenNetwork.py rename to scrapers/vixenNetwork/vixenNetwork.py index d1a92c3af..c409a1424 100644 --- a/scrapers/vixenNetwork.py +++ b/scrapers/vixenNetwork/vixenNetwork.py @@ -1,7 +1,16 @@ import json +import os import sys from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import requests except ModuleNotFoundError: @@ -110,6 +119,10 @@ def parse_scene(self, response): scene['title'] = data.get('title') scene['details'] = data.get('description') scene['studio'] = {"name": self.name} + scene['code'] = data.get('videoId') + director = data.get("directors") + if director is not None: + scene["director"] = ", ".join(d["name"] for d in data.get("directors", [])) date = data.get('releaseDate') if date: @@ -120,7 +133,12 @@ def parse_scene(self, response): scene['performers'].append({"name": model['name']}) scene['tags'] = [] - if data.get('tags'): + tags = data.get('tags') + categories = data.get('categories') + if tags == [] and categories: + for tag in data['categories']: + scene['tags'].append({"name": tag['name']}) + elif tags: for tag in data['tags']: scene['tags'].append({"name": tag}) @@ -155,6 +173,7 @@ def parse_search(self, response): sc['title'] = scene.get('title') sc['details'] = scene.get('description') sc['url'] = f"https://www.{self.id.lower()}.com/videos/{slug}" + sc['code'] = scene.get('videoId') sc['studio'] = {"name": self.name} date = scene.get('releaseDate') if date: @@ -164,7 +183,6 @@ def parse_search(self, response): for model in scene['modelsSlugged']: sc['performers'].append( {"name": model['name']}) - if scene.get('images'): if 
scene['images'].get('listing'): maxWidth = 0 @@ -172,12 +190,13 @@ def parse_search(self, response): if image['width'] > maxWidth: sc['image'] = image['src'] maxWidth = image['width'] - search_result.append(sc) + search_result.append(sc) return search_result return None - def length(self, studio): - return len(studio.id) + @property + def length(self): + return len(self.id) getVideoQuery = """ query getVideo($videoSlug: String, $site: Site) { @@ -188,6 +207,10 @@ def length(self, studio): models { name } + videoId + directors { + name + } images { poster { src @@ -195,6 +218,9 @@ def length(self, studio): } } tags + categories { + name + } } } """ @@ -211,6 +237,7 @@ def length(self, studio): name slugged: slug } + videoId images { listing { src @@ -224,20 +251,11 @@ def length(self, studio): """ -# sort site dicts into a list -# by reverse id length -def sortByLength(sites): - sorted = [] - for s in sites: - sorted.append(s) - sorted.sort(reverse=True, key=s.length) - return sorted - - studios = { Site('Blacked Raw'), Site('Blacked'), Site('Deeper'), + Site('Milfy'), Site('Tushy'), Site('Tushy Raw'), Site('Slayed'), @@ -264,7 +282,7 @@ def sortByLength(sites): if search_query and "search" in sys.argv: search_query = search_query.lower() lst = [] - filter = [] + wanted = [] # Only search on specific site if the studio name is in the search query # ('Ariana Vixen Cecilia' will search only on Vixen) @@ -272,25 +290,25 @@ def sortByLength(sites): # if the first character is $, filter will be ignored. if search_query[0] != "$": # make sure longer matches are filtered first - studios_sorted = sortByLength(studios) + studios_sorted = sorted(studios, reverse=True, key=lambda s: s.length) for x in studios_sorted: if x.id.lower() in search_query: - filter.append(x.id.lower()) + wanted.append(x.id.lower()) continue # remove the studio from the search result search_query = search_query.replace(x.id.lower(), "") else: search_query = search_query[1:] - if filter: - log.info(f"Filter: {filter} applied") + if wanted: + log.info(f"Filter: {wanted} applied") log.debug(f"Query: '{search_query}'") for x in studios: - if filter: - if x.id.lower() not in filter: - #log.debug(f"[Filter] {x.id} ignored") + if wanted: + if x.id.lower() not in wanted: + log.debug(f"[Filter] ignoring {x.id}") continue s = x.getSearchResult(search_query) # merge all list into one diff --git a/scrapers/vixenNetwork.yml b/scrapers/vixenNetwork/vixenNetwork.yml similarity index 91% rename from scrapers/vixenNetwork.yml rename to scrapers/vixenNetwork/vixenNetwork.yml index 9e6559504..5515e8b59 100644 --- a/scrapers/vixenNetwork.yml +++ b/scrapers/vixenNetwork/vixenNetwork.yml @@ -1,9 +1,12 @@ name: "Vixen Media Group" +# requires: py_common + sceneByURL: - url: - blacked.com/videos - blackedraw.com/videos - deeper.com/videos + - milfy.com/videos - slayed.com/videos - tushy.com/videos - tushyraw.com/videos @@ -29,4 +32,4 @@ sceneByQueryFragment: - python - vixenNetwork.py -# Last Updated January 07, 2022 +# Last Updated August 05, 2023 diff --git a/scrapers/wearehairy.yml b/scrapers/wearehairy.yml deleted file mode 100644 index c0d769530..000000000 --- a/scrapers/wearehairy.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: wearehairy -sceneByURL: - - action: scrapeXPath - url: - - wearehairy.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //h1[@itemprop="name"]//h1 - Date: - selector: //span[@class="added"]//@datetime - postProcess: - - parseDate: 2006-01-02 - Details: 
//div[@class="line"][contains(h3,"Description")]/p - Tags: - Name: //div[@class="tagline"]//a - Image: //video[@id="trailer"]/@poster - Performers: - Name: //div[@class="meet"]//a - Studio: - Name: - fixed: WeAreHairy -# Last Updated March 01, 2021 diff --git a/scrapers/Algolia_xEmpire.yml b/scrapers/xEmpire/xEmpire.yml similarity index 83% rename from scrapers/Algolia_xEmpire.yml rename to scrapers/xEmpire/xEmpire.yml index 75f6c400a..8ff92c4bd 100644 --- a/scrapers/Algolia_xEmpire.yml +++ b/scrapers/xEmpire/xEmpire.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "xEmpire" sceneByURL: - action: script @@ -10,26 +11,26 @@ sceneByURL: - xempire.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire - validName galleryByURL: @@ -43,7 +44,7 @@ galleryByURL: - xempire.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire - gallery # Last Updated December 22, 2022 diff --git a/scrapers/xbvrdb.py b/scrapers/xbvrdb/xbvrdb.py similarity index 100% rename from scrapers/xbvrdb.py rename to scrapers/xbvrdb/xbvrdb.py diff --git a/scrapers/xbvrdb.yml b/scrapers/xbvrdb/xbvrdb.yml similarity index 100% rename from scrapers/xbvrdb.yml rename to scrapers/xbvrdb/xbvrdb.yml diff --git a/scrapers/Algolia_zerotolerancefilms.yml b/scrapers/zerotolerancefilms/zerotolerancefilms.yml similarity index 81% rename from scrapers/Algolia_zerotolerancefilms.yml rename to scrapers/zerotolerancefilms/zerotolerancefilms.yml index 7015c48a4..2a4fbac96 100644 --- a/scrapers/Algolia_zerotolerancefilms.yml +++ b/scrapers/zerotolerancefilms/zerotolerancefilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "ZeroToleranceFilms" sceneByURL: - action: script @@ -6,26 +7,26 @@ sceneByURL: - zerotolerancefilms.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - validName movieByURL: @@ -35,7 +36,7 @@ movieByURL: - zerotolerancefilms.com/en/movie script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - movie galleryByURL: @@ -44,7 +45,7 @@ galleryByURL: - zerotolerancefilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - gallery # Last Updated December 22, 2022 diff --git a/validator/index.js b/validator/index.js index 2a9650d98..00b73e082 100755 --- a/validator/index.js +++ b/validator/index.js @@ -55,8 +55,6 @@ class Validator { }); this.mappingPattern = /^([a-z]+)By(Fragment|Name|URL)$/; - this.commentPrefix = /^ *# *Last Updated/i; - this.commentPattern = /^#( *)Last Updated ((?:Jan|Febr)uary|March|April|May|June|July|August|(?:Septem|Octo|Novem|Decem)ber) (0[1-9]|[1-3]\d), (\d{4})$/; if (!!this.ajv.getKeyword('deprecated')) { this.ajv.removeKeyword('deprecated'); @@ -135,44 +133,6 @@ class Validator { console.log(output); } - // Verify that there is a "Last Updated" comment - if (valid) { - const lines = contents - 
.split(/\r?\n/g) - .slice(-5) - .reverse() - .filter(line => !!line.trim()); - - const commentLine = lines.findIndex(line => this.commentPrefix.test(line)); - let validComment = false; - if (commentLine === -1) { - console.error(chalk.red(`${chalk.bold('ERROR')} 'Last Updated' comment is missing.`)); - } else { - if (commentLine !== 0) { - console.error(chalk.red(`${chalk.bold('ERROR')} 'Last Updated' comment is not the last line.`)); - } - - const comment = lines[commentLine]; - const match = comment.trim().match(this.commentPattern); - if (!match) { - console.error(chalk.red(`${chalk.bold('ERROR')} 'Last Updated' comment's format is invalid: ${comment}`)); - } else { - // Validate leading spaces (trailing spaces are ignored) - const leadingSpaces = comment != comment.trimLeft(); - if (leadingSpaces) { - console.error(chalk.red(`${chalk.bold('ERROR')} Remove leading spaces: '${comment}'`)); - } - // Validate spacing between '#' and 'Last Updated' - if (match[1] !== ' ') { - console.error(chalk.red(`${chalk.bold('ERROR')} Missing single space between '#' and 'Last Updated': ${comment}`)); - } else { - validComment = true; - } - } - } - valid = valid && validComment; - } - if (this.verbose || !valid) { const validColor = valid ? chalk.green : chalk.red; console.log(`${relPath} Valid: ${validColor(valid)}`); diff --git a/validator/package.json b/validator/package.json index 6e42778c4..5fc3e8e38 100644 --- a/validator/package.json +++ b/validator/package.json @@ -10,6 +10,6 @@ "ajv": "^6.12.6", "better-ajv-errors": "^0.6.7", "chalk": "^4.1.1", - "yaml": "^1.10.0" + "yaml": "^2.2.2" } } diff --git a/validator/scraper.schema.json b/validator/scraper.schema.json index 83b857e93..8e83a0dc1 100644 --- a/validator/scraper.schema.json +++ b/validator/scraper.schema.json @@ -427,7 +427,6 @@ "title": "Scene object", "type": "object", "additionalProperties": false, - "required": ["Title"], "properties": { "Title": { "title": "Scene title", @@ -485,6 +484,10 @@ "title": "Performer name", "allOf": [{ "$ref": "#/definitions/nodeSelector" }] }, + "Disambiguation": { + "title": "Performer disambiguation", + "allOf": [{ "$ref": "#/definitions/nodeSelector" }] + }, "Aliases": { "title": "Performer aliases", "allOf": [{ "$ref": "#/definitions/nodeSelector" }] @@ -546,6 +549,15 @@ "title": "Performer fake tits", "allOf": [{ "$ref": "#/definitions/nodeSelector" }] }, + "PenisLength": { + "title": "Performer penis length", + "allOf": [{ "$ref": "#/definitions/nodeSelector" }] + }, + "Circumcised": { + "title": "Performer circumcised", + "description": "Must be one of (case insensitive): `cut`, `uncut`", + "allOf": [{ "$ref": "#/definitions/nodeSelector" }] + }, "CareerLength": { "title": "Performer career length", "allOf": [{ "$ref": "#/definitions/nodeSelector" }] @@ -779,6 +791,11 @@ "description": "Convert height measurement in feet and inches to centimetres.", "type": "boolean" }, + "dimensionToMetric": { + "title": "Convert dimension to metric", + "description": "Smart conversion to centimeters: can handle feet, inches or centimeters.", + "type": "boolean" + }, "lbToKg": { "title": "Pounds to Kilograms", "description": "Convert weight measurement in pounds to kilograms.", diff --git a/validator/yarn.lock b/validator/yarn.lock index 59c876e43..6dbea9c4c 100644 --- a/validator/yarn.lock +++ b/validator/yarn.lock @@ -207,7 +207,7 @@ uri-js@^4.2.2: dependencies: punycode "^2.1.0" -yaml@^1.10.0: - version "1.10.0" - resolved 
"https://registry.yarnpkg.com/yaml/-/yaml-1.10.0.tgz#3b593add944876077d4d683fee01081bd9fff31e" - integrity sha512-yr2icI4glYaNG+KWONODapy2/jDdMSDnrONSjblABjD9B4Z5LgiircSt8m8sRZFNi08kG9Sm0uSHtEmP3zaEGg== +yaml@^2.2.2: + version "2.2.2" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.2.2.tgz#ec551ef37326e6d42872dad1970300f8eb83a073" + integrity sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==