From b2a7ca30a3a97f670ded5b0a401be432dde32d56 Mon Sep 17 00:00:00 2001 From: Tweeticoats Date: Wed, 21 Feb 2024 22:07:20 +1030 Subject: [PATCH 1/2] Bugfixes and adding a feature to also scrape any profile urls if there is a scraper for that url in stash. --- .../stashdb-performer-gallery.py | 114 +++++++++++++++--- 1 file changed, 99 insertions(+), 15 deletions(-) diff --git a/plugins/stashdb-performer-gallery/stashdb-performer-gallery.py b/plugins/stashdb-performer-gallery/stashdb-performer-gallery.py index 643d06c9..c4a50e33 100644 --- a/plugins/stashdb-performer-gallery/stashdb-performer-gallery.py +++ b/plugins/stashdb-performer-gallery/stashdb-performer-gallery.py @@ -1,5 +1,5 @@ import stashapi.log as log -from stashapi.stashapp import StashInterface +from stashapi.stashapp import StashInterface,StashItem from stashapi.stashbox import StashBoxInterface import os import sys @@ -15,7 +15,7 @@ per_page = 100 request_s = requests.Session() stash_boxes = {} - +scrapers={} def processImages(img): log.debug("image: %s" % (img,)) @@ -34,8 +34,7 @@ def processImages(img): def processPerformers(): - performers = stash.find_performers( - f={ + query={ "tags": { "depth": 0, "excludes": [], @@ -43,7 +42,8 @@ def processPerformers(): "value": [tag_stashbox_performer_gallery], } } - ) + performers = stash.find_performers(f=query) + for performer in performers: processPerformer(performer) @@ -96,7 +96,11 @@ def processPerformerStashid(endpoint, stashid, p): images { id url - } + } + urls{ + url + type + } """ perf = stashbox.find_performer(stashid, fragment=query) log.debug(perf) @@ -174,13 +178,88 @@ def processPerformerStashid(endpoint, stashid, p): ) ) r = requests.get(img["url"]) - with open(filename, "xb") as f: + with open(filename, "wb") as f: f.write(r.content) f.close() # modified=True else: log.debug("image already downloaded") + # scrape urls on the performer using the url scrapers in stash + if settings['runPerformerScraper'] and len(perf['urls'])>0: + + # we need to determine what scrapers we have and what url patterns they accept, query what url patterns are supported, should only need to check once + if len(scrapers) == 0: + scrapers_graphql="""query ListPerformerScrapers { + listScrapers(types: [PERFORMER]) { + id + name + performer { + urls + supported_scrapes + } + } + }""" + res = stash.callGQL(scrapers_graphql) + for r in res['listScrapers']: + if r['performer']['urls']: + for url in r['performer']['urls']: + scrapers[url]=r + + for u in perf['urls']: + for url in scrapers.keys(): + if url in u['url']: + log.info('Running stash scraper on performer url: %s' % (u['url'],)) + res=stash.scrape_performer_url(u['url']) + # Check if the scraper returned a result + if res is not None: + log.debug(res) + # it's possible for multiple images to be returned by a scraper so incriment a number each image + image_id = 1 + if res['images']: + for image in res['images']: + image_index = Path(settings["path"]) / p["id"] / ("%s-%s.json" % (scrapers[url]['id'],image_id ,)) + if not image_index.exists(): + with open(image_index, "w") as f: + image_data = { + "title": '%s - %s ' % (scrapers[url]['id'],image_id,), + "details": "name: %s\ngender: %s\nurl: %s\ntwitter: %s\ninstagram: %s\nbirthdate: %s\nethnicity: %s\ncountry: %s\neye_color: %s\nheight: %s\nmeasurements: %s\nfake tits: %s\npenis_length: %s\n career length: %s\ntattoos: %s\npiercings: %s\nhair_color: %s\nweight: %s\n description: %s\n" % (res['name'], res['gender'], res['url'], res['twitter'], res['instagram'], res['birthdate'], res['ethnicity'], res['country'], res['eye_color'], res['height'], res['measurements'], res['fake_tits'], res['penis_length'], res['career_length'], res['tattoos'], res['piercings'], res['hair_color'], res['weight'], res['details'],), + "urls": [u['url'],], + "performer_ids": [p["id"]], + "tag_ids": [tag_stashbox_performer_gallery], + "gallery_ids": [index["galleries"][endpoint]], + } + json.dump(image_data, f) + filename = Path(settings["path"]) / p["id"] / ("%s-%s.jpg" % (scrapers[url]['id'],image_id ,)) + if not filename.exists(): + if image.startswith('data:'): + with open(filename, "wb") as f: + f.write(base64.b64decode(image.split('base64,')[1])) + f.close() + else: + with open(image_index, "w") as f: + image_data = { + "title": '%s - %s ' % (scrapers[url]['id'],image_id,), + "details": "%s"% (res,), + "urls": [u['url'],image], + "performer_ids": [p["id"]], + "tag_ids": [tag_stashbox_performer_gallery], + "gallery_ids": [index["galleries"][endpoint]], + } + json.dump(image_data, f) + filename = Path(settings["path"]) / p["id"] / ("%s.jpg" % (image_id,)) + r = requests.get(img["url"]) + if r.status_code==200: + with open(filename, "wb") as f: + f.write(r.content) + f.close() + image_id=image_id+1 + + + # log.debug('%s %s' % (url['url'],url['type'],)) +# stash.scraper +# scrape=stash.scrape_performer_url(ur) + else: log.error("endpoint %s not configured, skipping" % (endpoint,)) @@ -230,15 +309,20 @@ def processQueue(): def relink_images(): - images = stash.find_images( - f={ + query={ "path": {"modifier": "INCLUDES", "value": settings["path"]}, - "performer_count": {"modifier": "EQUALS", "value": 0}, + "is_missing": "galleries" } - ) - log.debug(images) - for img in images: - processImages(img) + total = stash.find_images(f=query,get_count=True)[0] + i = 0 + images=[] + while i < total: + images = stash.find_images(f=query,filter={"page": 0, "per_page": per_page}) + for img in images: + log.debug('image: %s' %(img,)) + processImages(img) + i=i+1 + log.progress((i / total)) json_input = json.loads(sys.stdin.read()) @@ -249,6 +333,7 @@ def relink_images(): config = stash.get_configuration()["plugins"] settings = { "path": "/download_dir", + "runPerformerScraper":False, } if "stashdb-performer-gallery" in config: settings.update(config["stashdb-performer-gallery"]) @@ -263,7 +348,6 @@ def relink_images(): if "stasdb-performer-gallery" in config: settings.update(config["stasdb-performer-gallery"]) - if "mode" in json_input["args"]: PLUGIN_ARGS = json_input["args"]["mode"] if "performer" in json_input["args"]: From 21369bd24d85bd6849e4d5a3f2f2fd1da618043b Mon Sep 17 00:00:00 2001 From: Tweeticoats Date: Wed, 21 Feb 2024 22:42:29 +1030 Subject: [PATCH 2/2] Adding boolean setting --- .../stashdb-performer-gallery/stashdb-performer-gallery.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/plugins/stashdb-performer-gallery/stashdb-performer-gallery.yml b/plugins/stashdb-performer-gallery/stashdb-performer-gallery.yml index 7abcc530..b4893b37 100644 --- a/plugins/stashdb-performer-gallery/stashdb-performer-gallery.yml +++ b/plugins/stashdb-performer-gallery/stashdb-performer-gallery.yml @@ -11,6 +11,10 @@ settings: displayName: Download parent folder description: Download location for files, note this should be in a different folder to stash and in a folder covered by stash. You may need to create a new library path to cover this directory. type: STRING + runPerformerScraper: + displayName: Run stash scrapers on profile urls + description: Run scrapers on profile urls + type: BOOLEAN hooks: - name: modify performer