From b50a71dfd803e78f536c997f1d26063ed4c936bd Mon Sep 17 00:00:00 2001 From: CuddleBear92 Date: Sun, 30 May 2021 18:09:48 +0200 Subject: [PATCH 001/624] Added data18.empirestores.co support Tested and seemingly work perfectly out of the box. --- scrapers/AdultEmpireCash.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scrapers/AdultEmpireCash.yml b/scrapers/AdultEmpireCash.yml index 434f32e05..f1e4f8cc4 100644 --- a/scrapers/AdultEmpireCash.yml +++ b/scrapers/AdultEmpireCash.yml @@ -23,6 +23,7 @@ sceneByURL: - thirdworldxxx.com/ - touchmywife.com/ - westcoastproductions.com/ + - data18.empirestores.co/ # VR Sites - action: scrapeXPath @@ -48,6 +49,8 @@ movieByURL: # - shemalestrokers.com/ # Scenes published as movies? - thirdworldxxx.com/ - westcoastproductions.com/ + - data18.empirestores.co/ + xPathScrapers: sceneScraper: From f32f58e0564a40757cf8fe620f1c05a88908c27e Mon Sep 17 00:00:00 2001 From: Bacchus Enorches Date: Thu, 14 Apr 2022 10:05:51 -0500 Subject: [PATCH 002/624] Add ATK Girlfriends scene fragment scraper Uses filename titles to extract performer information and look up scenes from their pages, because some valid scene URLs have no content --- scrapers/ATKGirlfriends.py | 70 +++++++++++++++++++++++++++++++++++++ scrapers/ATKGirlfriends.yml | 7 ++++ 2 files changed, 77 insertions(+) create mode 100644 scrapers/ATKGirlfriends.py create mode 100644 scrapers/ATKGirlfriends.yml diff --git a/scrapers/ATKGirlfriends.py b/scrapers/ATKGirlfriends.py new file mode 100644 index 000000000..255a38f2d --- /dev/null +++ b/scrapers/ATKGirlfriends.py @@ -0,0 +1,70 @@ +import json +import re +import requests +import sys +try: + import py_common.log as log +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() +try: + from lxml import html +except ModuleNotFoundError: + print("You need to install the lxml module. 
(https://lxml.de/installation.html#installation)", file=sys.stderr) + print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr) + sys.exit() + +kgs_per_lb = 0.45359237 +cms_per_in = 2.54 +filename_pattern = re.compile(r"(?P[a-z]{3}\d{3})ATK_(?P\d{6})(?P\d{3})_(?P\w+)(?:\.(?P\w+))?", re.IGNORECASE) + +def getSceneByFilename(filename): + # Parse filename + filename_match = filename_pattern.match(filename) + (model_id, movie_id, _, _, _) = filename_match.groups() + + # Fetch model page + model_url = f"https://www.atkgirlfriends.com/tour/model/{model_id}" + log.debug(f"Fetching {model_url} ({movie_id})") + response = requests.get(model_url, cookies=dict(start_session_galleria = 'stash')) + if (response.url.startswith("https://www.atkgirlfriends.com?nats")): + # Refetch page on cookie failure + response = requests.get(model_url, cookies=dict(start_session_galleria = 'stash')) + + # Build performer + tree = html.fromstring(response.text) + performer = dict(Gender = "female") + model_profile_wrap_xpath = '//div[contains(@class, "model-profile-wrap")]' + performer["name"] = tree.xpath('//h1[contains(@class, "page-title")]')[0].text + performer["url"] = f"{model_url}/1/atk-girlfriends-{performer['name'].replace(' ', '-')}" + performer["ethnicity"] = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Ethnicity")]/following-sibling::text()')[0].strip().capitalize() + performer["hair_color"] = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Hair Color")]/following-sibling::text()')[0].strip().capitalize() + height_ft_ins_str = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Height")]/following-sibling::text()')[0].strip() + (height_ft_str, height_ins_str) = re.compile(r"(\d+)[\"'](\d+)").findall(height_ft_ins_str)[0] + height_ins = float(height_ft_str) * 12 + float(height_ins_str) + performer["height"] = str(int(height_ins * cms_per_in)) + weight_lbs_str = 
tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Weight")]/following-sibling::text()')[0].strip() + weight_lbs = float(re.compile(r"\d+").findall(weight_lbs_str)[0]) + performer["weight"] = str(int(weight_lbs * kgs_per_lb)) + performer["measurements"] = tree.xpath(f'{model_profile_wrap_xpath}/b[contains(text(), "Bust Size")]/following-sibling::text()')[0].strip() + performer["image"] = tree.xpath(f'{model_profile_wrap_xpath}/img/@src')[0] + + # Build scene + scene = dict(studio = dict(name = "ATK Girlfriends"), performers = [performer]) + movie_wrap_xpath = f'//img[contains(@src, "/{model_id}/{movie_id}")]/../../../..' + scene["title"] = tree.xpath(f'{movie_wrap_xpath}//h1')[0].text.strip() + scene["details"] = tree.xpath(f'{movie_wrap_xpath}//b[contains(text(), "Description")]/following-sibling::text()')[0].strip() + movie_url_relative = tree.xpath(f'{movie_wrap_xpath}//a/@href')[0] + scene["url"] = f'https://www.atkgirlfriends.com{movie_url_relative}' + scene["image"] = tree.xpath(f'{movie_wrap_xpath}//img/@src')[0] + + return scene + +input = sys.stdin.read() +match = filename_pattern.search(input) +if (match): + scene = getSceneByFilename(match.group()) + output = json.dumps(scene) + print(output) +else: + print(r"{}") diff --git a/scrapers/ATKGirlfriends.yml b/scrapers/ATKGirlfriends.yml new file mode 100644 index 000000000..fef16f771 --- /dev/null +++ b/scrapers/ATKGirlfriends.yml @@ -0,0 +1,7 @@ +name: ATK Girlfriends +sceneByFragment: + action: script + script: + - python + - ATKGirlfriends.py +# Last Updated April 14, 2022 From e9ac25018fab3b9766d11e349927e1c829679060 Mon Sep 17 00:00:00 2001 From: Bacchus Enorches Date: Thu, 14 Apr 2022 10:08:35 -0500 Subject: [PATCH 003/624] Repurpose XPath scrapers from #812 Includes performerByURL and sceneByURL since the fragment scraper stops working when the scene no longer has a filename title --- scrapers/ATKGirlfriends.yml | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff 
--git a/scrapers/ATKGirlfriends.yml b/scrapers/ATKGirlfriends.yml index fef16f771..a0c8d8b34 100644 --- a/scrapers/ATKGirlfriends.yml +++ b/scrapers/ATKGirlfriends.yml @@ -4,4 +4,87 @@ sceneByFragment: script: - python - ATKGirlfriends.py +performerByURL: + - action: scrapeXPath + url: + - https://www.atkgirlfriends.com/tour/model/ + scraper: performerScraper +sceneByURL: + - action: scrapeXPath + url: + - https://www.atkgirlfriends.com/tour/movie/ + scraper: sceneScraper +xPathScrapers: + performerScraper: + common: + $modelWrap: &modelWrap //div[contains(@class, "model-profile-wrap")] + performer: + Name: //h1[contains(@class, "page-title")] + Gender: + fixed: female + Ethnicity: + selector: $modelWrap/b[contains(text(), "Ethnicity")]/following-sibling::text() + HairColor: + selector: $modelWrap/b[contains(text(), "Hair Color")]/following-sibling::text() + postProcess: + - map: + black: Black + blond: Blonde + brown: Brown + red: Red + white: White + Height: + selector: $modelWrap/b[contains(text(), "Height")]/following-sibling::text() + postProcess: + - feetToCm: true + Weight: + selector: $modelWrap/b[contains(text(), "Weight")]/following-sibling::text() + postProcess: + - replace: + - regex: (\d+).* + with: $1 + - lbToKg: true + Measurements: + selector: $modelWrap/b[contains(text(), "Bust Size")]/following-sibling::text() + Image: + selector: $modelWrap/img/@src + sceneScraper: + common: + $movieWrap: //div[contains(@class, "movie-wrap")] + $modelWrap: *modelWrap + scene: + Title: //title + Details: $movieWrap/b[contains(text(), "Description")]/following-sibling::text() + URL: + selector: //video/source/@src + postProcess: + - replace: + # 0: Match (ale138ATK_290698001_hd.mp4) + # 1: Model (ale138) + # 2: Movie (290698) + # 3: Scene (001) + # 4: Resolution (hd) + # 5: Extension (mp4) + - regex: &filenameRegex ^.*([a-z]{3}\d{3})ATK_(\d+)(\d{3})_(\w*)\.(\w+).*$ + with: https://www.atkgirlfriends.com/tour/movie/$2 + Image: //video/@poster + Studio: + Name: + 
fixed: ATK Girlfriends + URL: + fixed: https://www.atkgirlfriends.com/ + Tags: + Name: + selector: $movieWrap/b[contains(text(), "Tags")]/following-sibling::text() + split: ',' + Performers: + Name: $modelWrap/text()[1] +driver: + cookies: + - CookieURL: https://www.atkgirlfriends.com + Cookies: + - Name: start_session_galleria + Domain: www.atkgirlfriends.com + Value: stash # Rotate this value if the scraper is blocked. The first request with the new value should fail. + Path: / # Last Updated April 14, 2022 From 420e81c148c31866fb4077bce8d80d3a1e45a30a Mon Sep 17 00:00:00 2001 From: Bacchus Enorches Date: Thu, 14 Apr 2022 10:14:12 -0500 Subject: [PATCH 004/624] Add ATKGirlfriends.yml to scrapers list --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 464c5f55b..736192286 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -98,6 +98,7 @@ assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- asstraffic.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- assylum.com|Assylum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish atkexotics.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkgirlfriends.com|ATKGirlfriends.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- From 64c00f3b5e5a07d159e100450ce31b137354a2f0 Mon Sep 17 00:00:00 2001 From: DogmaDragon <103123951+DogmaDragon@users.noreply.github.com> Date: Mon, 24 Oct 2022 08:59:04 +0300 Subject: [PATCH 005/624] Updated for the new site design --- scrapers/FemJoy.yml | 48 --------------------------------------------- scrapers/Femjoy.yml | 26 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 48 deletions(-) delete mode 100644 scrapers/FemJoy.yml create mode 100644 scrapers/Femjoy.yml diff --git 
a/scrapers/FemJoy.yml b/scrapers/FemJoy.yml deleted file mode 100644 index 8b904681d..000000000 --- a/scrapers/FemJoy.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: FemJoy -galleryByURL: - - action: scrapeXPath - url: - - femjoy.com/photos/ - scraper: galleryScraper -sceneByURL: - - action: scrapeXPath - url: - - femjoy.com/videos/ - scraper: sceneScraper - -xPathScrapers: - galleryScraper: - common: &commonSel - $performer: //div[@id='model-info']//a[starts-with(@href,"/models")] - gallery: - Title: &titleSel //div[@id='model-info']//span[normalize-space(.)='in']/following-sibling::text() - Studio: &studioAttr - Name: - fixed: Fem Joy - Date: &dateAttr - selector: //div[@id='model-info']//p[contains(.,"released")] - postProcess: - - replace: - - regex: '.*released on\s+' - with: - - parseDate: Jan 2, 2006 - Performers: &performersAttr - Name: $performer - URL: - selector: $performer/@href - postProcess: - - replace: - - regex: ^ - with: https://femjoy.com - Details: &detailsSel //div[@id='model-info']//div[@class="col-md-12"] - - sceneScraper: - common: *commonSel - scene: - Title: *titleSel - Studio: *studioAttr - Date: *dateAttr - Performers: *performersAttr - Image: //img[@class="lazy comment-photo"]/@data-original - Details: *detailsSel -# Last Updated October 24, 2021 diff --git a/scrapers/Femjoy.yml b/scrapers/Femjoy.yml new file mode 100644 index 000000000..34df8bd0e --- /dev/null +++ b/scrapers/Femjoy.yml @@ -0,0 +1,26 @@ +name: Femjoy +sceneByURL: + - action: scrapeXPath + url: + - femjoy.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class='post_details']/h1[@class='post_title']/span/text() + Studio: + Name: + fixed: Femjoy + Date: + selector: //div[@class='post_details']/h2[@class='post_title'] + postProcess: + - replace: + - regex: .+on\s(.+) + with: $1 + - parseDate: Jan 2, 2006 + Performers: + Name: //div[@class='post_details']/h1[@class='post_title']/a[contains(@href,"/models/")] + URL: 
//div[@class='post_details']/h1[@class='post_title']/a[contains(@href,"/models/")]/@href + #Image: Uses placeholder cover now + Details: //div[@class='post_details']/h2[@class='post_description']/p +# Last Updated October 13, 2022 From 1a685bb270d1d303b66aa0701cfd7a01a25a2271 Mon Sep 17 00:00:00 2001 From: SnZ <3882467+EsEnZeT@users.noreply.github.com> Date: Wed, 7 Dec 2022 19:00:35 +0100 Subject: [PATCH 006/624] Update torrent_parser.py to 0.4.1 --- scrapers/torrent_parser.py | 512 +++++++++++++++++++++++++------------ 1 file changed, 353 insertions(+), 159 deletions(-) diff --git a/scrapers/torrent_parser.py b/scrapers/torrent_parser.py index bc6e0e7b7..967c8ec12 100644 --- a/scrapers/torrent_parser.py +++ b/scrapers/torrent_parser.py @@ -54,34 +54,38 @@ # noinspection PyPackageRequirements from chardet import detect as _detect except ImportError: + def _detect(_): warnings.warn("No chardet module installed, encoding will be utf-8") - return {'encoding': 'utf-8', 'confidence': 1} + return {"encoding": "utf-8", "confidence": 1} try: # noinspection PyUnresolvedReferences # For Python 2 str_type = unicode + bytes_type = str except NameError: # For Python 3 str_type = str + bytes_type = bytes __all__ = [ - 'InvalidTorrentDataException', - 'BEncoder', - 'BDecoder', - 'encode', - 'decode', - 'TorrentFileParser', - 'create_torrent_file', - 'parse_torrent_file', + "InvalidTorrentDataException", + "BEncoder", + "BDecoder", + "encode", + "decode", + "TorrentFileParser", + "TorrentFileCreator", + "create_torrent_file", + "parse_torrent_file", ] -__version__ = '0.3.0' +__version__ = "0.4.1" def detect(content): - return _detect(content)['encoding'] + return _detect(content)["encoding"] class InvalidTorrentDataException(Exception): @@ -99,32 +103,28 @@ class __EndCls(object): def _check_hash_field_params(name, value): - return isinstance(name, str_type) \ - and isinstance(value, tuple) and len(value) == 2 \ - and isinstance(value[0], int) and isinstance(value[1], bool) - - 
-class TorrentFileParser(object): - - TYPE_LIST = 'list' - TYPE_DICT = 'dict' - TYPE_INT = 'int' - TYPE_STRING = 'string' - TYPE_END = 'end' + return ( + isinstance(name, str_type) + and isinstance(value, tuple) + and len(value) == 2 + and isinstance(value[0], int) + and isinstance(value[1], bool) + ) - LIST_INDICATOR = b'l' - DICT_INDICATOR = b'd' - INT_INDICATOR = b'i' - END_INDICATOR = b'e' - STRING_INDICATOR = b'' - STRING_DELIMITER = b':' - HASH_FIELD_PARAMS = { - # field length need_list - 'pieces': (20, True), - 'ed2k': (16, False), - 'filehash': (20, False), - } +class BDecoder(object): + TYPE_LIST = "list" + TYPE_DICT = "dict" + TYPE_INT = "int" + TYPE_STRING = "string" + TYPE_END = "end" + + LIST_INDICATOR = b"l" + DICT_INDICATOR = b"d" + INT_INDICATOR = b"i" + END_INDICATOR = b"e" + STRING_INDICATOR = b"" + STRING_DELIMITER = b":" TYPES = [ (TYPE_LIST, LIST_INDICATOR), @@ -134,34 +134,55 @@ class TorrentFileParser(object): (TYPE_STRING, STRING_INDICATOR), ] + # see https://docs.python.org/3/library/codecs.html#error-handlers + # for other usable error handler string + ERROR_HANDLER_USEBYTES = "usebytes" + def __init__( - self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, + self, + data, + use_ordered_dict=False, + encoding="utf-8", + errors="strict", + hash_fields=None, + hash_raw=False, ): """ - :param fp: a **binary** file-like object to parse, + :param bytes|file data: bytes or a **binary** file-like object to parse, which means need 'b' mode when use built-in open function :param bool use_ordered_dict: Use collections.OrderedDict as dict container default False, which mean use built-in dict :param str encoding: file content encoding, default utf-8, use 'auto' to enable charset auto detection (need 'chardet' package installed) :param str errors: how to deal with encoding error when try to parse - string from content with ``encoding`` + string from content with ``encoding``. 
+ see https://docs.python.org/3/library/codecs.html#error-handlers + for usable error handler string. + in particular, you can use "usebytes" to use "strict" decode mode + and let it return raw bytes if error happened. :param Dict[str, Tuple[int, bool]] hash_fields: extra fields should be treated as hash value. dict key is the field name, value is a two-element tuple of (hash_block_length, as_a_list). See :any:`hash_field` for detail """ - if getattr(fp, 'read', ) is None \ - or getattr(fp, 'seek') is None: - raise ValueError('Parameter fp needs a file like object') + if isinstance(data, bytes_type): + data = io.BytesIO(data) + elif getattr(data, "read") is not None and getattr(data, "seek") is not None: + pass + else: + raise ValueError("Parameter data must be bytes or file like object") self._pos = 0 self._encoding = encoding - self._content = fp + self._content = data self._use_ordered_dict = use_ordered_dict self._error_handler = errors - self._hash_fields = dict(TorrentFileParser.HASH_FIELD_PARAMS) + self._error_use_bytes = False + if self._error_handler == BDecoder.ERROR_HANDLER_USEBYTES: + self._error_handler = "strict" + self._error_use_bytes = True + + self._hash_fields = {} if hash_fields is not None: for k, v in hash_fields.items(): if _check_hash_field_params(k, v): @@ -192,9 +213,9 @@ def hash_field(self, name, block_length=20, need_list=False): raise ValueError("Invalid hash field parameter") return self - def parse(self): + def decode(self): """ - :rtype: dict|list|int|str|bytes + :rtype: dict|list|int|str|unicode|bytes :raise: :any:`InvalidTorrentDataException` when parse failed or error happened when decode string using specified encoding """ @@ -204,7 +225,7 @@ def parse(self): try: c = self._read_byte(1, True) raise InvalidTorrentDataException( - 0, 'Expect EOF, but get [{}] at pos {}'.format(c, self._pos) + 0, "Expect EOF, but get [{}] at pos {}".format(c, self._pos) ) except EOFError: # expect EOF pass @@ -218,8 +239,7 @@ def _read_byte(self, 
count=1, raise_eof=False): if raise_eof: raise EOFError() raise InvalidTorrentDataException( - self._pos, - 'Unexpected EOF when reading torrent file' + self._pos, "Unexpected EOF when reading torrent file" ) self._pos += count return gotten @@ -237,7 +257,7 @@ def _dict_items_generator(self): k = self._next_element() if k is _END: return - if not isinstance(k, str_type): + if not isinstance(k, str_type) and not isinstance(k, bytes_type): raise InvalidTorrentDataException( self._pos, "Type of dict key can't be " + type(k).__name__ ) @@ -245,7 +265,7 @@ def _dict_items_generator(self): v = self._next_hash(*self._hash_fields[k]) else: v = self._next_element(k) - if k == 'encoding': + if k == "encoding": self._encoding = v yield k, v @@ -270,12 +290,12 @@ def _next_int(self, end=END_INDICATOR): char = self._read_byte(1) neg = False while char != end: - if not neg and char == b'-': + if not neg and char == b"-": neg = True - elif not b'0' <= char <= b'9': + elif not b"0" <= char <= b"9": raise InvalidTorrentDataException(self._pos - 1) else: - value = value * 10 + int(char) - int(b'0') + value = value * 10 + int(char) - int(b"0") char = self._read_byte(1) return -value if neg else value @@ -284,27 +304,34 @@ def _next_string(self, need_decode=True, field=None): raw = self._read_byte(length) if need_decode: encoding = self._encoding - if encoding == 'auto': + if encoding == "auto": self.encoding = encoding = detect(raw) try: string = raw.decode(encoding, self._error_handler) except UnicodeDecodeError as e: - msg = [ - "Fail to decode string at pos {pos} using encoding ", - e.encoding - ] - if field: - msg.extend([ - ' when parser field "', field, '"' - ', maybe it is an hash field. 
', - 'You can use self.hash_field("', field, '") ', - 'to let it be treated as hash value, ', - 'so this error may disappear' - ]) - raise InvalidTorrentDataException( - self._pos - length + e.start, - ''.join(msg) - ) + if self._error_use_bytes: + return raw + else: + msg = [ + "Fail to decode string at pos {pos} using encoding ", + e.encoding, + ] + if field: + msg.extend( + [ + ' when parser field "', + field, + '"' ", maybe it is an hash field. ", + 'You can use self.hash_field("', + field, + '") ', + "to let it be treated as hash value, ", + "so this error may disappear", + ] + ) + raise InvalidTorrentDataException( + self._pos - length + e.start, "".join(msg) + ) return string return raw @@ -317,11 +344,11 @@ def _next_hash(self, p_len, need_list): if self._hash_raw: return raw res = [ - binascii.hexlify(chunk).decode('ascii') - for chunk in (raw[x:x+p_len] for x in range(0, len(raw), p_len)) + binascii.hexlify(chunk).decode("ascii") + for chunk in (raw[x: x + p_len] for x in range(0, len(raw), p_len)) ] if len(res) == 0 and not need_list: - return '' + return "" if len(res) == 1 and not need_list: return res[0] return res @@ -340,11 +367,11 @@ def _next_type(self): raise InvalidTorrentDataException(self._pos) def _type_to_func(self, t): - return getattr(self, '_next_' + t) + return getattr(self, "_next_" + t) def _next_element(self, field=None): element_type = self._next_type() - if element_type is TorrentFileParser.TYPE_STRING and field is not None: + if element_type is BDecoder.TYPE_STRING and field is not None: element = self._type_to_func(element_type)(field=field) else: element = self._type_to_func(element_type)() @@ -352,30 +379,29 @@ def _next_element(self, field=None): class BEncoder(object): - TYPES = { - (dict,): TorrentFileParser.TYPE_DICT, - (list,): TorrentFileParser.TYPE_LIST, - (int,): TorrentFileParser.TYPE_INT, - (str_type, bytes): TorrentFileParser.TYPE_STRING, + (dict,): BDecoder.TYPE_DICT, + (list,): BDecoder.TYPE_LIST, + (int,): 
BDecoder.TYPE_INT, + (str_type, bytes_type): BDecoder.TYPE_STRING, } - def __init__(self, data, encoding='utf-8', hash_fields=None): + def __init__(self, data, encoding="utf-8", hash_fields=None): """ :param dict|list|int|str data: data will be encoded :param str encoding: string field output encoding :param List[str] hash_fields: see - :any:`TorrentFileParser.__init__` + :any:`BDecoder.__init__` """ self._data = data self._encoding = encoding - self._hash_fields = list(TorrentFileParser.HASH_FIELD_PARAMS.keys()) + self._hash_fields = [] if hash_fields is not None: - self._hash_fields.extend(str_type(hash_fields)) + self._hash_fields = hash_fields def hash_field(self, name): """ - see :any:`TorrentFileParser.hash_field` + see :any:`BDecoder.hash_field` :param str name: :return: return self, so you can chained call @@ -388,7 +414,7 @@ def encode(self): :rtype: bytes """ - return b''.join(self._output_element(self._data)) + return b"".join(self._output_element(self._data)) def encode_to_filelike(self): """ @@ -401,15 +427,15 @@ def encode_to_filelike(self): def _output_string(self, data): if isinstance(data, str_type): data = data.encode(self._encoding) - yield str(len(data)).encode('ascii') - yield TorrentFileParser.STRING_DELIMITER + yield str(len(data)).encode("ascii") + yield BDecoder.STRING_DELIMITER yield data @staticmethod def _output_int(data): - yield TorrentFileParser.INT_INDICATOR - yield str(data).encode('ascii') - yield TorrentFileParser.END_INDICATOR + yield BDecoder.INT_INDICATOR + yield str(data).encode("ascii") + yield BDecoder.END_INDICATOR def _output_decode_hash(self, data): if isinstance(data, str_type): @@ -419,31 +445,41 @@ def _output_decode_hash(self, data): if not isinstance(hash_line, str_type): raise InvalidTorrentDataException( None, - "Hash must be " + str_type.__name__ + " not " + - type(hash_line).__name__, + "Hash must be " + + str_type.__name__ + + " not " + + type(hash_line).__name__, ) if len(hash_line) % 2 != 0: raise 
InvalidTorrentDataException( None, - "Hash(" + hash_line + ") length(" + str(len(hash_line)) + - ") is a not even number", + "Hash(" + + hash_line + + ") length(" + + str(len(hash_line)) + + ") is a not even number", ) try: raw = binascii.unhexlify(hash_line) except binascii.Error as e: raise InvalidTorrentDataException( - None, str(e), + None, + str(e), ) result.append(raw) - for x in self._output_string(b''.join(result)): + for x in self._output_string(b"".join(result)): yield x def _output_dict(self, data): - yield TorrentFileParser.DICT_INDICATOR + yield BDecoder.DICT_INDICATOR for k, v in data.items(): - if not isinstance(k, str_type): + if not isinstance(k, str_type) and not isinstance(k, bytes_type): raise InvalidTorrentDataException( - None, "Dict key must be " + str_type.__name__, + None, + "Dict key must be " + + str_type.__name__ + + " or " + + bytes_type.__name__, ) for x in self._output_element(k): yield x @@ -453,17 +489,17 @@ def _output_dict(self, data): else: for x in self._output_element(v): yield x - yield TorrentFileParser.END_INDICATOR + yield BDecoder.END_INDICATOR def _output_list(self, data): - yield TorrentFileParser.LIST_INDICATOR + yield BDecoder.LIST_INDICATOR for v in data: for x in self._output_element(v): yield x - yield TorrentFileParser.END_INDICATOR + yield BDecoder.END_INDICATOR def _type_to_func(self, t): - return getattr(self, '_output_' + t) + return getattr(self, "_output_" + t) def _output_element(self, data): for types, t in self.TYPES.items(): @@ -476,47 +512,119 @@ def _output_element(self, data): ) -class BDecoder(object): +class TorrentFileParser(object): + HASH_FIELD_DEFAULT_PARAMS = { + # field length need_list + "pieces": (20, True), + "ed2k": (16, False), + "filehash": (20, False), + "pieces root": (32, False), + } + def __init__( - self, data, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, + self, + fp, + use_ordered_dict=False, + encoding="utf-8", + 
errors=BDecoder.ERROR_HANDLER_USEBYTES, + hash_fields=None, + hash_raw=False, ): """ - See :any:`TorrentFileParser.__init__` for parameter description. + See :any:`BDecoder.__init__` for parameter description. + This class will use some default ``hash_fields`` values, and use "usebytes" as error handler + compare to use :any:`BDecoder` directly. - :param bytes data: raw data to be decoded + :param file fp: file to be parse :param bool use_ordered_dict: :param str encoding: :param str errors: :param Dict[str, Tuple[int, bool]] hash_fields: :param bool hash_raw: """ - self._parser = TorrentFileParser( - io.BytesIO(bytes(data)), + torrent_hash_fields = dict(TorrentFileParser.HASH_FIELD_DEFAULT_PARAMS) + if hash_fields is not None: + torrent_hash_fields.update(hash_fields) + + self._decoder = BDecoder( + fp, use_ordered_dict, encoding, errors, - hash_fields, + torrent_hash_fields, hash_raw, ) def hash_field(self, name, block_length=20, need_dict=False): """ - See :any:`TorrentFileParser.hash_field` for parameter description + See :any:`BDecoder.hash_field` for parameter description :param name: :param block_length: :param need_dict: :return: return self, so you can chained call """ - self._parser.hash_field(name, block_length, need_dict) + self._decoder.hash_field(name, block_length, need_dict) return self - def decode(self): - return self._parser.parse() + def parse(self): + """ + Parse provided file + """ + return self._decoder.decode() + + +class TorrentFileCreator(object): + def __init__(self, data, encoding="utf-8", hash_fields=None): + """ + See :any:`BEncoder.__init__` for parameter description. + This class will use some default ``hash_fields`` values, + compare to use ``BEncoder`` directly. 
+ :param dict|list|int|str data: + :param str encoding: + :param List[str] hash_fields: + """ + torrent_hash_fields = list(TorrentFileParser.HASH_FIELD_DEFAULT_PARAMS.keys()) + if hash_fields is not None: + torrent_hash_fields.extend(hash_fields) -def encode(data, encoding='utf-8', hash_fields=None): + self._encoder = BEncoder( + data, + encoding, + torrent_hash_fields, + ) + + def hash_field(self, name): + """ + See :any:`BEncoder.hash_field` for parameter description + + :param name: + :return: return self, so you can chained call + """ + self._encoder.hash_field(name) + return self + + def create_filelike(self): + """ + Create a file-like(BytesIO) object according to provided data + + :rtype: BytesIO + """ + return self._encoder.encode_to_filelike() + + def create(self, filename): + """ + Create torrent file according to provided data + + :param filename: output filename + :return: + """ + with open(filename, "wb") as f: + f.write(self._encoder.encode()) + + +def encode(data, encoding="utf-8", hash_fields=None): """ Shortcut function for encode python object to torrent file format(bencode) @@ -531,8 +639,12 @@ def encode(data, encoding='utf-8', hash_fields=None): def decode( - data, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, + data, + use_ordered_dict=False, + encoding="utf-8", + errors="strict", + hash_fields=None, + hash_raw=False, ): """ Shortcut function for decode bytes as torrent file format(bencode) to python @@ -540,7 +652,7 @@ def decode( See :any:`BDecoder.__init__` for parameter description - :param bytes data: raw data to be decoded + :param bytes|file data: data or file object to be decoded :param bool use_ordered_dict: :param str encoding: :param str errors: @@ -549,13 +661,22 @@ def decode( :rtype: dict|list|int|str|bytes|bytes """ return BDecoder( - data, use_ordered_dict, encoding, errors, hash_fields, hash_raw, + data, + use_ordered_dict, + encoding, + errors, + hash_fields, + hash_raw, 
).decode() def parse_torrent_file( - filename, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, + filename, + use_ordered_dict=False, + encoding="utf-8", + errors="usebytes", + hash_fields=None, + hash_raw=False, ): """ Shortcut function for parse torrent object using TorrentFileParser @@ -570,13 +691,18 @@ def parse_torrent_file( :param bool hash_raw: :rtype: dict|list|int|str|bytes """ - with open(filename, 'rb') as f: + with open(filename, "rb") as f: return TorrentFileParser( - f, use_ordered_dict, encoding, errors, hash_fields, hash_raw, + f, + use_ordered_dict, + encoding, + errors, + hash_fields, + hash_raw, ).parse() -def create_torrent_file(filename, data, encoding='utf-8', hash_fields=None): +def create_torrent_file(filename, data, encoding="utf-8", hash_fields=None): """ Shortcut function for create a torrent file using BEncoder @@ -587,30 +713,93 @@ def create_torrent_file(filename, data, encoding='utf-8', hash_fields=None): :param str encoding: :param List[str] hash_fields: """ - with open(filename, 'wb') as f: - f.write(BEncoder(data, encoding, hash_fields).encode()) + TorrentFileCreator(data, encoding, hash_fields).create(filename) + + +class DataWrapper: + def __init__(self, data): + self.data = data + + +class JSONEncoderDataWrapperBytesToString(json.JSONEncoder): + def process(self, o): + if isinstance(o, bytes_type): + return binascii.hexlify(o).decode("ascii") + if isinstance(o, collections.OrderedDict): + output = collections.OrderedDict() + for k, v in o.items(): + output[self.process(k)] = self.process(v) + return output + if isinstance(o, dict): + return {self.process(k): self.process(v) for k, v in o.items()} + if isinstance(o, list): + return [self.process(v) for v in o] + return o + + def default(self, o): + if isinstance(o, DataWrapper): + return self.process(o.data) + return json.JSONEncoder.default(self, o) def __main(): parser = argparse.ArgumentParser() - parser.add_argument('file', 
nargs='?', default='', - help='input file, will read form stdin if empty') - parser.add_argument('--dict', '-d', action='store_true', default=False, - help='use built-in dict, default will be OrderedDict') - parser.add_argument('--sort', '-s', action='store_true', default=False, - help='sort output json item by key') - parser.add_argument('--indent', '-i', type=int, default=None, - help='json output indent for every inner level') - parser.add_argument('--ascii', '-a', action='store_true', default=False, - help='ensure output json use ascii char, ' - 'escape other char use \\u') - parser.add_argument('--coding', '-c', default='utf-8', - help='string encoding, default "utf-8"') - parser.add_argument('--errors', '-e', default='strict', - help='decoding error handler, default "strict", you can' - ' use "ignore" or "replace" to avoid exception') - parser.add_argument('--version', '-v', action='store_true', default=False, - help='print version and exit') + parser.add_argument( + "file", nargs="?", default="", help="input file, will read form stdin if empty" + ) + parser.add_argument( + "--dict", + "-d", + action="store_true", + default=False, + help="use built-in dict, default will be OrderedDict", + ) + parser.add_argument( + "--sort", + "-s", + action="store_true", + default=False, + help="sort output json item by key", + ) + parser.add_argument( + "--indent", + "-i", + type=int, + default=None, + help="json output indent for every inner level", + ) + parser.add_argument( + "--ascii", + "-a", + action="store_true", + default=False, + help="ensure output json use ascii char, " "escape other char use \\u", + ) + parser.add_argument( + "--coding", "-c", default="utf-8", help='string encoding, default "utf-8"' + ) + parser.add_argument( + "--errors", + "-e", + default=BDecoder.ERROR_HANDLER_USEBYTES, + help='decoding error handler, default "' + + BDecoder.ERROR_HANDLER_USEBYTES + + '"', + ) + parser.add_argument( + "--hash-raw", + "-r", + action="store_true", + 
default=False, + help="do not group hash field by block, keeps it as raw bytes", + ) + parser.add_argument( + "--version", + "-v", + action="store_true", + default=False, + help="print version and exit", + ) args = parser.parse_args() if args.version: @@ -618,29 +807,34 @@ def __main(): exit(0) try: - if args.file == '': - target_file = io.BytesIO( - getattr(sys.stdin, 'buffer', sys.stdin).read() - ) + if args.file == "": + target_file = io.BytesIO(getattr(sys.stdin, "buffer", sys.stdin).read()) else: - target_file = open(args.file, 'rb') + target_file = open(args.file, "rb") except FileNotFoundError: sys.stderr.write('File "{}" not exist\n'.format(args.file)) exit(1) # noinspection PyUnboundLocalVariable data = TorrentFileParser( - target_file, not args.dict, args.coding, args.errors + target_file, + use_ordered_dict=not args.dict, + encoding=args.coding, + errors=args.errors, + hash_raw=args.hash_raw, ).parse() - data = json.dumps( - data, ensure_ascii=args.ascii, - sort_keys=args.sort, indent=args.indent + text = json.dumps( + DataWrapper(data), + ensure_ascii=args.ascii, + sort_keys=args.sort, + indent=args.indent, + cls=JSONEncoderDataWrapperBytesToString, ) - print(data) + print(text) -if __name__ == '__main__': +if __name__ == "__main__": __main() -# Last Updated February 18, 2021 +# Last Updated December 07, 2022 From e3d953856c79f06c7b1d145cdd7676f204762a8a Mon Sep 17 00:00:00 2001 From: SnZ <3882467+EsEnZeT@users.noreply.github.com> Date: Wed, 7 Dec 2022 19:02:36 +0100 Subject: [PATCH 007/624] Update torrent.yml - force to python3 --- scrapers/torrent.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scrapers/torrent.yml b/scrapers/torrent.yml index 3eca5cbc8..c9875d0b6 100644 --- a/scrapers/torrent.yml +++ b/scrapers/torrent.yml @@ -1,10 +1,9 @@ -name: "Torrent" +name: Torrent sceneByFragment: action: script script: - - python - # use python3 instead if needed + - python3 - torrent.py - query -# Last Updated February 04, 2021 
+# Last Updated December 07, 2022 \ No newline at end of file From 157c6851d36ec8cb582ec893180f2bb738a9fdd2 Mon Sep 17 00:00:00 2001 From: SnZ <3882467+EsEnZeT@users.noreply.github.com> Date: Wed, 7 Dec 2022 19:10:25 +0100 Subject: [PATCH 008/624] Update torrent.py scraper - less imports - code refactored - handle encoding issues with exotic torrents - query stash for all files for specific scene - update matching by more realistic scenarios (filename & size) --- scrapers/torrent.py | 126 +++++++++++++++++++++++++++----------------- 1 file changed, 78 insertions(+), 48 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 2e5e283d2..788e80992 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -1,66 +1,96 @@ -import os import sys import json +from os import walk +from os.path import join, dirname, realpath, basename try: - import torrent_parser as tp + from py_common import graphql + from torrent_parser import parse_torrent_file except ModuleNotFoundError: - print("You need to download the file 'torrent_parser.py' from the community repo! (CommunityScrapers/tree/master/scrapers/torrent_parser.py)", file=sys.stderr) + print("You need to download the file 'torrent_parser.py' from the community repo! " + "(CommunityScrapers/tree/master/scrapers/torrent_parser.py)", file=sys.stderr) sys.exit() - ''' This script parses all torrent files in the specified directory for embedded metadata. The title can either be a filename or the filename of the .torrent file This requires python3. This uses the torrent_parser library to parse torrent files from: https://github.com/7sDream/torrent_parser This library is under the MIT Licence. 
+''' + +TORRENTS_PATH = join(dirname(dirname(realpath(__file__))), "torrents") + + +def get_scene_data(fragment_data): + scene_id = fragment_data["id"] + scene_title = fragment_data["title"] + scene_files = [] + + response = graphql.callGraphQL(""" + query FileInfoBySceneId($id: ID) { + findScene(id: $id) { + files { + path + size + } + } + }""", {"id": scene_id}) + + if response: + for f in response["findScene"]["files"]: + scene_files.append({"filename": basename(f["path"]), "size": f["size"]}) + return {"id": scene_id, "title": scene_title, "files": scene_files} + return {} + - ''' +def get_torrent_metadata(scene_data, torrent_data): + res = {"title": scene_data["title"], "url": torrent_data["comment"]} + if "metadata" in torrent_data: + if "title" in torrent_data["metadata"]: + res["title"] = torrent_data["metadata"]["title"] + if "cover url" in torrent_data["metadata"]: + res["image"] = torrent_data["metadata"]["cover url"] + if "description" in torrent_data["metadata"]: + res["details"] = torrent_data["metadata"]["description"] + if "taglist" in torrent_data["metadata"]: + res["tags"] = [{"name": t} for t in torrent_data["metadata"]["taglist"]] + return res -path='./torrents/' -def query(title): -# print(f"Test",file=sys.stderr) - for root,dirs,files in os.walk(path): +def decode_bytes(s, encodings=("utf-8", "latin-1")): + for enc in encodings: + try: + return s.decode(enc) + except UnicodeDecodeError: + pass + return s.decode("utf-8", "ignore") + + +def scene_in_torrent(scene_data, torrent_data): + for scene in scene_data["files"]: + if "length" in torrent_data["info"]: + if scene["filename"] in torrent_data["info"]["name"] and torrent_data["info"]["length"] == scene["size"]: + return True + elif "files" in torrent_data["info"]: + for file in torrent_data["info"]["files"]: + file_name = file["path"][-1] + if type(file_name) is bytes: + file_name = decode_bytes(file_name) + if scene["filename"] in file_name and file["length"] == scene["size"]: + return True + 
+ +def process_torrents(scene_data): + for root, dirs, files in walk(TORRENTS_PATH): for name in files: - if '.torrent' in name: - query_torrent(title,os.path.join(root,name)) - -def query_torrent(title,path,found=False): - data=tp.parse_torrent_file(path) - # does the torrent contain more than one file and check if the file name we want is in the list - if not found and 'files' in data['info']: - for d in data['info']['files']: - for f in d['path']: - if title in f: - found=True - elif title in data['info']['name']: - found=True - if found: - res={'title':title} - if 'metadata' in data: - if 'title' in data['metadata']: - res['title']=data['metadata']['title'] - if 'cover url' in data['metadata']: - res['image']=data['metadata']['cover url'] - if 'description' in data['metadata']: - res['details']=data['metadata']['description'] - if 'taglist' in data['metadata']: - res['tags']=[{"name":x} for x in data['metadata']['taglist']] - - print(json.dumps(res)) - exit(0) -def lookup_torrent(title): - for root,dirs,files in os.walk(path): - if title in files: - query_torrent(title,os.path.join(root,title),found=True) - + if name.endswith(".torrent"): + torrent_data = parse_torrent_file(join(root, name)) + if scene_in_torrent(scene_data, torrent_data): + return get_torrent_metadata(scene_data, torrent_data) + return {} + + if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) - title=fragment['title'] - if '.torrent' in title: - lookup_torrent(title) - else: - query(title) - print(json.dumps(fragment)) -# Last Updated February 18, 2021 + print(json.dumps(process_torrents(get_scene_data(fragment)))) +# Last Updated December 07, 2022 From f6c1efaea804c87715b8c42f8306457fb1d17214 Mon Sep 17 00:00:00 2001 From: SnZ <3882467+EsEnZeT@users.noreply.github.com> Date: Mon, 12 Dec 2022 22:34:43 +0100 Subject: [PATCH 009/624] Remove old files related to Torrent scraper --- scrapers/torrent_parser.py | 840 ------------------------------------- 1 file changed, 840 
deletions(-) delete mode 100644 scrapers/torrent_parser.py diff --git a/scrapers/torrent_parser.py b/scrapers/torrent_parser.py deleted file mode 100644 index 967c8ec12..000000000 --- a/scrapers/torrent_parser.py +++ /dev/null @@ -1,840 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -""" -A .torrent file parser for both Python 2 and 3 - -Usage: - - data = parse_torrent_file(filename) - - # or - - with open(filename, 'rb') as f: # the binary mode 'b' is necessary - data = TorrentFileParser(f).parse() - - # then you can edit the data - - data['announce-list'].append(['http://127.0.0.1:8080']) - - # and create a new torrent file from data - - create_torrent_file('new.torrent', data) - - # or - - with open('new.torrent', 'wb') as f: - f.write(TorrentFileCreator(data).encode()) - - # or you don't deal with file, just object in memory - - data = decode(b'i12345e') # data = 12345 - content = encode(data) # content = b'i12345e' - -""" - -from __future__ import print_function, unicode_literals - -import argparse -import binascii -import collections -import io -import json -import sys -import warnings - -try: - FileNotFoundError -except NameError: - # Python 2 do not have FileNotFoundError, use IOError instead - # noinspection PyShadowingBuiltins - FileNotFoundError = IOError - -try: - # noinspection PyPackageRequirements - from chardet import detect as _detect -except ImportError: - - def _detect(_): - warnings.warn("No chardet module installed, encoding will be utf-8") - return {"encoding": "utf-8", "confidence": 1} - -try: - # noinspection PyUnresolvedReferences - # For Python 2 - str_type = unicode - bytes_type = str -except NameError: - # For Python 3 - str_type = str - bytes_type = bytes - -__all__ = [ - "InvalidTorrentDataException", - "BEncoder", - "BDecoder", - "encode", - "decode", - "TorrentFileParser", - "TorrentFileCreator", - "create_torrent_file", - "parse_torrent_file", -] - -__version__ = "0.4.1" - - -def detect(content): - return 
_detect(content)["encoding"] - - -class InvalidTorrentDataException(Exception): - def __init__(self, pos, msg=None): - msg = msg or "Invalid torrent format when read at pos {pos}" - msg = msg.format(pos=pos) - super(InvalidTorrentDataException, self).__init__(msg) - - -class __EndCls(object): - pass - - -_END = __EndCls() - - -def _check_hash_field_params(name, value): - return ( - isinstance(name, str_type) - and isinstance(value, tuple) - and len(value) == 2 - and isinstance(value[0], int) - and isinstance(value[1], bool) - ) - - -class BDecoder(object): - TYPE_LIST = "list" - TYPE_DICT = "dict" - TYPE_INT = "int" - TYPE_STRING = "string" - TYPE_END = "end" - - LIST_INDICATOR = b"l" - DICT_INDICATOR = b"d" - INT_INDICATOR = b"i" - END_INDICATOR = b"e" - STRING_INDICATOR = b"" - STRING_DELIMITER = b":" - - TYPES = [ - (TYPE_LIST, LIST_INDICATOR), - (TYPE_DICT, DICT_INDICATOR), - (TYPE_INT, INT_INDICATOR), - (TYPE_END, END_INDICATOR), - (TYPE_STRING, STRING_INDICATOR), - ] - - # see https://docs.python.org/3/library/codecs.html#error-handlers - # for other usable error handler string - ERROR_HANDLER_USEBYTES = "usebytes" - - def __init__( - self, - data, - use_ordered_dict=False, - encoding="utf-8", - errors="strict", - hash_fields=None, - hash_raw=False, - ): - """ - :param bytes|file data: bytes or a **binary** file-like object to parse, - which means need 'b' mode when use built-in open function - :param bool use_ordered_dict: Use collections.OrderedDict as dict - container default False, which mean use built-in dict - :param str encoding: file content encoding, default utf-8, use 'auto' - to enable charset auto detection (need 'chardet' package installed) - :param str errors: how to deal with encoding error when try to parse - string from content with ``encoding``. - see https://docs.python.org/3/library/codecs.html#error-handlers - for usable error handler string. 
- in particular, you can use "usebytes" to use "strict" decode mode - and let it return raw bytes if error happened. - :param Dict[str, Tuple[int, bool]] hash_fields: extra fields should - be treated as hash value. dict key is the field name, value is a - two-element tuple of (hash_block_length, as_a_list). - See :any:`hash_field` for detail - """ - if isinstance(data, bytes_type): - data = io.BytesIO(data) - elif getattr(data, "read") is not None and getattr(data, "seek") is not None: - pass - else: - raise ValueError("Parameter data must be bytes or file like object") - - self._pos = 0 - self._encoding = encoding - self._content = data - self._use_ordered_dict = use_ordered_dict - self._error_handler = errors - self._error_use_bytes = False - if self._error_handler == BDecoder.ERROR_HANDLER_USEBYTES: - self._error_handler = "strict" - self._error_use_bytes = True - - self._hash_fields = {} - if hash_fields is not None: - for k, v in hash_fields.items(): - if _check_hash_field_params(k, v): - self._hash_fields[k] = v - else: - raise ValueError( - "Invalid hash field parameter, it should be type of " - "Dict[str, Tuple[int, bool]]" - ) - self._hash_raw = bool(hash_raw) - - def hash_field(self, name, block_length=20, need_list=False): - """ - Let field with the `name` to be treated as hash value, don't decode it - as a string. 
- - :param str name: field name - :param int block_length: hash block length for split - :param bool need_list: if True, when the field only has one block( - or even empty) its parse result will be a one-element list( - or empty list); If False, will be a string in 0 or 1 block condition - :return: return self, so you can chained call - """ - v = (block_length, need_list) - if _check_hash_field_params(name, v): - self._hash_fields[name] = v - else: - raise ValueError("Invalid hash field parameter") - return self - - def decode(self): - """ - :rtype: dict|list|int|str|unicode|bytes - :raise: :any:`InvalidTorrentDataException` when parse failed or error - happened when decode string using specified encoding - """ - self._restart() - data = self._next_element() - - try: - c = self._read_byte(1, True) - raise InvalidTorrentDataException( - 0, "Expect EOF, but get [{}] at pos {}".format(c, self._pos) - ) - except EOFError: # expect EOF - pass - - return data - - def _read_byte(self, count=1, raise_eof=False): - assert count >= 0 - gotten = self._content.read(count) - if count != 0 and len(gotten) == 0: - if raise_eof: - raise EOFError() - raise InvalidTorrentDataException( - self._pos, "Unexpected EOF when reading torrent file" - ) - self._pos += count - return gotten - - def _seek_back(self, count): - self._content.seek(-count, 1) - self._pos = self._pos - count - - def _restart(self): - self._content.seek(0, 0) - self._pos = 0 - - def _dict_items_generator(self): - while True: - k = self._next_element() - if k is _END: - return - if not isinstance(k, str_type) and not isinstance(k, bytes_type): - raise InvalidTorrentDataException( - self._pos, "Type of dict key can't be " + type(k).__name__ - ) - if k in self._hash_fields: - v = self._next_hash(*self._hash_fields[k]) - else: - v = self._next_element(k) - if k == "encoding": - self._encoding = v - yield k, v - - def _next_dict(self): - data = collections.OrderedDict() if self._use_ordered_dict else dict() - for key, 
element in self._dict_items_generator(): - data[key] = element - return data - - def _list_items_generator(self): - while True: - element = self._next_element() - if element is _END: - return - yield element - - def _next_list(self): - return [element for element in self._list_items_generator()] - - def _next_int(self, end=END_INDICATOR): - value = 0 - char = self._read_byte(1) - neg = False - while char != end: - if not neg and char == b"-": - neg = True - elif not b"0" <= char <= b"9": - raise InvalidTorrentDataException(self._pos - 1) - else: - value = value * 10 + int(char) - int(b"0") - char = self._read_byte(1) - return -value if neg else value - - def _next_string(self, need_decode=True, field=None): - length = self._next_int(self.STRING_DELIMITER) - raw = self._read_byte(length) - if need_decode: - encoding = self._encoding - if encoding == "auto": - self.encoding = encoding = detect(raw) - try: - string = raw.decode(encoding, self._error_handler) - except UnicodeDecodeError as e: - if self._error_use_bytes: - return raw - else: - msg = [ - "Fail to decode string at pos {pos} using encoding ", - e.encoding, - ] - if field: - msg.extend( - [ - ' when parser field "', - field, - '"' ", maybe it is an hash field. 
", - 'You can use self.hash_field("', - field, - '") ', - "to let it be treated as hash value, ", - "so this error may disappear", - ] - ) - raise InvalidTorrentDataException( - self._pos - length + e.start, "".join(msg) - ) - return string - return raw - - def _next_hash(self, p_len, need_list): - raw = self._next_string(need_decode=False) - if len(raw) % p_len != 0: - raise InvalidTorrentDataException( - self._pos - len(raw), "Hash bit length not match at pos {pos}" - ) - if self._hash_raw: - return raw - res = [ - binascii.hexlify(chunk).decode("ascii") - for chunk in (raw[x: x + p_len] for x in range(0, len(raw), p_len)) - ] - if len(res) == 0 and not need_list: - return "" - if len(res) == 1 and not need_list: - return res[0] - return res - - @staticmethod - def _next_end(): - return _END - - def _next_type(self): - for (element_type, indicator) in self.TYPES: - indicator_length = len(indicator) - char = self._read_byte(indicator_length) - if indicator == char: - return element_type - self._seek_back(indicator_length) - raise InvalidTorrentDataException(self._pos) - - def _type_to_func(self, t): - return getattr(self, "_next_" + t) - - def _next_element(self, field=None): - element_type = self._next_type() - if element_type is BDecoder.TYPE_STRING and field is not None: - element = self._type_to_func(element_type)(field=field) - else: - element = self._type_to_func(element_type)() - return element - - -class BEncoder(object): - TYPES = { - (dict,): BDecoder.TYPE_DICT, - (list,): BDecoder.TYPE_LIST, - (int,): BDecoder.TYPE_INT, - (str_type, bytes_type): BDecoder.TYPE_STRING, - } - - def __init__(self, data, encoding="utf-8", hash_fields=None): - """ - :param dict|list|int|str data: data will be encoded - :param str encoding: string field output encoding - :param List[str] hash_fields: see - :any:`BDecoder.__init__` - """ - self._data = data - self._encoding = encoding - self._hash_fields = [] - if hash_fields is not None: - self._hash_fields = hash_fields - - 
def hash_field(self, name): - """ - see :any:`BDecoder.hash_field` - - :param str name: - :return: return self, so you can chained call - """ - return self._hash_fields.append(str_type(name)) - - def encode(self): - """ - Encode to bytes - - :rtype: bytes - """ - return b"".join(self._output_element(self._data)) - - def encode_to_filelike(self): - """ - Encode to a file-like(BytesIO) object - - :rtype: BytesIO - """ - return io.BytesIO(self.encode()) - - def _output_string(self, data): - if isinstance(data, str_type): - data = data.encode(self._encoding) - yield str(len(data)).encode("ascii") - yield BDecoder.STRING_DELIMITER - yield data - - @staticmethod - def _output_int(data): - yield BDecoder.INT_INDICATOR - yield str(data).encode("ascii") - yield BDecoder.END_INDICATOR - - def _output_decode_hash(self, data): - if isinstance(data, str_type): - data = [data] - result = [] - for hash_line in data: - if not isinstance(hash_line, str_type): - raise InvalidTorrentDataException( - None, - "Hash must be " - + str_type.__name__ - + " not " - + type(hash_line).__name__, - ) - if len(hash_line) % 2 != 0: - raise InvalidTorrentDataException( - None, - "Hash(" - + hash_line - + ") length(" - + str(len(hash_line)) - + ") is a not even number", - ) - try: - raw = binascii.unhexlify(hash_line) - except binascii.Error as e: - raise InvalidTorrentDataException( - None, - str(e), - ) - result.append(raw) - for x in self._output_string(b"".join(result)): - yield x - - def _output_dict(self, data): - yield BDecoder.DICT_INDICATOR - for k, v in data.items(): - if not isinstance(k, str_type) and not isinstance(k, bytes_type): - raise InvalidTorrentDataException( - None, - "Dict key must be " - + str_type.__name__ - + " or " - + bytes_type.__name__, - ) - for x in self._output_element(k): - yield x - if k in self._hash_fields: - for x in self._output_decode_hash(v): - yield x - else: - for x in self._output_element(v): - yield x - yield BDecoder.END_INDICATOR - - def 
_output_list(self, data): - yield BDecoder.LIST_INDICATOR - for v in data: - for x in self._output_element(v): - yield x - yield BDecoder.END_INDICATOR - - def _type_to_func(self, t): - return getattr(self, "_output_" + t) - - def _output_element(self, data): - for types, t in self.TYPES.items(): - if isinstance(data, types): - # noinspection PyCallingNonCallable - return self._type_to_func(t)(data) - raise InvalidTorrentDataException( - None, - "Invalid type for torrent file: " + type(data).__name__, - ) - - -class TorrentFileParser(object): - HASH_FIELD_DEFAULT_PARAMS = { - # field length need_list - "pieces": (20, True), - "ed2k": (16, False), - "filehash": (20, False), - "pieces root": (32, False), - } - - def __init__( - self, - fp, - use_ordered_dict=False, - encoding="utf-8", - errors=BDecoder.ERROR_HANDLER_USEBYTES, - hash_fields=None, - hash_raw=False, - ): - """ - See :any:`BDecoder.__init__` for parameter description. - This class will use some default ``hash_fields`` values, and use "usebytes" as error handler - compare to use :any:`BDecoder` directly. 
- - :param file fp: file to be parse - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - """ - torrent_hash_fields = dict(TorrentFileParser.HASH_FIELD_DEFAULT_PARAMS) - if hash_fields is not None: - torrent_hash_fields.update(hash_fields) - - self._decoder = BDecoder( - fp, - use_ordered_dict, - encoding, - errors, - torrent_hash_fields, - hash_raw, - ) - - def hash_field(self, name, block_length=20, need_dict=False): - """ - See :any:`BDecoder.hash_field` for parameter description - - :param name: - :param block_length: - :param need_dict: - :return: return self, so you can chained call - """ - self._decoder.hash_field(name, block_length, need_dict) - return self - - def parse(self): - """ - Parse provided file - """ - return self._decoder.decode() - - -class TorrentFileCreator(object): - def __init__(self, data, encoding="utf-8", hash_fields=None): - """ - See :any:`BEncoder.__init__` for parameter description. - This class will use some default ``hash_fields`` values, - compare to use ``BEncoder`` directly. 
- - :param dict|list|int|str data: - :param str encoding: - :param List[str] hash_fields: - """ - torrent_hash_fields = list(TorrentFileParser.HASH_FIELD_DEFAULT_PARAMS.keys()) - if hash_fields is not None: - torrent_hash_fields.extend(hash_fields) - - self._encoder = BEncoder( - data, - encoding, - torrent_hash_fields, - ) - - def hash_field(self, name): - """ - See :any:`BEncoder.hash_field` for parameter description - - :param name: - :return: return self, so you can chained call - """ - self._encoder.hash_field(name) - return self - - def create_filelike(self): - """ - Create a file-like(BytesIO) object according to provided data - - :rtype: BytesIO - """ - return self._encoder.encode_to_filelike() - - def create(self, filename): - """ - Create torrent file according to provided data - - :param filename: output filename - :return: - """ - with open(filename, "wb") as f: - f.write(self._encoder.encode()) - - -def encode(data, encoding="utf-8", hash_fields=None): - """ - Shortcut function for encode python object to torrent file format(bencode) - - See :any:`BEncoder.__init__` for parameter description - - :param dict|list|int|str|bytes data: data to be encoded - :param str encoding: - :param List[str] hash_fields: - :rtype: bytes - """ - return BEncoder(data, encoding, hash_fields).encode() - - -def decode( - data, - use_ordered_dict=False, - encoding="utf-8", - errors="strict", - hash_fields=None, - hash_raw=False, -): - """ - Shortcut function for decode bytes as torrent file format(bencode) to python - object - - See :any:`BDecoder.__init__` for parameter description - - :param bytes|file data: data or file object to be decoded - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - :rtype: dict|list|int|str|bytes|bytes - """ - return BDecoder( - data, - use_ordered_dict, - encoding, - errors, - hash_fields, - hash_raw, - ).decode() - - -def parse_torrent_file( - 
filename, - use_ordered_dict=False, - encoding="utf-8", - errors="usebytes", - hash_fields=None, - hash_raw=False, -): - """ - Shortcut function for parse torrent object using TorrentFileParser - - See :any:`TorrentFileParser.__init__` for parameter description - - :param str filename: torrent filename - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - :rtype: dict|list|int|str|bytes - """ - with open(filename, "rb") as f: - return TorrentFileParser( - f, - use_ordered_dict, - encoding, - errors, - hash_fields, - hash_raw, - ).parse() - - -def create_torrent_file(filename, data, encoding="utf-8", hash_fields=None): - """ - Shortcut function for create a torrent file using BEncoder - - see :any:`BDecoder.__init__` for parameter description - - :param str filename: output torrent filename - :param dict|list|int|str|bytes data: - :param str encoding: - :param List[str] hash_fields: - """ - TorrentFileCreator(data, encoding, hash_fields).create(filename) - - -class DataWrapper: - def __init__(self, data): - self.data = data - - -class JSONEncoderDataWrapperBytesToString(json.JSONEncoder): - def process(self, o): - if isinstance(o, bytes_type): - return binascii.hexlify(o).decode("ascii") - if isinstance(o, collections.OrderedDict): - output = collections.OrderedDict() - for k, v in o.items(): - output[self.process(k)] = self.process(v) - return output - if isinstance(o, dict): - return {self.process(k): self.process(v) for k, v in o.items()} - if isinstance(o, list): - return [self.process(v) for v in o] - return o - - def default(self, o): - if isinstance(o, DataWrapper): - return self.process(o.data) - return json.JSONEncoder.default(self, o) - - -def __main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "file", nargs="?", default="", help="input file, will read form stdin if empty" - ) - parser.add_argument( - "--dict", - "-d", - 
action="store_true", - default=False, - help="use built-in dict, default will be OrderedDict", - ) - parser.add_argument( - "--sort", - "-s", - action="store_true", - default=False, - help="sort output json item by key", - ) - parser.add_argument( - "--indent", - "-i", - type=int, - default=None, - help="json output indent for every inner level", - ) - parser.add_argument( - "--ascii", - "-a", - action="store_true", - default=False, - help="ensure output json use ascii char, " "escape other char use \\u", - ) - parser.add_argument( - "--coding", "-c", default="utf-8", help='string encoding, default "utf-8"' - ) - parser.add_argument( - "--errors", - "-e", - default=BDecoder.ERROR_HANDLER_USEBYTES, - help='decoding error handler, default "' - + BDecoder.ERROR_HANDLER_USEBYTES - + '"', - ) - parser.add_argument( - "--hash-raw", - "-r", - action="store_true", - default=False, - help="do not group hash field by block, keeps it as raw bytes", - ) - parser.add_argument( - "--version", - "-v", - action="store_true", - default=False, - help="print version and exit", - ) - args = parser.parse_args() - - if args.version: - print(__version__) - exit(0) - - try: - if args.file == "": - target_file = io.BytesIO(getattr(sys.stdin, "buffer", sys.stdin).read()) - else: - target_file = open(args.file, "rb") - except FileNotFoundError: - sys.stderr.write('File "{}" not exist\n'.format(args.file)) - exit(1) - - # noinspection PyUnboundLocalVariable - data = TorrentFileParser( - target_file, - use_ordered_dict=not args.dict, - encoding=args.coding, - errors=args.errors, - hash_raw=args.hash_raw, - ).parse() - - text = json.dumps( - DataWrapper(data), - ensure_ascii=args.ascii, - sort_keys=args.sort, - indent=args.indent, - cls=JSONEncoderDataWrapperBytesToString, - ) - - print(text) - - -if __name__ == "__main__": - __main() -# Last Updated December 07, 2022 From 3d6d80b35b45e7c7d5223ba4d21eb24a729b86af Mon Sep 17 00:00:00 2001 From: SnZ <3882467+EsEnZeT@users.noreply.github.com> 
Date: Mon, 12 Dec 2022 22:36:29 +0100 Subject: [PATCH 010/624] Rewrite v1 implementation to v2 for quicker parsing --- scrapers/torrent.py | 60 +++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 788e80992..4cf2cb457 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -3,20 +3,18 @@ from os import walk from os.path import join, dirname, realpath, basename +try: + from bencoder import bdecode +except ModuleNotFoundError: + print("You need to install the 'bencoder.pyx' module. (https://pypi.org/project/bencoder.pyx/)", file=sys.stderr) + sys.exit() + try: from py_common import graphql - from torrent_parser import parse_torrent_file except ModuleNotFoundError: - print("You need to download the file 'torrent_parser.py' from the community repo! " - "(CommunityScrapers/tree/master/scrapers/torrent_parser.py)", file=sys.stderr) + print("You need to download the folder 'py_common' from the community repo! " + "(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) sys.exit() -''' This script parses all torrent files in the specified directory for embedded metadata. - The title can either be a filename or the filename of the .torrent file - - This requires python3. - This uses the torrent_parser library to parse torrent files from: https://github.com/7sDream/torrent_parser - This library is under the MIT Licence. 
-''' TORRENTS_PATH = join(dirname(dirname(realpath(__file__))), "torrents") @@ -44,16 +42,16 @@ def get_scene_data(fragment_data): def get_torrent_metadata(scene_data, torrent_data): - res = {"title": scene_data["title"], "url": torrent_data["comment"]} - if "metadata" in torrent_data: - if "title" in torrent_data["metadata"]: - res["title"] = torrent_data["metadata"]["title"] - if "cover url" in torrent_data["metadata"]: - res["image"] = torrent_data["metadata"]["cover url"] - if "description" in torrent_data["metadata"]: - res["details"] = torrent_data["metadata"]["description"] - if "taglist" in torrent_data["metadata"]: - res["tags"] = [{"name": t} for t in torrent_data["metadata"]["taglist"]] + res = {"title": scene_data["title"], "url": decode_bytes(torrent_data[b"comment"])} + if b"metadata" in torrent_data: + if b"title" in torrent_data[b"metadata"]: + res["title"] = decode_bytes(torrent_data[b"metadata"][b"title"]) + if b"cover url" in torrent_data[b"metadata"]: + res["image"] = decode_bytes(torrent_data[b"metadata"][b"cover url"]) + if b"description" in torrent_data[b"metadata"]: + res["details"] = decode_bytes(torrent_data[b"metadata"][b"description"]) + if b"taglist" in torrent_data[b"metadata"]: + res["tags"] = [{"name": decode_bytes(t)} for t in torrent_data[b"metadata"][b"taglist"]] return res @@ -68,15 +66,12 @@ def decode_bytes(s, encodings=("utf-8", "latin-1")): def scene_in_torrent(scene_data, torrent_data): for scene in scene_data["files"]: - if "length" in torrent_data["info"]: - if scene["filename"] in torrent_data["info"]["name"] and torrent_data["info"]["length"] == scene["size"]: + if b"length" in torrent_data[b"info"]: + if scene["filename"] in decode_bytes(torrent_data[b"info"][b"name"]) and torrent_data[b"info"][b"length"] == scene["size"]: return True - elif "files" in torrent_data["info"]: - for file in torrent_data["info"]["files"]: - file_name = file["path"][-1] - if type(file_name) is bytes: - file_name = decode_bytes(file_name) - 
if scene["filename"] in file_name and file["length"] == scene["size"]: + elif b"files" in torrent_data[b"info"]: + for file in torrent_data[b"info"][b"files"]: + if scene["filename"] in decode_bytes(file[b"path"][-1]) and file[b"length"] == scene["size"]: return True @@ -84,13 +79,14 @@ def process_torrents(scene_data): for root, dirs, files in walk(TORRENTS_PATH): for name in files: if name.endswith(".torrent"): - torrent_data = parse_torrent_file(join(root, name)) - if scene_in_torrent(scene_data, torrent_data): - return get_torrent_metadata(scene_data, torrent_data) + with open(join(root, name), "rb") as f: + torrent_data = bdecode(f.read()) + if scene_in_torrent(scene_data, torrent_data): + return get_torrent_metadata(scene_data, torrent_data) return {} if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) print(json.dumps(process_torrents(get_scene_data(fragment)))) -# Last Updated December 07, 2022 +# Last Updated December 12, 2022 From 3a1a6021015bc5d794e60809bcc3be3544fb77d6 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 16 Dec 2022 23:36:51 +0100 Subject: [PATCH 011/624] Minor changes to improve error handling --- scrapers/torrent.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 4cf2cb457..df4ba7cc6 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -2,6 +2,7 @@ import json from os import walk from os.path import join, dirname, realpath, basename +from pathlib import Path try: from bencoder import bdecode @@ -10,13 +11,13 @@ sys.exit() try: - from py_common import graphql + from py_common import graphql, log except ModuleNotFoundError: print("You need to download the folder 'py_common' from the community repo! 
" "(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) sys.exit() -TORRENTS_PATH = join(dirname(dirname(realpath(__file__))), "torrents") +TORRENTS_PATH = Path("torrents") def get_scene_data(fragment_data): @@ -34,7 +35,7 @@ def get_scene_data(fragment_data): } }""", {"id": scene_id}) - if response: + if response and response["findScene"]: for f in response["findScene"]["files"]: scene_files.append({"filename": basename(f["path"]), "size": f["size"]}) return {"id": scene_id, "title": scene_title, "files": scene_files} @@ -76,17 +77,18 @@ def scene_in_torrent(scene_data, torrent_data): def process_torrents(scene_data): - for root, dirs, files in walk(TORRENTS_PATH): - for name in files: - if name.endswith(".torrent"): - with open(join(root, name), "rb") as f: - torrent_data = bdecode(f.read()) - if scene_in_torrent(scene_data, torrent_data): - return get_torrent_metadata(scene_data, torrent_data) + if scene_data: + for name in TORRENTS_PATH.glob("*.torrent"): + with open(name, "rb") as f: + torrent_data = bdecode(f.read()) + if scene_in_torrent(scene_data, torrent_data): + return get_torrent_metadata(scene_data, torrent_data) return {} if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) print(json.dumps(process_torrents(get_scene_data(fragment)))) -# Last Updated December 12, 2022 + + +# Last Updated December 16, 2022 From 303235f51176c90fc0396256b1d368621d3f46e0 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 16 Dec 2022 23:57:38 +0100 Subject: [PATCH 012/624] Now parses more fields --- scrapers/torrent.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index df4ba7cc6..ec3382bbe 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -3,6 +3,8 @@ from os import walk from os.path import join, dirname, realpath, basename from pathlib import Path +import re +from datetime import datetime 
try: from bencoder import bdecode @@ -41,6 +43,20 @@ def get_scene_data(fragment_data): return {"id": scene_id, "title": scene_title, "files": scene_files} return {} +def readJSONInput(): + input = sys.stdin.read() + log.debug(input) + return json.loads(input) + +def process_tags_performers(tagList): + return map(lambda tag: decode_bytes(tag).replace('.', ' '), tagList) + +def procress_description_bbcode(description): + res = re.sub('\[.*?\].*?\[\/.*?\]','',description) + res = re.sub('\[.*?\]','',res) + return res.rstrip() + + def get_torrent_metadata(scene_data, torrent_data): res = {"title": scene_data["title"], "url": decode_bytes(torrent_data[b"comment"])} @@ -50,9 +66,15 @@ def get_torrent_metadata(scene_data, torrent_data): if b"cover url" in torrent_data[b"metadata"]: res["image"] = decode_bytes(torrent_data[b"metadata"][b"cover url"]) if b"description" in torrent_data[b"metadata"]: - res["details"] = decode_bytes(torrent_data[b"metadata"][b"description"]) + res["details"] = procress_description_bbcode(decode_bytes(torrent_data[b"metadata"][b"description"])) if b"taglist" in torrent_data[b"metadata"]: res["tags"] = [{"name": decode_bytes(t)} for t in torrent_data[b"metadata"][b"taglist"]] + if b"taglist" in torrent_data[b"metadata"]: + res["performers"]=[{"name":x} for x in process_tags_performers(torrent_data[b"metadata"][b"taglist"])] + if b"comment" in torrent_data: + res["url"] = decode_bytes(torrent_data[b"comment"]) + if b"creation date" in torrent_data: + res["date"] = datetime.fromtimestamp(torrent_data[b"creation date"]).strftime("%Y-%m-%d") return res From 4ecf347f81293d6f31e34a13d3c7745970fa4432 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Sat, 17 Dec 2022 00:13:07 +0100 Subject: [PATCH 013/624] Now allows searching --- scrapers/torrent.py | 49 +++++++++++++++++++++++++++++++++----------- scrapers/torrent.yml | 16 ++++++++++++++- 2 files changed, 52 insertions(+), 13 deletions(-) diff --git 
a/scrapers/torrent.py b/scrapers/torrent.py index ec3382bbe..1aa231fa9 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -5,6 +5,7 @@ from pathlib import Path import re from datetime import datetime +import difflib try: from bencoder import bdecode @@ -43,30 +44,24 @@ def get_scene_data(fragment_data): return {"id": scene_id, "title": scene_title, "files": scene_files} return {} -def readJSONInput(): - input = sys.stdin.read() - log.debug(input) - return json.loads(input) - def process_tags_performers(tagList): return map(lambda tag: decode_bytes(tag).replace('.', ' '), tagList) -def procress_description_bbcode(description): +def process_description_bbcode(description): res = re.sub('\[.*?\].*?\[\/.*?\]','',description) res = re.sub('\[.*?\]','',res) return res.rstrip() +def get_torrent_metadata(torrent_data): + res = {} - -def get_torrent_metadata(scene_data, torrent_data): - res = {"title": scene_data["title"], "url": decode_bytes(torrent_data[b"comment"])} if b"metadata" in torrent_data: if b"title" in torrent_data[b"metadata"]: res["title"] = decode_bytes(torrent_data[b"metadata"][b"title"]) if b"cover url" in torrent_data[b"metadata"]: res["image"] = decode_bytes(torrent_data[b"metadata"][b"cover url"]) if b"description" in torrent_data[b"metadata"]: - res["details"] = procress_description_bbcode(decode_bytes(torrent_data[b"metadata"][b"description"])) + res["details"] = process_description_bbcode(decode_bytes(torrent_data[b"metadata"][b"description"])) if b"taglist" in torrent_data[b"metadata"]: res["tags"] = [{"name": decode_bytes(t)} for t in torrent_data[b"metadata"][b"taglist"]] if b"taglist" in torrent_data[b"metadata"]: @@ -104,13 +99,43 @@ def process_torrents(scene_data): with open(name, "rb") as f: torrent_data = bdecode(f.read()) if scene_in_torrent(scene_data, torrent_data): - return get_torrent_metadata(scene_data, torrent_data) + return get_torrent_metadata(torrent_data) return {} +def similarity_file_name(search, fileName): + result 
= difflib.SequenceMatcher(a=search.lower(), b=fileName.lower()) + return result.ratio() + +def cleanup_name(name): + ret = str(name) + ret = ret.removeprefix("torrents\\").removesuffix(".torrent") + return ret + if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) print(json.dumps(process_torrents(get_scene_data(fragment)))) - +elif sys.argv[1] == "fragment": + filename = json.loads(sys.stdin.read()).get('title') + with open(TORRENTS_PATH / filename, 'rb') as f: + torrent_data = bdecode(f.read()) + print(json.dumps(get_torrent_metadata(torrent_data))) +elif sys.argv[1] == "search": + search = json.loads(sys.stdin.read()).get('name') + torrents = list(TORRENTS_PATH.glob('*.torrent')) + ratios = {} + for t in torrents: + clean_t = cleanup_name(t) + ratios[round(10000*(1-similarity_file_name(search, clean_t)))] = clean_t + + # Order ratios + ratios_sorted = dict(sorted(ratios.items())) + # Only return the top 5 results + if len(ratios) > 5: + ratios = ratios_sorted[5:] + + res = list(map(lambda i: {'title': ratios_sorted[i] + ".torrent"}, ratios_sorted)) + log.debug(ratios_sorted) + print(json.dumps(res)) # Last Updated December 16, 2022 diff --git a/scrapers/torrent.yml b/scrapers/torrent.yml index c9875d0b6..785ce2473 100644 --- a/scrapers/torrent.yml +++ b/scrapers/torrent.yml @@ -5,5 +5,19 @@ sceneByFragment: - python3 - torrent.py - query + +sceneByName: + action: script + script: + - python3 + - torrent.py + - search -# Last Updated December 07, 2022 \ No newline at end of file +sceneByQueryFragment: + action: script + script: + - python3 + - torrent.py + - fragment + +# Last Updated December 16, 2022 \ No newline at end of file From f44aaef5260874eb2339831c3511505e424e5302 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Sat, 17 Dec 2022 00:16:23 +0100 Subject: [PATCH 014/624] Remove the logs used for debug --- scrapers/torrent.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/scrapers/torrent.py b/scrapers/torrent.py index 1aa231fa9..3706357f8 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -135,7 +135,6 @@ def cleanup_name(name): ratios = ratios_sorted[5:] res = list(map(lambda i: {'title': ratios_sorted[i] + ".torrent"}, ratios_sorted)) - log.debug(ratios_sorted) print(json.dumps(res)) # Last Updated December 16, 2022 From 40f8ab9848638b1f53746d3db9586aee9fcd3446 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Sat, 17 Dec 2022 01:31:32 +0100 Subject: [PATCH 015/624] Fixes --- scrapers/torrent.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 3706357f8..9b5510305 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -1,7 +1,6 @@ import sys import json -from os import walk -from os.path import join, dirname, realpath, basename +from os.path import basename from pathlib import Path import re from datetime import datetime @@ -14,7 +13,7 @@ sys.exit() try: - from py_common import graphql, log + from py_common import graphql except ModuleNotFoundError: print("You need to download the folder 'py_common' from the community repo! 
" "(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) @@ -122,7 +121,7 @@ def cleanup_name(name): print(json.dumps(get_torrent_metadata(torrent_data))) elif sys.argv[1] == "search": search = json.loads(sys.stdin.read()).get('name') - torrents = list(TORRENTS_PATH.glob('*.torrent')) + torrents = list(TORRENTS_PATH.rglob('*.torrent')) ratios = {} for t in torrents: clean_t = cleanup_name(t) From 1c031bd24e99ff4fe1b313992a7b6155129e7bb7 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Sat, 17 Dec 2022 01:53:16 +0100 Subject: [PATCH 016/624] Improved search --- scrapers/torrent.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 9b5510305..9edea595b 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -115,8 +115,8 @@ def cleanup_name(name): fragment = json.loads(sys.stdin.read()) print(json.dumps(process_torrents(get_scene_data(fragment)))) elif sys.argv[1] == "fragment": - filename = json.loads(sys.stdin.read()).get('title') - with open(TORRENTS_PATH / filename, 'rb') as f: + filename = json.loads(sys.stdin.read()).get('url') + with open(filename, 'rb') as f: torrent_data = bdecode(f.read()) print(json.dumps(get_torrent_metadata(torrent_data))) elif sys.argv[1] == "search": @@ -125,7 +125,7 @@ def cleanup_name(name): ratios = {} for t in torrents: clean_t = cleanup_name(t) - ratios[round(10000*(1-similarity_file_name(search, clean_t)))] = clean_t + ratios[round(10000*(1-similarity_file_name(search, clean_t)))] = {'url': str(t.absolute()), 'title': clean_t} # Order ratios ratios_sorted = dict(sorted(ratios.items())) @@ -133,7 +133,6 @@ def cleanup_name(name): if len(ratios) > 5: ratios = ratios_sorted[5:] - res = list(map(lambda i: {'title': ratios_sorted[i] + ".torrent"}, ratios_sorted)) - print(json.dumps(res)) + print(json.dumps(list(ratios_sorted.values()))) # Last Updated December 16, 2022 From 
a9dcbd3b58b1f2ea9281876d937c6c43b645111d Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Mon, 26 Dec 2022 23:31:35 +0100 Subject: [PATCH 017/624] Improvement to bbcode cleaning --- scrapers/torrent.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 9edea595b..00aa7c127 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -47,9 +47,10 @@ def process_tags_performers(tagList): return map(lambda tag: decode_bytes(tag).replace('.', ' '), tagList) def process_description_bbcode(description): - res = re.sub('\[.*?\].*?\[\/.*?\]','',description) - res = re.sub('\[.*?\]','',res) - return res.rstrip() + res = re.sub(r'\[(?:b|i|u|s|url|quote)?\](.*)?\[\/(?:b|i|u|s|url|quote)\]',r"\1", description ) + res = re.sub(r'\[.*?\].*?\[\/.*?\]',r'',res) + res = re.sub(r'\[.*?\]',r'',res) + return res.strip() def get_torrent_metadata(torrent_data): res = {} From ada05beea136beb62310508b5051578fc4ce7910 Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Sun, 15 Jan 2023 18:52:32 +0200 Subject: [PATCH 018/624] Move broken Chaosmen scraper to algolia (#1241) --- SCRAPERS-LIST.md | 2 +- scrapers/Algolia.py | 1 + scrapers/Algolia_ChaosMen.yml | 30 ++++++++++++++++++++++++ scrapers/ChaosMen.yml | 44 ----------------------------------- 4 files changed, 32 insertions(+), 45 deletions(-) create mode 100644 scrapers/Algolia_ChaosMen.yml delete mode 100644 scrapers/ChaosMen.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 418591369..33051139d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -234,7 +234,7 @@ carmenvalentina.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- castingcouch-x.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- catalinacruz.com|Catalina Cruz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cfnmeu.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
-chaosmen.com|ChaosMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +chaosmen.com|Algolia_ChaosMen.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay charleechaselive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- chastitybabes.com|chastitybabes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cherrypimps.com|CherryPimps.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index 66e924612..17f6ba88e 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -67,6 +67,7 @@ # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ + "ChaosMen", "Devil's Film", "GenderXFilms" ] diff --git a/scrapers/Algolia_ChaosMen.yml b/scrapers/Algolia_ChaosMen.yml new file mode 100644 index 000000000..156521147 --- /dev/null +++ b/scrapers/Algolia_ChaosMen.yml @@ -0,0 +1,30 @@ +name: "ChaosMen" +sceneByURL: + - action: script + url: + - chaosmen.com/en/video + script: + - python + - Algolia.py + - chaosmen +sceneByFragment: + action: script + script: + - python + - Algolia.py + - chaosmen +sceneByName: + action: script + script: + - python + - Algolia.py + - chaosmen + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - chaosmen + - validName +# Last Updated January 10, 2023 diff --git a/scrapers/ChaosMen.yml b/scrapers/ChaosMen.yml deleted file mode 100644 index d617f9c5e..000000000 --- a/scrapers/ChaosMen.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: "ChaosMen" -sceneByURL: - - action: scrapeXPath - url: - - chaosmen.com/showgal.php?g=content/CM/video - scraper: sceneScraper - -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class='content']/div[@class='cblock'][1]/div[@class='chead clearfix']/h3 - Date: - selector: //div[@class='summary_container']/p[contains(span,"Release date")] - postProcess: - - replace: - - regex: Release date.\s(.*) - with: $1 - - parseDate: January 2, 2006 - Details: - 
selector: //div[@class='summary_container']/p[not(@class='hbl')]/text() - concat: "\n\n" - Performers: - Name: //div[contains(@class,'video_star_stats')]/h4 - URL: - selector: //div[contains(@class,'video_star_stats')]/a/@href - postProcess: &pp - - replace: - - regex: ^ - with: https://www.chaosmen.com/ - Image: - selector: //div[@class='videoimg']/img/@src - postProcess: *pp - Tags: - Name: - selector: //div[@class='cblock'][2]/p/a - postProcess: - - replace: - - regex: ([\w\s]+)\s-\s([\w\s]+) - with: $2 - Studio: - Name: - fixed: ChaosMen -# Last Updated June 20, 2022 From 3f917cd3b9aa14f965e53c3bd80d6aa4dcdd06b0 Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Sun, 15 Jan 2023 18:55:12 +0200 Subject: [PATCH 019/624] Fix CruelGirlfriend scraper (#1235) --- scrapers/CruelGirlfriend.yml | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/scrapers/CruelGirlfriend.yml b/scrapers/CruelGirlfriend.yml index e788d5037..0ae3b7959 100644 --- a/scrapers/CruelGirlfriend.yml +++ b/scrapers/CruelGirlfriend.yml @@ -24,30 +24,33 @@ xPathScrapers: - parseDate: 2006-01-02 Performers: Name: - selector: //div[@class='info-box-white']/h7[contains(.,"MORE CONTENT FROM")] + selector: //div[contains(@id,"Lg6T7ciE57Q018g38Ndnn9")]/@id postProcess: - replace: - - regex: \|*MORE CONTENT FROM\|* + - regex: Lg6T7ciE57Q018g38Ndnn9 with: + - replace: + - regex: "([a-z])([A-Z])" + with: "$1 $2" Details: - selector: $script - postProcess: - - replace: - - regex: '.+"description": "([^"]+)".+' - with: $1 - - regex: "|" - with: + selector: //div[@class="info-box-white"]//h3[not(img)] Tags: Name: //div[@class='info-box-white']/h3[contains(.,"Added:")]/a Image: selector: $script postProcess: - replace: - - regex: '.+"thumbnailUrl": [([^\]]+)\]).+' - with: $1 - regex: '.+"([^"]+/Backgrounds/[^"]+)".+' with: $1 Studio: Name: fixed: Cruel Girlfriend -# Last Updated July 25, 2021 + Code: + selector: $script + postProcess: + 
- replace: + - regex: '.+"contentUrl": "[^"]+/(\d+)\.mp4".+' + with: $1 + - regex: "^{.+" #cleanup on non matches + with: +# Last Updated January 06, 2023 From 952435cfbf4e2d292695569660cbd79eebf2720d Mon Sep 17 00:00:00 2001 From: gimmeliina <96843881+gimmeliina@users.noreply.github.com> Date: Sun, 15 Jan 2023 19:51:54 +0200 Subject: [PATCH 020/624] Update Masqulin.yml (#1239) --- scrapers/Masqulin.yml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/scrapers/Masqulin.yml b/scrapers/Masqulin.yml index 77acfaf35..982956fe1 100644 --- a/scrapers/Masqulin.yml +++ b/scrapers/Masqulin.yml @@ -8,22 +8,19 @@ xPathScrapers: sceneScraper: scene: Title: - selector: //h1[@class="titlePlayer"] + selector: //div[@class="gallery_info spacer"]/h1 Performers: Name: //span[@class="tour_update_models"]/a Tags: - Name: //p[@class="sceneInfo"]/span/a + Name: //a[@class="tagsVideoPage"] Details: - selector: //p[@class="col-lg-6 textDescription"] + selector: //p[@id="textDesc"] + Image: //meta[@property="og:image"]/@content + Date: + selector: //span[@class='availdate'][1] postProcess: - - replace: - - regex: \.\.\. 
- with: - - regex: \s*Read\smore\s*$ - with: - Image: - selector: //*[@class="hiddenImg stdimage"]/@src + - parseDate: Jan 02, 2006 Studio: Name: fixed: Masqulin -# Last Updated December 29, 2021 +# Last Updated January 07, 2023 From 000e279f6ffb39175971e6a36381421e430752ee Mon Sep 17 00:00:00 2001 From: baloodusudouest <77047540+baloodusudouest@users.noreply.github.com> Date: Sun, 15 Jan 2023 22:15:03 +0100 Subject: [PATCH 021/624] Update MindGeek.yml (add Noirmale.com) (#1226) --- SCRAPERS-LIST.md | 1 + scrapers/MindGeek.yml | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 33051139d..27fefcf82 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -882,6 +882,7 @@ nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nubilefilms.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 4fbb7390b..21166cea7 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -30,6 +30,7 @@ sceneByURL: - lilhumpers.com/scene/ - milfed.com/scene/ - mofos.com/scene/ + - noirmale.com/scene/ - publicagent.com/scene/ - realitykings.com/scene/ - seancody.com/scene/ @@ -59,6 +60,7 @@ performerByURL: - lookathernow.com/model/ - mofos.com/model/ - mofosnetwork.com/model/ + - noirmale.com/model/ - realitykings.com/model/ - rk.com/model/ - seancody.com/model/ @@ -152,6 +154,7 @@ xPathScrapers: iknowthatgirl: I Know That Girl lilhumpers: Lil Humpers milfed: Milfed + noirmale: Noir Male publicagent: Public Agent realitykings: Reality Kings rk: 
Reality Kings @@ -323,4 +326,4 @@ xPathScrapers: Image: selector: //img[contains(@src, "model")]/@src URL: //link[@rel="canonical"]/@href -# Last Updated October 14, 2022 +# Last Updated January 15, 2023 From 0658f731dfb9386a4d99e07511398d06e373018f Mon Sep 17 00:00:00 2001 From: Jacintocuenca <70093470+Jacintocuenca@users.noreply.github.com> Date: Mon, 16 Jan 2023 01:03:52 +0100 Subject: [PATCH 022/624] Add scraper for Breed It Raw (#1211) --- SCRAPERS-LIST.md | 1 + scrapers/BreedItRaw.yml | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 scrapers/BreedItRaw.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 27fefcf82..2cf0539a9 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -215,6 +215,7 @@ brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- brazilian-transsexuals.com|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans braziltgirls.xxx|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- +breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brookelynnebriar.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- bruceandmorgan.net|bruceandmorgan.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Amateur Fetish diff --git a/scrapers/BreedItRaw.yml b/scrapers/BreedItRaw.yml new file mode 100644 index 000000000..4c6e79c09 --- /dev/null +++ b/scrapers/BreedItRaw.yml @@ -0,0 +1,73 @@ +name: BreedItRaw +sceneByURL: + - action: scrapeXPath + url: + - breeditraw.net/tour/trailers/ + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - breeditraw.net/tour/models/ + scraper: performerScraper +xPathScrapers: + sceneScraper: + common: + $performer: //li[@class="update_models"]/a + scene: + Title: //div[@class="videoDetails clear"]//h3/text() + Date: + 
selector: //span[contains(., "Date Added:")]/following-sibling::text() + postProcess: + - replace: + - regex: ^\s* + with: + - parseDate: January 2, 2006 + Details: //div[@class="videoDetails clear"]/p + Tags: + Name: //li[contains(text(), "Tags:")]/following-sibling::li/a/text() + Performers: + Name: $performer/text() + URL: $performer/@href + Image: + selector: //script[contains(text(),"poster")]/text() + postProcess: + - replace: + - regex: '.+poster="([^"]+)".+' + with: https://www.breeditraw.net$1 + - regex: 1x\.jpg + with: "2x.jpg" # 3,4 is also possible but the images get too big + Studio: + Name: + fixed: Breed it Raw + performerScraper: + performer: + Name: + selector: //meta[@property="og:title"]/@content + postProcess: + - replace: + - regex: " - .*$" + with: + Gender: + fixed: Male + Height: + selector: //strong[contains(., "Height:")]/following-sibling::text() + postProcess: + - feetToCm: true + Weight: + selector: //strong[contains(., "Weight:")]/following-sibling::text() + postProcess: + - lbToKg: true + Measurements: + selector: //strong[contains(., "Dick Size:")]/following-sibling::text() + postProcess: + - replace: + - regex: ^ + with: 0' + - feetToCm: true + Image: + selector: //div[@class="profile-pic"]/img/@src0_2x # 3x is either too big or an upscale so imho not worth it + postProcess: + - replace: + - regex: ^ + with: https://www.breeditraw.net/ +# Last Updated December 18, 2022 From bf9227bbaa23f64af1def06a2f4f15870a46cf5a Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Wed, 18 Jan 2023 15:34:59 +0000 Subject: [PATCH 023/624] Move tsplayground.com to its own scraper (#1242) --- SCRAPERS-LIST.md | 2 +- scrapers/GammaEntertainment.yml | 4 +--- scrapers/TSPlayground.yml | 35 +++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 scrapers/TSPlayground.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 2cf0539a9..1832bb112 100644 --- a/SCRAPERS-LIST.md 
+++ b/SCRAPERS-LIST.md @@ -1309,7 +1309,7 @@ truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans -tsplayground.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tspov.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- tuktukpatrol.com|TukTukPatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- diff --git a/scrapers/GammaEntertainment.yml b/scrapers/GammaEntertainment.yml index a28bad1b4..ca8e71964 100644 --- a/scrapers/GammaEntertainment.yml +++ b/scrapers/GammaEntertainment.yml @@ -63,7 +63,6 @@ sceneByURL: - tittycreampies.com/ - transsexualangel.com/en/video/ - transsexualroadtrip.com/ - - tsplayground.com/en/video/ - whiteghetto.com/ scraper: sceneScraper @@ -217,7 +216,6 @@ xPathScrapers: tittycreampies: Titty Creampies transsexualangel: Transsexual Angel transsexualroadtrip: Transsexual Roadtrip - tsplayground: TS Playground whiteghetto: White Ghetto xempire: XEmpire @@ -240,4 +238,4 @@ xPathScrapers: Name: //a[contains(@class, 'GA_Id_headerLogo')]/span[@class='linkMainCaption']/text() FrontImage: //a[@class='frontCoverImg']/@href BackImage: //a[@class='backCoverImg']/@href -# Last Updated December 29, 2022 +# Last Updated January 13, 2023 diff --git a/scrapers/TSPlayground.yml b/scrapers/TSPlayground.yml new file mode 100644 index 000000000..9b7d4e9cb --- /dev/null +++ b/scrapers/TSPlayground.yml @@ -0,0 +1,35 @@ +name: "TS Playground" +sceneByURL: + - action: scrapeXPath + url: + - tsplayground.com/video/ + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - 
tsplayground.com/models/ + scraper: performerScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class="content-title"]/h1/text() + Details: //div[@class="content-desc more-desc"] + Date: + selector: //div[@class="content-date"]/div[@class="label"]/text() + postProcess: + - replace: + - regex: (\d{2}).(\d{2}).(\d{4}) + with: $3-$2-$1 + Image: //meta[@property="og:image"]/@content + Studio: + Name: + fixed: TS Playground + Tags: + Name: //div[@class="content-tags"]//a/text() + Performers: + Name: //div[@class="content-models"]//a/span/text() + performerScraper: + performer: + Name: //div[contains(@class, "title-col")]/h1/text() + Image: //div[@class="model-avatar"]/img/@src +# Last Updated January 13, 2023 From 201d33e490c1f3b6bd45a06938c07f7f660e7981 Mon Sep 17 00:00:00 2001 From: CarlNs92891 <103487026+CarlNs92891@users.noreply.github.com> Date: Thu, 19 Jan 2023 02:36:09 +1100 Subject: [PATCH 024/624] Switch to a JSON scraper for Lustery (#1243) --- scrapers/Lustery.yml | 108 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 97 insertions(+), 11 deletions(-) diff --git a/scrapers/Lustery.yml b/scrapers/Lustery.yml index 7c571f251..b7e9f9e32 100644 --- a/scrapers/Lustery.yml +++ b/scrapers/Lustery.yml @@ -1,23 +1,109 @@ name: Lustery +performerByURL: + - action: scrapeJson + url: + - lustery.com/couple/ + queryURL: "{url}" + queryURLReplace: + url: + - regex: .+/(.+) + with: https://lustery.com/api/couple/$1 + scraper: performerScraper sceneByURL: - - action: scrapeXPath + - action: scrapeJson url: - lustery.com/video-preview/ + queryURL: "{url}" + queryURLReplace: + url: + - regex: .+/(.+) + with: https://lustery.com/api/video/$1 scraper: sceneScraper -xPathScrapers: + +jsonScrapers: + performerScraper: + performer: + Name: couple.name + Country: + selector: couple.location + postProcess: + - replace: + - regex: .+, + with: + Details: + selector: couple.permalink + postProcess: + - replace: + - regex: ^ + with: 
https://lustery.com/api/couple/ + - regex: $ + with: /info + - subScraper: + selector: coupleInfo.description + Image: + selector: couple.mainImage.filename + postProcess: + - replace: + - regex: ^ + with: https://cdn.lustery.com/cache/lustery-images/couple-main-image-thumbs/100x100x4/ + sceneScraper: scene: - Title: - selector: //div[@class='video-popup-info']//div[@class='title']/h3/text() + Title: video.title + Details: + selector: video.permalink + postProcess: + - replace: + - regex: ^ + with: https://lustery.com/api/video/ + - regex: $ + with: /info + - subScraper: + selector: videoInfo.description + Date: #not very accurate as it is the Last Edited Date + selector: video.poster.lastEditedAt + postProcess: + - parseDate: unix + URL: + selector: video.permalink + postProcess: + - replace: + - regex: ^ + with: https://lustery.com/video-preview/ + Image: + selector: video.poster.filename + postProcess: + - replace: + - regex: ^ + with: https://cdn.lustery.com/cache/lustery-images/video-image-thumbs/750x420x4/ Performers: Name: - selector: //div[@class='couple-and-categories']//div[@class='couple']/a/text() - Details: //div[@class='video-popup-info']/div[@class='description']/text() - Tags: - Name: //div[@class='tags']/a/text() - Image: //meta[@property="og:image"]/@content + selector: "[video.couplePermalink,video.secondaryCouplePermalink]" + postProcess: + - replace: + - regex: ^ + with: https://lustery.com/api/couple/ + - regex: $ + with: /info + - subScraper: + selector: coupleInfo.members.#.name + concat: "|" + split: "|" + URL: + selector: "[video.couplePermalink,video.secondaryCouplePermalink]" + postProcess: + - replace: + - regex: ^ + with: https://lustery.com/couple/ Studio: Name: fixed: Lustery - -# Last Updated October 03, 2020 + Tags: + Name: video.tags + Code: # Not sure, it seems unique though + selector: video.thumbnailsBasePath + postProcess: + - replace: + - regex: .*/(\d+)$ + with: $1 +# Last Updated November 03, 2022 From 
4f5b066be458fadf4fd6b25a6d7c9162a6f36401 Mon Sep 17 00:00:00 2001 From: SirCumAlot1988 <98083850+SirCumAlot1988@users.noreply.github.com> Date: Wed, 18 Jan 2023 17:16:01 +0100 Subject: [PATCH 025/624] Fix Boobpedia scraper (#1230) --- scrapers/Boobpedia.yml | 394 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 377 insertions(+), 17 deletions(-) diff --git a/scrapers/Boobpedia.yml b/scrapers/Boobpedia.yml index c5ad462fa..b17f0bf09 100644 --- a/scrapers/Boobpedia.yml +++ b/scrapers/Boobpedia.yml @@ -23,18 +23,30 @@ xPathScrapers: performerScraper: performer: Name: //h1 - Twitter: //table//tr/td/b/a[text()='Twitter']/@href - Instagram: //table//tr/td/b/a[text()='Instagram']/@href + Twitter: //table//tr/td/a[b[text()='Twitter']]/@href + Instagram: //table//tr/td/a[b[text()='Instagram']]/@href Birthdate: - selector: //table//tr/td//b[text()='Born:']/../following-sibling::td/a + selector: //table//tr/td//b[text()='Born']/../following-sibling::td/a concat: " " postProcess: + - replace: + - regex: (.*\d\d\d\d).* + with: $1 - parseDate: January 2 2006 - Ethnicity: //table//tr/td/b[text()='Ethnicity:']/../following-sibling::td/a - Country: //table//tr/td/b[text()='Nationality:']/../following-sibling::td/a - EyeColor: //table//tr/td/b[text()='Eye color:']/../following-sibling::td + Ethnicity: + selector: //table//tr/td/b[text()='Ethnicity']/../following-sibling::td/a + postProcess: + - replace: + - regex: \[\d*\] + with: "" + EyeColor: + selector: //table//tr/td/b[text()='Eye color']/../following-sibling::td + postProcess: + - replace: + - regex: \[\d*\] + with: "" Height: - selector: //table//tr/td/b[text()='Height:']/../following-sibling::td + selector: //table//tr/td/b[text()='Height']/../following-sibling::td postProcess: - replace: - regex: (?:.+\D)?(\d+\.\d+)\Dm.+ @@ -42,28 +54,66 @@ xPathScrapers: - regex: \. 
with: "" Weight: - selector: //table//tr/td/b[text()='Weight:']/../following-sibling::td + selector: //table//tr/td/b[text()='Weight']/../following-sibling::td postProcess: - replace: - regex: (?:.+\D)?(\d+)\Dkg.+ with: $1 Measurements: - selector: //table//tr/td/b[text()='Measurements:']/../following-sibling::td|//table//tr/td[contains(b,'cup')]/following-sibling::td + selector: //table//tr/td/b[text()='Measurements']/../following-sibling::td|//table//tr/td[contains(b,'cup')]/following-sibling::td concat: "|" postProcess: - replace: - - regex: (\d+)-(\d+)-(\d+)[^|]+\|(\S+).+ # get measurements + cup - with: $1$4-$2-$3 + - regex: (\d+)-(\d+)-(\d+)[^|]*\|(\d+\S+).+ # get measurements + cup + with: $4-$2-$3 - regex: \|.+$ # fallback to clear non matching regexes with: "" - FakeTits: //table//tr/td/b[text()='Boobs:']/../following-sibling::td/a - HairColor: //table//tr/td[contains(b,'Hair')]/following-sibling::td + - regex: \[\d*\] # Remove References + with: "" + - regex: ( in) # Remove Unit Inches + with: "" + FakeTits: + selector: //table//tr/td/b[text()='Boobs']/../following-sibling::td/a + postProcess: + - replace: + - regex: \[\d*\] # Remove References + with: "" + - map: + "Enhanced": "Fake" + "Natural": "Natural" + HairColor: + selector: //table//tr/td[contains(b,'Hair')]/following-sibling::td//text() + concat: ", " + postProcess: + - replace: + - regex: (,,) + with: "," + - regex: ( , ) + with: " " + - regex: \[\d*\] + with: "" # nbsp; screws up the parsing, so use contains instead - CareerLength: //table//tr/td/b[text()[contains(.,'active:')]]/../following-sibling::td - Aliases: //table//tr/td/b[text()[contains(.,'known')]]/../following-sibling::td + CareerLength: + selector: //table//tr/td/b[text()[contains(.,'active')]]/../following-sibling::td + postProcess: + - replace: + - regex: \[\d*\] # Remove References + with: "" + - regex: (—|–) + with: "-" + - regex: (\S)-(\S) + with: $1 - $2 + - regex: (?i)(present|current) + with: "" + Aliases: + selector: 
//table//tr/td/b[text()[contains(.,'known')]]/../following-sibling::td + postProcess: + - replace: + - regex: \[\d*\] + with: "" Image: #selector: //table[@class="infobox"]//img/@src #alterntive image, no need for subScraper but gets lq image - selector: //table[@class="infobox"]//a[img[@src]]/@href + selector: //table[@class="infobox plainlinks"]//a[img[@src]]/@href postProcess: - replace: - regex: ^ @@ -83,4 +133,314 @@ xPathScrapers: Details: selector: //div[@class="mw-parser-output"]/p concat: "\n\n" -# Last Updated February 01, 2022 + postProcess: + - replace: + # Remove References + - regex: \[\d*\] + with: "" + # Remove and , which appears in the details of some performers (e.g. Jenna Jameson) + - regex: + with: "" + # Remove triple line breaks + - regex: \n\n\n + with: "\n" + Country: + selector: //table//tr/td/b[text()='Nationality']/../following-sibling::td/a + postProcess: + - map: + "Abkhaz": "Abkhazia" + "Abkhazian": "Abkhazia" + "Afghan": "Afghanistan" + "Albanian": "Albania" + "Algerian": "Algeria" + "American Samoan": "American Samoa" + "American": "United States of America" + "Andorran": "Andorra" + "Angolan": "Angola" + "Anguillan": "Anguilla" + "Antarctic": "Antarctica" + "Antiguan": "Antigua and Barbuda" + "Argentine": "Argentina" + "Argentinian": "Argentina" + "Armenian": "Armenia" + "Aruban": "Aruba" + "Australian": "Australia" + "Austrian": "Austria" + "Azerbaijani": "Azerbaijan" + "Azeri": "Azerbaijan" + "Bahamian": "Bahamas" + "Bahraini": "Bahrain" + "Bangladeshi": "Bangladesh" + "Barbadian": "Barbados" + "Barbudan": "Antigua and Barbuda" + "Basotho": "Lesotho" + "Belarusian": "Belarus" + "Belgian": "Belgium" + "Belizean": "Belize" + "Beninese": "Benin" + "Beninois": "Benin" + "Bermudan": "Bermuda" + "Bermudian": "Bermuda" + "Bhutanese": "Bhutan" + "BIOT": "British Indian Ocean Territory" + "Bissau-Guinean": "Guinea-Bissau" + "Bolivian": "Bolivia" + "Bonaire": "Bonaire" + "Bonairean": "Bonaire" + "Bosnian": "Bosnia and Herzegovina" + 
"Botswanan": "Botswana" + "Bouvet Island": "Bouvet Island" + "Brazilian": "Brazil" + "British Virgin Island": "Virgin Islands British" + "British": "United Kingdom" + "Bruneian": "Brunei" + "Bulgarian": "Bulgaria" + "Burkinabé": "Burkina Faso" + "Burmese": "Burma" + "Burundian": "Burundi" + "Cabo Verdean": "Cabo Verde" + "Cambodian": "Cambodia" + "Cameroonian": "Cameroon" + "Canadian": "Canada" + "Cantonese": "Hong Kong" + "Caymanian": "Cayman Islands" + "Central African": "Central African Republic" + "Chadian": "Chad" + "Channel Island": "Guernsey" + #Channel Island: "Jersey" + "Chilean": "Chile" + "Chinese": "China" + "Christmas Island": "Christmas Island" + "Cocos Island": "Cocos (Keeling) Islands" + "Colombian": "Colombia" + "Comoran": "Comoros" + "Comorian": "Comoros" + "Congolese": "Congo" + "Cook Island": "Cook Islands" + "Costa Rican": "Costa Rica" + "Croatian": "Croatia" + "Cuban": "Cuba" + "Curaçaoan": "Curaçao" + "Cypriot": "Cyprus" + "Czech": "Czech Republic" + "Danish": "Denmark" + "Djiboutian": "Djibouti" + "Dominican": "Dominica" + "Dutch": "Netherlands" + "Ecuadorian": "Ecuador" + "Egyptian": "Egypt" + "Emirati": "United Arab Emirates" + "Emiri": "United Arab Emirates" + "Emirian": "United Arab Emirates" + "English people": "England" + "English": "England" + "Equatoguinean": "Equatorial Guinea" + "Equatorial Guinean": "Equatorial Guinea" + "Eritrean": "Eritrea" + "Estonian": "Estonia" + "Ethiopian": "Ethiopia" + "European": "European Union" + "Falkland Island": "Falkland Islands" + "Faroese": "Faroe Islands" + "Fijian": "Fiji" + "Filipino": "Philippines" + "Finnish": "Finland" + "Formosan": "Taiwan" + "French Guianese": "French Guiana" + "French Polynesian": "French Polynesia" + "French Southern Territories": "French Southern Territories" + "French": "France" + "Futunan": "Wallis and Futuna" + "Gabonese": "Gabon" + "Gambian": "Gambia" + "Georgian": "Georgia" + "German": "Germany" + "Ghanaian": "Ghana" + "Gibraltar": "Gibraltar" + "Greek": "Greece" + 
"Greenlandic": "Greenland" + "Grenadian": "Grenada" + "Guadeloupe": "Guadeloupe" + "Guamanian": "Guam" + "Guatemalan": "Guatemala" + "Guinean": "Guinea" + "Guyanese": "Guyana" + "Haitian": "Haiti" + "Heard Island": "Heard Island and McDonald Islands" + "Hellenic": "Greece" + "Herzegovinian": "Bosnia and Herzegovina" + "Honduran": "Honduras" + "Hong Kong": "Hong Kong" + "Hong Konger": "Hong Kong" + "Hungarian": "Hungary" + "Icelandic": "Iceland" + "Indian": "India" + "Indonesian": "Indonesia" + "Iranian": "Iran" + "Iraqi": "Iraq" + "Irish": "Ireland" + "Israeli": "Israel" + "Israelite": "Israel" + "Italian": "Italy" + "Ivorian": "Ivory Coast" + "Jamaican": "Jamaica" + "Jan Mayen": "Jan Mayen" + "Japanese": "Japan" + "Jordanian": "Jordan" + "Kazakh": "Kazakhstan" + "Kazakhstani": "Kazakhstan" + "Kenyan": "Kenya" + "Kirghiz": "Kyrgyzstan" + "Kirgiz": "Kyrgyzstan" + "Kiribati": "Kiribati" + "Korean": "South Korea" + "Kosovan": "Kosovo" + "Kosovar": "Kosovo" + "Kuwaiti": "Kuwait" + "Kyrgyz": "Kyrgyzstan" + "Kyrgyzstani": "Kyrgyzstan" + "Lao": "Lao People's Democratic Republic" + "Laotian": "Lao People's Democratic Republic" + "Latvian": "Latvia" + "Lebanese": "Lebanon" + "Lettish": "Latvia" + "Liberian": "Liberia" + "Libyan": "Libya" + "Liechtensteiner": "Liechtenstein" + "Lithuanian": "Lithuania" + "Luxembourg": "Luxembourg" + "Luxembourgish": "Luxembourg" + "Macanese": "Macau" + "Macedonian": "North Macedonia" + "Magyar": "Hungary" + "Mahoran": "Mayotte" + "Malagasy": "Madagascar" + "Malawian": "Malawi" + "Malaysian": "Malaysia" + "Maldivian": "Maldives" + "Malian": "Mali" + "Malinese": "Mali" + "Maltese": "Malta" + "Manx": "Isle of Man" + "Marshallese": "Marshall Islands" + "Martinican": "Martinique" + "Martiniquais": "Martinique" + "Mauritanian": "Mauritania" + "Mauritian": "Mauritius" + "McDonald Islands": "Heard Island and McDonald Islands" + "Mexican": "Mexico" + "Moldovan": "Moldova" + "Monacan": "Monaco" + "Mongolian": "Mongolia" + "Montenegrin": "Montenegro" + 
"Montserratian": "Montserrat" + "Monégasque": "Monaco" + "Moroccan": "Morocco" + "Motswana": "Botswana" + "Mozambican": "Mozambique" + "Myanma": "Myanmar" + "Namibian": "Namibia" + "Nauruan": "Nauru" + "Nepalese": "Nepal" + "Nepali": "Nepal" + "Netherlandic": "Netherlands" + "New Caledonian": "New Caledonia" + "New Zealand": "New Zealand" + "Ni-Vanuatu": "Vanuatu" + "Nicaraguan": "Nicaragua" + "Nigerian": "Nigeria" + "Nigerien": "Niger" + "Niuean": "Niue" + "Norfolk Island": "Norfolk Island" + "Northern Irish": "Northern Ireland" + "Northern Marianan": "Northern Mariana Islands" + "Norwegian": "Norway" + "Omani": "Oman" + "Pakistani": "Pakistan" + "Palauan": "Palau" + "Palestinian": "Palestine" + "Panamanian": "Panama" + "Papua New Guinean": "Papua New Guinea" + "Papuan": "Papua New Guinea" + "Paraguayan": "Paraguay" + "Persian": "Iran" + "Peruvian": "Peru" + "Philippine": "Philippines" + "Pitcairn Island": "Pitcairn Islands" + "Polish": "Poland" + "Portuguese": "Portugal" + "Puerto Rican": "Puerto Rico" + "Qatari": "Qatar" + "Romanian": "Romania" + "Russian": "Russia" + "Rwandan": "Rwanda" + "Saba": "Saba" + "Saban": "Saba" + "Sahraouian": "Western Sahara" + "Sahrawi": "Western Sahara" + "Sahrawian": "Western Sahara" + "Salvadoran": "El Salvador" + "Sammarinese": "San Marino" + "Samoan": "Samoa" + "Saudi Arabian": "Saudi Arabia" + "Saudi": "Saudi Arabia" + "Scottish": "Scotland" + "Senegalese": "Senegal" + "Serbian": "Serbia" + "Seychellois": "Seychelles" + "Sierra Leonean": "Sierra Leone" + "Singapore": "Singapore" + "Singaporean": "Singapore" + "Slovak": "Slovakia" + "Slovene": "Slovenia" + "Slovenian": "Slovenia" + "Solomon Island": "Solomon Islands" + "Somali": "Somalia" + "Somalilander": "Somaliland" + "South African": "South Africa" + "South Georgia Island": "South Georgia and the South Sandwich Islands" + "South Ossetian": "South Ossetia" + "South Sandwich Island": "South Georgia and the South Sandwich Islands" + "South Sudanese": "South Sudan" + "Spanish": 
"Spain" + "Sri Lankan": "Sri Lanka" + "Sudanese": "Sudan" + "Surinamese": "Suriname" + "Svalbard resident": "Svalbard" + "Swati": "Eswatini" + "Swazi": "Eswatini" + "Swedish": "Sweden" + "Swiss": "Switzerland" + "Syrian": "Syrian Arab Republic" + "Taiwanese": "Taiwan" + "Tajikistani": "Tajikistan" + "Tanzanian": "Tanzania" + "Thai": "Thailand" + "Timorese": "Timor-Leste" + "Tobagonian": "Trinidad and Tobago" + "Togolese": "Togo" + "Tokelauan": "Tokelau" + "Tongan": "Tonga" + "Trinidadian": "Trinidad and Tobago" + "Tunisian": "Tunisia" + "Turkish": "Turkey" + "Turkmen": "Turkmenistan" + "Turks and Caicos Island": "Turks and Caicos Islands" + "Tuvaluan": "Tuvalu" + "Ugandan": "Uganda" + "Ukrainian": "Ukraine" + "Uruguayan": "Uruguay" + "Uzbek": "Uzbekistan" + "Uzbekistani": "Uzbekistan" + "Vanuatuan": "Vanuatu" + "Vatican": "Vatican City State" + "Venezuelan": "Venezuela" + "Vietnamese": "Vietnam" + "Wallis and Futuna": "Wallis and Futuna" + "Wallisian": "Wallis and Futuna" + "Welsh": "Wales" + "Yemeni": "Yemen" + "Zambian": "Zambia" + "Zimbabwean": "Zimbabwe" + "Åland Island": "Åland Islands" +# Last Updated December 30, 2022 From f888d10ba91335fe72da963447614d64683ff337 Mon Sep 17 00:00:00 2001 From: timo95 <24251362+timo95@users.noreply.github.com> Date: Sun, 22 Jan 2023 18:05:23 +0100 Subject: [PATCH 026/624] Add vTubie --- SCRAPERS-LIST.md | 1 + scrapers/vTubie.yml | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 scrapers/vTubie.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 1832bb112..75a2b1e15 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1363,6 +1363,7 @@ vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+vtubie.com|vTubie.yml|:x:|:x:|:x:|:heavy_check_mark:|-|VTuber Database wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wankitnow.com|Wankitnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR wankz.com|wankz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/vTubie.yml b/scrapers/vTubie.yml new file mode 100644 index 000000000..ecb6cee0d --- /dev/null +++ b/scrapers/vTubie.yml @@ -0,0 +1,48 @@ +name: "vTubie" +performerByName: + action: scrapeXPath + queryURL: https://vtubie.com/wp-admin/admin-ajax.php?s={}&action=ma_s_ajax + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - https://vtubie.com/ + scraper: performerScraper + +xPathScrapers: + performerSearch: + performer: + Name: //div[div/text()="VTubers"]/following-sibling::a/@title + URL: //div[div/text()="VTubers"]/following-sibling::a/@href + Image: + selector: //div[div/text()="VTubers"]/following-sibling::a//div[@class="ma-s-ajax-result-item-image"]/@style + postProcess: + - replace: + - regex: ^background\-image:url\((.*)\);$ + with: $1 + + performerScraper: + performer: + Name: //meta[@property="og:title"]/@content + Aliases: + selector: //div[@class="ma-section-content ma-aname-content"]//a/text() + concat: ", " + URL: + selector: //a[@class="ma-pag-next"]/@href + postProcess: + - replace: + - regex: \?pg=\d + with: + Gender: //div[@class="ma-section-title" and span/text()="Gender"]/following-sibling::div[1]//a/text() + Twitter: //div[@class="ma-section-title" and text()="Twitter "]/following-sibling::div[1]//a/@href + Tags: + Name: //div[@class="ma-section-title" and text()="Type "]/following-sibling::div[1]//a/text() | //div[@class="ma-section-title" and text()="Group"]/following-sibling::div[1]//a/text() + CareerLength: + selector: //div[@class="ma-section-title" and text()="Debut "]/following-sibling::div[1]/div/text() + postProcess: + - replace: + - regex: .*(\d{4}).* + with: $1- + Image: //link[@rel="image_src"]/@href + +# Last Updated January 22, 2023 
From a9000d3e5ced8f59e90edd08bbcee275329e659b Mon Sep 17 00:00:00 2001 From: timo95 <24251362+timo95@users.noreply.github.com> Date: Mon, 23 Jan 2023 12:50:37 +0100 Subject: [PATCH 027/624] Remove https --- scrapers/vTubie.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/vTubie.yml b/scrapers/vTubie.yml index ecb6cee0d..562c40800 100644 --- a/scrapers/vTubie.yml +++ b/scrapers/vTubie.yml @@ -6,7 +6,7 @@ performerByName: performerByURL: - action: scrapeXPath url: - - https://vtubie.com/ + - vtubie.com scraper: performerScraper xPathScrapers: From c2c8ed0bf006328d77c487117c63ac7a7ad8ab74 Mon Sep 17 00:00:00 2001 From: DogmaDragon <103123951+DogmaDragon@users.noreply.github.com> Date: Fri, 27 Jan 2023 20:12:24 +0200 Subject: [PATCH 028/624] Add Strokies.yml CDP scene scraper (#1247) --- SCRAPERS-LIST.md | 6 +++--- scrapers/Strokies.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 scrapers/Strokies.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 1832bb112..88d88731b 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -325,8 +325,8 @@ czechsupermodels.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- czechtaxi.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- czechvr.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR czechvrcasting.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR -czechvrnetwork.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR czechvrfetish.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrnetwork.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR czechwifeswap.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- d52q.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV dadcrush.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -657,8 +657,8 @@ 
javdb.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database javdb36.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database javhd.com|JavHD.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored javhub.com|JavHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored -javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV @@ -1168,6 +1168,7 @@ strapondreamer.com|StrapDreamer.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- streaming.iafd.com|IafdStreaming.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- stretchedoutsnatch.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- stripzvr.com|StripzVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +strokies.com|Strokies.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- stuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- studiofow.com|StudioFOW.yml|:heavy_check_mark:|:x:|:x:|:x:|-|3D Animation stuffintwats.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1436,7 +1437,6 @@ zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_ zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- - ## Non url scrapers The following scrapers do not support xxxByURL scraping and are not included to the above list. To keep the below list tidy please add scrapers keeping the list in alphabetical order by the .yml filename. 
diff --git a/scrapers/Strokies.yml b/scrapers/Strokies.yml new file mode 100644 index 000000000..039334f2b --- /dev/null +++ b/scrapers/Strokies.yml @@ -0,0 +1,42 @@ +name: "Strokies" +sceneByURL: + - action: scrapeXPath + url: + - strokies.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class='video-title'] + Details: + selector: //div[@class='video-description']/p/text() + concat: "\n\n" + Date: + selector: //div/p[contains(text(),"Added on:")] + postProcess: + - replace: + - regex: Added on:\s(.+) + with: $1 + - parseDate: Jan 2, 2006 + Image: + selector: //div[@class="vjs-poster"]/@style + postProcess: + - replace: + - regex: .+url\("(.+)\".+ + with: https:$1 + Studio: + Name: + fixed: Strokies + Tags: + Name: //div[@class='model-tags']//a[contains(@href,"tag")] + Performers: + Name: //div[@class='model-tags']//a[contains(@href,"model")] + URL: + selector: //div[@class='model-tags']//a[contains(@href,"model")]/@href + postProcess: + - replace: + - regex: ^ + with: https://strokies.com +driver: + useCDP: true +# Last Updated January 20, 2023 From 1cbcb1583c2c1c9b01d774d5e163ab774067abb3 Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Fri, 27 Jan 2023 20:12:55 +0200 Subject: [PATCH 029/624] Add JSON scraper for riggsfilms.vip (#1246) --- SCRAPERS-LIST.md | 1 + scrapers/RiggsFilms.yml | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 scrapers/RiggsFilms.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 88d88731b..c17ca1f02 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1056,6 +1056,7 @@ renderfiend.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- restrictedsenses.com|RestrictedSenses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- retroporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rickysroom.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- 
+riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- diff --git a/scrapers/RiggsFilms.yml b/scrapers/RiggsFilms.yml new file mode 100644 index 000000000..76f1bf2f0 --- /dev/null +++ b/scrapers/RiggsFilms.yml @@ -0,0 +1,33 @@ +name: Riggs Films +sceneByURL: + - action: scrapeJson + url: + - riggsfilms.vip/videos/ + scraper: sceneScraper + queryURL: "https://riggsfilms.adultmembersites.com/api/videos/{url}" + queryURLReplace: + url: + - regex: '.+/videos/(\d+)-.+' + with: "$1" +jsonScrapers: + sceneScraper: + scene: + Title: title + Details: decription + Date: + selector: publish_date + postProcess: + - replace: + - regex: \s.+$ + with: + - parseDate: 2006-01-02 + Image: poster_src + Performers: + Name: casts.#.screen_name + Studio: + Name: + fixed: Riggs Films + Tags: + Name: tags.#.name + Code: id +# Last Updated January 20, 2023 From eb1ab58b166b6b4479b36c7df8b41b26384b2eb9 Mon Sep 17 00:00:00 2001 From: DoctorD Date: Fri, 27 Jan 2023 10:33:39 -0800 Subject: [PATCH 030/624] Add scraper for R18.dev (#1244) --- SCRAPERS-LIST.md | 1 + scrapers/R18.dev.yml | 428 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 429 insertions(+) create mode 100644 scrapers/R18.dev.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index c17ca1f02..2e794aeb2 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1033,6 +1033,7 @@ pussyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored putalocura.com|Putalocura.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- r18.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV r18.com|r18.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ragingstallion.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay diff --git a/scrapers/R18.dev.yml b/scrapers/R18.dev.yml new file mode 100644 index 000000000..7c92c8c40 --- /dev/null +++ b/scrapers/R18.dev.yml @@ -0,0 +1,428 @@ +name: R18.dev (JAV) + +sceneByFragment: + action: scrapeJson + queryURL: https://r18.dev/videos/vod/movies/detail/-/dvd_id={filename}/json + queryURLReplace: + filename: + # gets just the JAV ID out of the filename. This also removes the file extension which is pretty nice. + # You can have your filename be something like "Something Something ABC123 Something Something.mp4" and it will scrape as ABC123. + - regex: (.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+) + with: $2 + scraper: sceneSearchIndirect + +sceneByURL: + - action: scrapeJson + url: + - r18.dev/videos/vod/movies/detail/-/i + scraper: sceneScraper + queryURL: "https://r18.dev/videos/vod/movies/detail/-/combined={url}/json" + queryURLReplace: + url: + - regex: ".+/id=(.+)/?$" + with: "$1" + +sceneByName: + action: scrapeJson + scraper: sceneSearch + queryURL: "https://r18.dev/videos/vod/movies/detail/-/dvd_id={}/json" + +sceneByQueryFragment: + action: scrapeJson + queryURL: "{url}" + scraper: sceneScraper + +movieByURL: + - action: scrapeJson + url: + - r18.dev/videos/vod/movies/detail/-/i + queryURL: "https://r18.dev/videos/vod/movies/detail/-/combined={url}/json" + queryURLReplace: + url: + - regex: ".+/id=(.+)/?$" + with: "$1" + scraper: movieScraper + +jsonScrapers: + sceneScraper: + scene: + Title: + selector: dvd_id + Date: + selector: release_date + Performers: + Name: + selector: actresses.#.name_romaji + Aliases: + selector: actresses.#.name_kanji + Image: + selector: jacket_full_url + Director: + selector: directors.#.name_romaji + Details: + selector: title_en + # All this uncensoring stuff happens automatically on the HTML version of the page due to some JS on the frontend there. 
+ # I put in a request to the site dev to add this to the backend json api as well so we don't have to do it everytime ourselves plus keep a ever growing list of this stuff + postProcess: &ppUncensor + - replace: + - regex: A\*{3}ed\b + with: "Abused" + - regex: A\*{3}e\b + with: "Abuse" + - regex: A\*{3}es\b + with: "Abuses" + - regex: A\*{4}p\b + with: "Asleep" # ZMEN-037 + - regex: A\*{5}t + with: "Assault" + - regex: A\*{5}ted\b + with: "Assaulted" + - regex: B\*{5}p\b + with: "Bang Up" # Not Sure + - regex: B\*{5}k\b # IPX-374 + with: "Berserk" + - regex: B\*{3}d\b + with: "Blood" + - regex: B\*{3}dy\b + with: "Bloody" # Not Sure + - regex: B\*{6}y\b + with: "Brutally" + - regex: Chai\*{1}saw\b + with: "Chainsaw" + - regex: C\*{3}d\b + with: "Child" + - regex: C\*{3}dcare\b + with: "Childcare" + - regex: C\*{3}dhood\b + with: "Childhood" + - regex: C\*{3}dish\b + with: "Childish" # MMYM-045 + - regex: C\*{3}dren\b + with: "Children" + - regex: C\*{1}ck\b + with: "Cock" + - regex: C\*{1}cks\b + with: "Cocks" + - regex: C\*{1}llegiate\b + with: "Collegiate" + - regex: C\*{5}y\b + with: "Cruelty" + - regex: CrumB\*{2}d\b + with: "Crumbled" + - regex: D\*{1}ck\b + with: "Dick" + - regex: D\*{6}e\b + with: "Disgrace" + - regex: D\*{6}ed\b + with: "Disgraced" + - regex: D\*{6}eful\b + with: "Disgraceful" + - regex: D\*{3}king\b + with: "Drinking" + - regex: D\*{3}ks\b + with: "Drinks" + - regex: D\*{2}g\b + with: "Drug" + - regex: D\*{2}gged\b + with: "Drugged" + - regex: D\*{2}gs\b + with: "Drugs" + - regex: D\*{3}k\b + with: "Drunk" + - regex: D\*{3}ken\b + with: "Drunken" + - regex: D\*{3}kest\b + with: "Drunkest" + - regex: EnS\*{3}ed\b + with: "Enslaved" + - regex: F\*{3}e\b + with: "Force" + - regex: F\*{3}eful\b + with: "Forceful" + - regex: F\*{3}efully\b + with: "Forcefully" + - regex: F\*{3}es\b + with: "Forces" # Not Sure + - regex: F\*{3}ed\b + with: "Fucked" + - regex: F\*{5}g\b + with: "Fucking" # SSNI-391 + - regex: G\*{9}d\b + with: "Gang-Banged" + - 
regex: G\*{6}g\b + with: "Gangbang" # STAR-976 + - regex: G\*{7}g\b + with: "Gangbang" + - regex: G\*{6}ged\b + with: "Gangbanged" # SSNI-242 + - regex: G\*{7}ged\b + with: "Gangbanged" + - regex: G\*{7}gers\b + with: "Gangbangers" + - regex: G\*{6}ging\b + with: "Gangbanging" + - regex: G\*{7}ging\b + with: "Gangbanging" + - regex: G\*{7}gs\b + with: "Gangbangs" + - regex: Half-A\*{4}p\b + with: "Half-Asleep" # ZMEN-037 + - regex: HumB\*{2}d\b + with: "Humbled" + - regex: H\*{9}n\b + with: "Humiliation" + - regex: H\*{2}t\b + with: "Hurt" + - regex: H\*{2}ts\b + with: "Hurts" + - regex: H\*{7}m\b + with: "Hypnotism" + - regex: H\*{7}ed\b + with: "Hypnotized" # PPPD-376 + - regex: I\*{4}t\b + with: "Incest" + - regex: I\*{4}tuous\b + with: "Incestuous" + - regex: I\*{4}ts\b + with: "Insults" + - regex: J\*{1}\b + with: "Jo" # Not Sure + - regex: J\*{1}s\b + with: "Jos" # Not Sure + - regex: K\*{1}d\b + with: "Kid" + - regex: K\*{1}dding\b + with: "Kidding" + - regex: K\*{4}pped\b + with: "Kidnapped" + - regex: K\*{4}pper\b + with: "Kidnapper" + - regex: K\*{4}pping\b + with: "Kidnapping" + - regex: K\*{1}ds\b + with: "Kids" + - regex: K\*{2}l\b + with: "Kill" + - regex: K\*{2}led\b + with: "Killed" # SNIS-036 + - regex: K\*{2}ler\b + with: "Killer" # Not Sure + - regex: K\*{2}ling\b + with: "Killing" + - regex: Lol\*{1}pop\b + with: "Lolipop" + - regex: Lo\*{2}ta\b + with: "Lolita" + - regex: Ma\*{1}ko\b + with: "Maiko" + - regex: M\*{4}t\b + with: "Molest" + - regex: M\*{4}tation\b + with: "Molestation" + - regex: M\*{4}ted\b + with: "Molested" + - regex: M\*{4}ter\b + with: "Molester" + - regex: M\*{4}ters\b + with: "Molesters" + - regex: M\*{4}ting\b + with: "Molesting" + - regex: M\*{4}tor\b + with: "Molestor" + - regex: P\*{4}h\b + with: "Punish" + - regex: P\*{4}hed\b + with: "Punished" + - regex: P\*{4}hment\b + with: "Punishment" + - regex: P\*{1}ssy\b + with: "Pussy" + - regex: R\*{2}e\b + with: "Rape" + #- regex: R\*{1}pe\b + # with: "Rape" # Can be Rope 
? IPX-311 + - regex: R\*{2}ed\b + with: "Raped" + - regex: R\*{1}ped\b + with: "Raped" + - regex: R\*{2}es\b + with: "Rapes" + - regex: R\*{4}g\b + with: "Raping" + - regex: S\*{9}l\b + with: "School Girl" # Not Sure + - regex: S\*{9}ls\b + with: "School Girls" # SSNI-296 + - regex: S\*{8}l\b + with: "Schoolgirl" + - regex: Sch\*{2}lgirl\b + with: "Schoolgirl" + - regex: S\*{9}ls\b + with: "Schoolgirls" # Not Sure (PPPD-811) + - regex: S\*{8}ls\b + with: "Schoolgirls" + - regex: Sch\*{2}lgirls\b + with: "Schoolgirls" + - regex: SK\*{2}led\b + with: "Skilled" + - regex: SK\*{2}lful\b + with: "Skillful" + - regex: SK\*{2}lfully\b + with: "Skillfully" + - regex: SK\*{2}ls\b + with: "Skills" + - regex: S\*{3}e\b + with: "Slave" + - regex: S\*{3}ery\b + with: "Slavery" + - regex: S\*{3}es\b + with: "Slaves" + - regex: S\*{6}g\b + with: "Sleeping" + - regex: StepB\*{16}r\b + with: "StepBrother And Sister" # Not Sure + - regex: StepK\*{1}ds \b + with: "StepKids" + - regex: StepM\*{12}n\b + with: "StepMother And Son" # GVG-299 + - regex: S\*{5}t\b + with: "Student" + - regex: S\*{5}ts\b + with: "Students" + - regex: S\*{8}n\b + with: "Submission" + - regex: T\*{6}e\b + with: "Tentacle" #MIDD-648 + - regex: T\*{6}es\b + with: "Tentacles" + - regex: T\*{5}e\b + with: "Torture" + - regex: T\*{5}ed\b + with: "Tortured" + - regex: T\*{5}es\b + with: "Tortures" #MIDD-648 + - regex: U\*{9}sly\b + with: "Unconsciously" + - regex: U\*{7}g\b + with: "Unwilling" + - regex: V\*{5}e\b + with: "Violate" + - regex: V\*{1}olated\b + with: "Violated" + - regex: V\*{5}ed\b + with: "Violated" + - regex: V\*{5}es\b + with: "Violates" + - regex: V\*{6}e\b + with: "Violence" + - regex: V\*{5}t\b + with: "Violent" + - regex: Y\*{8}l\b + with: "Young Girl" # Not Sure + - regex: Y\*{8}ls\b + with: "Young Girls" # Not Sure + Studio: + Name: maker_name_en + Code: + selector: dvd_id + Tags: + Name: + selector: categories.#.name_en + postProcess: *ppUncensor + URL: + selector: content_id + 
postProcess: + - replace: + - regex: ^ + with: https://r18.dev/videos/vod/movies/detail/-/id= + + movieScraper: + movie: + Name: + selector: dvd_id + Aliases: title_ja + Duration: + selector: runtime_mins + postProcess: + - replace: + - regex: $ + with: ":00" + Date: + selector: release_date + FrontImage: + selector: jacket_full_url + Director: + selector: directors.#.name_romaji + Synopsis: + selector: title_en + postProcess: *ppUncensor + Studio: + Name: maker_name_en + URL: + selector: content_id + postProcess: + - replace: + - regex: ^ + with: https://r18.dev/videos/vod/movies/detail/-/id= + + # Used for fragment scraping - the api is a little messy if we don't have the exact ID and need to do a search because the search result api doesn't have all the info we need. + # So we get the ID from the search result page and make another search for it + # Note I am not getting aliases yet for performers here as it didn't seem worth it to make that many subqueries. You should hopefully be matching them up with StashDB versions of the performers + # or doing a performer search anyways. + # I've also sent in a request to the dev of the site to make their API a little easier to use, so hopefully that goes through and we can have our cake and eat it too. :) + sceneSearchIndirect: + scene: + # Title not on this page, so we need to fetch another page to get it + Title: + selector: content_id + postProcess: + - replace: + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json + - subScraper: + selector: dvd_id + # Code not on this page, so we need to fetch another page to get it. It's the same as title. Surely there must be a way to only query once? 
+ Code: + selector: content_id + postProcess: + - replace: + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json + - subScraper: + selector: dvd_id + Details: + selector: title + postProcess: *ppUncensor + Image: images.jacket_image.large + Director: director + Date: release_date + Tags: + Name: + selector: categories.#.name + postProcess: *ppUncensor + Studio: + Name: maker.name + Performers: + Name: actresses.#.name + URL: + selector: content_id + postProcess: + - replace: + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/id=$1/ + + sceneSearch: + scene: + Title: + selector: title + Image: + selector: images.jacket_image.large + Date: + selector: release_date + URL: + selector: content_id + postProcess: + - replace: + - regex: (.+) + with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json +# Last Updated January 19, 2023 From 46d9dd82a6716fad56cd001e87a29bf71d96a5af Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Fri, 27 Jan 2023 18:50:46 +0000 Subject: [PATCH 031/624] Update Hypnotube.yml to pull in main video thumbnail (#1251) --- scrapers/Hypnotube.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/scrapers/Hypnotube.yml b/scrapers/Hypnotube.yml index aef5ee1f3..7aba32b71 100644 --- a/scrapers/Hypnotube.yml +++ b/scrapers/Hypnotube.yml @@ -11,12 +11,7 @@ xPathScrapers: scene: Title: //div[@class='item-tr-inner-col inner-col']/h1/text() Details: //div[@class='main-description']/text() - Image: - selector: //script[@type="text/javascript" and contains(text(),"og:image")]/text() - postProcess: - - replace: - - regex: '.+"og:image" content="([^"]+)".+' - with: $1 + Image: //meta[@property='og:image']/@content Studio: Name: $studio/@title URL: $studio/@href @@ -29,5 +24,4 @@ xPathScrapers: Tags: Name: //div[@class='tags-block']/a/text() URL: //link[rel='canonical']/@href - -# Last Updated July 16, 2022 \ No newline at end of file +# 
Last Updated January 27, 2023 From 6d37f4ebd1df81f034f7f14420b4b9b7b58bdadc Mon Sep 17 00:00:00 2001 From: niemands <67282402+niemands@users.noreply.github.com> Date: Fri, 27 Jan 2023 19:55:25 +0100 Subject: [PATCH 032/624] Fix xvideos scene scraper (#1250) --- scrapers/Xvideos.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scrapers/Xvideos.yml b/scrapers/Xvideos.yml index 71a331786..dab555994 100644 --- a/scrapers/Xvideos.yml +++ b/scrapers/Xvideos.yml @@ -10,15 +10,15 @@ xPathScrapers: scene: Title: //h2[@class="page-title"]/text()[1] Tags: - Name: //li/a[@class="btn btn-default"][contains(@href,"/tags/")]/text() + Name: //li/a[@class="is-keyword btn btn-default"]/text() Performers: - Name: //a[@class="btn btn-default label profile hover-name"]/span[@class="name"]/text() + Name: //li[@class="model"]/a/span[1]/text() Studio: Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text() Image: selector: //script[contains(text(), "setThumbUrl169")]/text()|//div[@id="html5video"][span[@class="premium-log-overlay"]]/div[@class="embed-responsive-item"]/@style postProcess: - replace: - - regex: ^.+(?:setThumbUrl169|url)\('(.+?\.jpg).+$ - with: $1 -# Last Updated November 11, 2020 + - regex: ^.+(?:setThumbUrl169|url)\('(.+?\.jpg).+$ + with: $1 +# Last Updated January 23, 2023 From 25a85ee76c70c7ed9fb2c73ab9a69defc7e66d29 Mon Sep 17 00:00:00 2001 From: Emilo2 <99644577+Emilo2@users.noreply.github.com> Date: Fri, 27 Jan 2023 21:00:45 +0200 Subject: [PATCH 033/624] Fix tag selector for Mature.nl (#1253) --- scrapers/MatureNL.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/MatureNL.yml b/scrapers/MatureNL.yml index ea42b9fce..87ea8ed23 100644 --- a/scrapers/MatureNL.yml +++ b/scrapers/MatureNL.yml @@ -12,7 +12,7 @@ xPathScrapers: Title: &title //div[@class="box"]/h1/text() Details: &details //div/span[text()="Synopsis:"]/following-sibling::text() | 
//meta[@name="description"]/@content Tags: &tags - Name: //div[@class="box-cnt"]/div[@class="mar-t"]/a[not(@class)]/text() + Name: //div[@class="box-cnt"]/div[@class="mar-t"]//a[contains(@href,"/niche/")]/text() Performers: &performers Name: selector: //div[@class="name"]/span[@class="col-accent"]/text() @@ -39,4 +39,4 @@ xPathScrapers: Performers: *performers Date: *date Studio: *studio -# Last Updated March 21, 2022 +# Last Updated January 25, 2023 From 95b2de7fe6a27086e27e9918cc54a489d98003dc Mon Sep 17 00:00:00 2001 From: xx790 <118630824+xx790@users.noreply.github.com> Date: Tue, 31 Jan 2023 21:31:12 +0400 Subject: [PATCH 034/624] Add scene scraper for (ecchi.)iwara.tv (#1182) --- SCRAPERS-LIST.md | 1 + scrapers/Iwara.yml | 93 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 scrapers/Iwara.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 2e794aeb2..7f6bd7c50 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -644,6 +644,7 @@ italianshotclub.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- itscleolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- itspov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- iwantclips.com|IWantClips.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +iwara.tv|Iwara.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jacquieetmicheltv.net|JacquieEtMichelTV.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- jamesdeen.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- janafox.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Iwara.yml b/scrapers/Iwara.yml new file mode 100644 index 000000000..ebacb23d4 --- /dev/null +++ b/scrapers/Iwara.yml @@ -0,0 +1,93 @@ +name: "Iwara" +sceneByURL: + - action: scrapeXPath + url: + - iwara.tv + scraper: sceneScraper + +sceneByFragment: + action: scrapeXPath + scraper: sceneScraper + queryURL: https://ecchi.iwara.tv/videos/{filename} # also works for sfw videos + queryURLReplace: + filename: # expects the default 
filename format when saved from Iwara - __. + - regex: ".*_([0-9a-zA-Z]{12,})_.*" + with: $1 + - regex: .*\.[^\.]+$ # if no id is found in the filename + with: # clear the filename so that it doesn't leak + +sceneByName: + action: scrapeXPath + queryURL: https://ecchi.iwara.tv/search?f%5B0%5D=type%3Avideo&query={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + common: + $searchItem: //div[contains(@class,"view-search")]//div[contains(@class,"views-row")]//div[contains(@class,"node-video")] + scene: + Title: $searchItem//h3[@class="title"]/a/text() + URL: + selector: $searchItem//h3[@class="title"]/a/@href + postProcess: + - replace: + - regex: "^" + with: "https://ecchi.iwara.tv" + Image: + selector: $searchItem//img/@src + postProcess: + - replace: + - regex: "^//" + with: "https://" + Studio: + Name: $searchItem//a[@class="username"]/text() + URL: + selector: $searchItem//a[@class="username"]/@href + postProcess: + - replace: + - regex: "^" + with: "https://ecchi.iwara.tv" + Date: + selector: $searchItem//div[@class="submitted"] + postProcess: + - replace: + - regex: '.+(\d{4}-\d{2}-\d{2}).+' + with: $1 + - parseDate: "2006-01-02" + sceneScraper: + common: + $infoNode: //div[@class="node-info"] + scene: + URL: //link[@rel="canonical"][contains(@href,"http")]/@href + Title: $infoNode//h1/text() + Details: + selector: $infoNode/div[contains(@class,"field-name-body")]//p//*/text() + concat: "\n" + Image: + selector: //video/@poster + postProcess: + - replace: + - regex: "^" + with: "https:" + Tags: + Name: $infoNode/div[contains(@class,"field-name-field-categories")]//a/text() + Studio: + Name: $infoNode//a[@class="username"]/text() + URL: + selector: $infoNode//a[@class="username"]/@href + postProcess: + - replace: + - regex: "^" + with: "https://ecchi.iwara.tv" + Date: + selector: $infoNode/div[@class="submitted"] + postProcess: + - replace: + - regex: 
'.+(\d{4}-\d{2}-\d{2}).+' + with: $1 + - parseDate: "2006-01-02" +# Last Updated November 25, 2022 From d37851eaac41df2a08212fc940921a4f13275e48 Mon Sep 17 00:00:00 2001 From: xx790 <118630824+xx790@users.noreply.github.com> Date: Tue, 31 Jan 2023 21:34:38 +0400 Subject: [PATCH 035/624] Add scene scraper for oreno3d.com (#1183) --- SCRAPERS-LIST.md | 1 + scrapers/Oreno3d.yml | 56 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 scrapers/Oreno3d.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 7f6bd7c50..12c133e14 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -925,6 +925,7 @@ onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oreno3d.com|Oreno3d.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- orgytrain.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- outdoorjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV outhim.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay diff --git a/scrapers/Oreno3d.yml b/scrapers/Oreno3d.yml new file mode 100644 index 000000000..39d5a0729 --- /dev/null +++ b/scrapers/Oreno3d.yml @@ -0,0 +1,56 @@ +name: "Oreno3d" +sceneByURL: + - action: scrapeXPath + url: + - oreno3d.com + scraper: sceneScraper + +sceneByName: + action: scrapeXPath + queryURL: https://oreno3d.com/search?keyword={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + common: + $searchItem: //div[@class="g-main-grid"]/article/a + scene: + Title: $searchItem/h2/text() + URL: $searchItem/@href + Image: + selector: $searchItem/figure/img/@src + postProcess: + - replace: + - regex: "^" + with: "https://oreno3d.com" + Studio: + Name: 
$searchItem/div[@class="box-text1"]/div/text() + sceneScraper: + common: + $article: //article[@class="g-main-video-article"] + $aTag: //article[@class="g-main-video-article"]/section[@class="video-section-tag"]//a[contains(@class,"tag-btn")] + scene: + URL: //link[rel='canonical']/@href|//meta[@property='og:url']/@content + Title: $article//h1[@class="video-h1"]/text() + Details: + selector: $article//blockquote[@class="video-information-comment"]/text() + concat: "\n" + Image: + selector: $article//img[@class="video-img"]/@src + postProcess: + - replace: + - regex: "^" + with: "https://oreno3d.com" + Tags: + Name: $aTag/div/text() + Studio: + Name: $aTag[contains(@href,"/authors/")]/div/text() + URL: $aTag[contains(@href,"/authors/")]/@href + Performers: + Name: $aTag[contains(@href,"/characters/")]/div/text() + URL: $aTag[contains(@href,"/characters/")]/@href +# Last Updated November 25, 2022 From 8a6272e09907f793fdd77e2a2570863861593973 Mon Sep 17 00:00:00 2001 From: Valmox <122233426+Valmox@users.noreply.github.com> Date: Tue, 31 Jan 2023 17:42:08 +0000 Subject: [PATCH 036/624] Update BelAmi.yml - change to larger image, add tags & studio code (#1256) --- scrapers/BelAmi.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scrapers/BelAmi.yml b/scrapers/BelAmi.yml index 9d8a3c74e..4a748c8b0 100644 --- a/scrapers/BelAmi.yml +++ b/scrapers/BelAmi.yml @@ -26,5 +26,13 @@ xPathScrapers: postProcess: - replace: - regex: .*?Screen-([1-9].*) - with: https://freecdn.belamionline.com/Data/Contents/Content_$1/Thumbnail8.jpg -# Last Updated July 06, 2022 + with: https://freecdn.belamionline.com/Data/Contents/Content_$1/Thumbnail6.jpg + Tags: + Name: //*[@id="ContentPlaceHolder1_LabelTags"]/a + Code: + selector: //form[@method="post"]/@action + postProcess: + - replace: + - regex: ^.*\=\s* + with: +# Last Updated January 29, 2023 From b40909c66b674523ed80c4873bd3f465b5be85e2 Mon Sep 17 00:00:00 2001 From: hphpanon 
<102124543+hphpanon@users.noreply.github.com> Date: Tue, 31 Jan 2023 13:20:56 -0500 Subject: [PATCH 037/624] Add scoreland2 and fix title XPath for TheScoreGroup.yml (#1254) --- SCRAPERS-LIST.md | 1 + scrapers/TheScoreGroup.yml | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 12c133e14..2fe7daa4b 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1078,6 +1078,7 @@ scarybigdicks.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- schoolgirlshd.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored schoolpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- scoreland.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +scoreland2.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- screwmetoo.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- seductive18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup.yml index 524ca4a4f..138733878 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup.yml @@ -9,6 +9,7 @@ sceneByURL: - legsex.com/ - pornmegaload.com/ - scoreland.com/ + - scoreland2.com/ - xlgirls.com/ scraper: sceneScraper galleryByURL: @@ -18,7 +19,7 @@ galleryByURL: xPathScrapers: sceneScraper: scene: - Title: //section[@id="videos_page-page"]/div[@class="row"]/div/h2/text() + Title: //section[contains(@id,"_page-page")]/div[@class="row"]/div/h1/text() Studio: &studioAttr Name: selector: //meta[@property="og:site_name"]/@content @@ -56,4 +57,4 @@ xPathScrapers: Tags: Name: $photopage//div[@class='mb-3']/a/text() Performers: *performersAttr -# Last Updated November 10, 2021 +# Last Updated January 31, 2023 From 8ead07daba5ee612f3ebfdb3842160f32a686025 Mon Sep 17 00:00:00 2001 From: freeagent1384 Date: Tue, 31 Jan 2023 10:58:05 -0800 Subject: [PATCH 038/624] Tweak movie title 
selector for data18.yml (#1252) --- scrapers/data18.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scrapers/data18.yml b/scrapers/data18.yml index ec2eef975..403f3004a 100644 --- a/scrapers/data18.yml +++ b/scrapers/data18.yml @@ -49,7 +49,12 @@ xPathScrapers: $movieInfo: //div[@id="body2div_b"] $studio: //b[text()='Studio']/following-sibling::b/a movie: - Name: //div[@id="topmedia"]//a/text() + Name: + selector: //title + postProcess: + - replace: + - regex: (.+?)(?:\s\(\d{4}\)\sPorn\sMovie\s\|\sDATA18) + with: $1 Duration: selector: $movieInfo//b[contains(text(),"Length")]/following-sibling::span|$movieInfo//b[contains(text(),"Length")]/following-sibling::text() postProcess: @@ -78,4 +83,4 @@ xPathScrapers: with: FrontImage: //a[@id='enlargecover']/@href BackImage: //a[text()='+Back']/@href -# Last Updated June 30, 2022 \ No newline at end of file +# Last Updated January 31, 2023 From c33d0a4082f5efb96417005297f92a793ef1354c Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Sat, 4 Feb 2023 20:35:17 +0200 Subject: [PATCH 039/624] Update IFeelMyself.yml - update Last Updated Date to track changes to the py scraper --- scrapers/IFeelMyself.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/IFeelMyself.yml b/scrapers/IFeelMyself.yml index 60a3c5289..9c8c2c5e4 100644 --- a/scrapers/IFeelMyself.yml +++ b/scrapers/IFeelMyself.yml @@ -27,4 +27,4 @@ sceneByURL: - python3 - IFeelMyself.py - url -# Last Updated October 29, 2022 +# Last Updated February 04, 2023 From 669932c049286a258bac7d82530c5e43882448d8 Mon Sep 17 00:00:00 2001 From: paperSpock <75402043+paperSpock@users.noreply.github.com> Date: Sat, 4 Feb 2023 13:50:17 -0500 Subject: [PATCH 040/624] Update IFeelMyself.py: search results beyond 2022 (#1261) --- scrapers/IFeelMyself.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/IFeelMyself.py b/scrapers/IFeelMyself.py index 
dbafa1489..25ac538ac 100644 --- a/scrapers/IFeelMyself.py +++ b/scrapers/IFeelMyself.py @@ -86,7 +86,7 @@ def scrapeScene(filename,date,url): video_id = re.search(r"-(\d+)",filename,re.I).group(1) cookie_obj = create_cookie(name='ifm_search_keyword', value=artist_id, domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) - cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2210%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%222022%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') + cookie_obj = create_cookie(name='ifm_prefs', 
value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) cookie_obj = create_cookie(name='ifeel_auth', value=ifeelauth, domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) @@ -135,7 +135,7 @@ def scrapeScene(filename,date,url): debugPrint(f"Title: {title}") cookie_obj = create_cookie(name='ifm_search_keyword', value=title, domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) - cookie_obj = create_cookie(name='ifm_prefs', 
value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2210%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%222022%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') + cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) cookie_obj = create_cookie(name='ifeel_auth', value=ifeelauth, domain='.ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) @@ -190,7 +190,7 @@ def queryPerformer(perfname): 
browser.open("https://ifeelmyself.com/public/main.php") cookie_obj = create_cookie(name='tags_popup_shown', value='true', domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) - cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2210%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%222022%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') + cookie_obj = create_cookie(name='ifm_prefs', value="a%3A1%3A%7Bs%3A6%3A%22search%22%3Ba%3A17%3A%7Bs%3A8%3A%22category%22%3Ba%3A0%3A%7B%7Ds%3A7%3A%22view_by%22%3Bs%3A4%3A%22news%22%3Bs%3A7%3A%22date_by%22%3Bs%3A7%3A%22anytime%22%3Bs%3A10%3A%22from_month%22%3Bs%3A1%3A%221%22%3Bs%3A9%3A%22from_year%22%3Bs%3A4%3A%222006%22%3Bs%3A8%3A%22to_month%22%3Bs%3A2%3A%2212%22%3Bs%3A7%3A%22to_year%22%3Bs%3A4%3A%223000%22%3Bs%3A7%3A%22country%22%3Bs%3A3%3A%22all%22%3Bs%3A10%3A%22attributes%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_logical%22%3Bs%3A3%3A%22AND%22%3Bs%3A13%3A%22tags_remember%22%3Bs%3A1%3A%22n%22%3Bs%3A4%3A%22tags%22%3Ba%3A0%3A%7B%7Ds%3A12%3A%22tags_exclude%22%3Bs%3A0%3A%22%22%3Bs%3A9%3A%22hide_tags%22%3Ba%3A0%3A%7B%7Ds%3A8%3A%22age_from%22%3Bs%3A2%3A%2218%22%3Bs%3A6%3A%22age_to%22%3Bs%3A2%3A%2299%22%3Bs%3A16%3A%22profilevid_limit%22%3Bs%3A0%3A%22%22%3B%7D%7D", domain='.ifeelmyself.com') 
browser.session.cookies.set_cookie(cookie_obj) cookie_obj = create_cookie(name='ifm_search_keyword', value=perfname, domain='ifeelmyself.com') browser.session.cookies.set_cookie(cookie_obj) From 7283b7e07e0f9fcb0b5956cf0866c83f32d57630 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Tue, 7 Feb 2023 18:44:22 +0000 Subject: [PATCH 041/624] Add agentredgirl.com to Algolia_Adultime.yml (#1267) --- SCRAPERS-LIST.md | 1 + scrapers/Algolia.py | 3 ++- scrapers/Algolia_Adultime.yml | 3 ++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 2fe7daa4b..f51a3db31 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -42,6 +42,7 @@ adultprime.com|AdultPrime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- adulttime.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|- adulttimepilots.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- aebn.com|AEBN.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Straight + Gay +agentredgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- alettaoceanempire.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- alexismonroe.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- alexlegend.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index 17f6ba88e..f4b2a1a5c 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -59,7 +59,8 @@ # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE = { - "Devilstgirls": "Devil's Tgirls" + "Devilstgirls": "Devil's Tgirls", + "AgentRedGirl": "Agent Red Girl" } # a list of sites (`sitename_pretty` from the API) which should pick out the diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Algolia_Adultime.yml index 3b723a445..ec90ade10 100644 --- 
a/scrapers/Algolia_Adultime.yml +++ b/scrapers/Algolia_Adultime.yml @@ -6,6 +6,7 @@ sceneByURL: - adamandevepictures.com/en/video/ - adulttime.com/en/video/ - adulttimepilots.com/en/video/ + - agentredgirl.com/en/video/ - analteenangels.com/en/video/ - assholefever.com/en/video/ - beingtrans247.com/en/video/ @@ -95,4 +96,4 @@ movieByURL: - Algolia.py - puretaboo - movie -# Last Updated December 22, 2022 +# Last Updated February 06, 2023 From 01fe569a5e0b82393ef35169cb58ab2ebbdae2e4 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Tue, 7 Feb 2023 18:46:48 +0000 Subject: [PATCH 042/624] Fix studio name selector for VR Bangers (and network sites) (#1266) --- scrapers/VRBangers.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scrapers/VRBangers.yml b/scrapers/VRBangers.yml index 4bf55690b..0978a9308 100644 --- a/scrapers/VRBangers.yml +++ b/scrapers/VRBangers.yml @@ -51,13 +51,18 @@ xPathScrapers: Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text() Studio: Name: &studioName - selector: //meta[@name="dl8-customization-brand-name"]/@content + selector: &studioURLSel //meta[@name="dl8-customization-brand-url"]/@content postProcess: - replace: - - regex: \# + - regex: ^// with: "" + - map: + vrbangers.com: VR Bangers + vrbgay.com: VRB Gay + vrbtrans.com: VRB Trans + vrconk.com: VR Conk URL: &studioURL - selector: //meta[@name="dl8-customization-brand-url"]/@content + selector: *studioURLSel postProcess: - replace: - regex: ^ @@ -87,4 +92,4 @@ xPathScrapers: Name: *studioName URL: *studioURL FrontImage: *imageSel -# Last Updated November 27, 2022 +# Last Updated February 06, 2023 From 211d73f282f36930eec82b74d308188cf234656f Mon Sep 17 00:00:00 2001 From: jessqic <8094540+jessqic@users.noreply.github.com> Date: Tue, 7 Feb 2023 18:58:57 +0000 Subject: [PATCH 043/624] Update Transerotica.yml (fix date selector) (#1264) --- scrapers/Transerotica.yml | 9 +++++++-- 1 file changed, 7 
insertions(+), 2 deletions(-) diff --git a/scrapers/Transerotica.yml b/scrapers/Transerotica.yml index ac7e0b2a3..4690bce12 100644 --- a/scrapers/Transerotica.yml +++ b/scrapers/Transerotica.yml @@ -12,9 +12,14 @@ xPathScrapers: Title: //h1[@class='title_bar'] Image: //div[@id="player"]/video/@poster Date: - selector: $update//span[@class="upddate"][1] + selector: $update//comment() postProcess: + - replace: + - regex: .*(?:class='upddate').*(\d{2}/\d{2}/\d{4}).* + with: $1 - parseDate: "01/02/2006" + - map: + 1970-01-01: "" Details: selector: $update/p[starts-with(text(),"Description:")] postProcess: @@ -31,4 +36,4 @@ xPathScrapers: Studio: Name: fixed: Trans Erotica -# Last Updated August 07, 2021 +# Last Updated February 06, 2023 From 58c89255134bbc5b43a0b9233e8bfc8f5a07bf71 Mon Sep 17 00:00:00 2001 From: vt-idiot <81622808+vt-idiot@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:14:45 -0500 Subject: [PATCH 044/624] Add more helpful 404 errors to py_common/graphql.py (#1260) --- scrapers/py_common/graphql.py | 52 ++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/scrapers/py_common/graphql.py b/scrapers/py_common/graphql.py index 09f619adf..a96d583b3 100644 --- a/scrapers/py_common/graphql.py +++ b/scrapers/py_common/graphql.py @@ -4,15 +4,21 @@ try: import requests except ModuleNotFoundError: - print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr) + print( + "You need to install the requests module. 
(https://docs.python-requests.org/en/latest/user/install/)", + file=sys.stderr) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", + file=sys.stderr) sys.exit() try: import py_common.config as config import py_common.log as log except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr) sys.exit() @@ -27,16 +33,14 @@ def callGraphQL(query, variables=None): stash_url = config.STASH["url"] + "/graphql" headers = { - "Accept-Encoding": "gzip, deflate, br", + "Accept-Encoding": "gzip, deflate", "Content-Type": "application/json", "Accept": "application/json", "Connection": "keep-alive", "DNT": "1", "ApiKey": api_key } - json = { - 'query': query - } + json = {'query': query} if variables is not None: json['variables'] = variables try: @@ -45,16 +49,28 @@ def callGraphQL(query, variables=None): result = response.json() if result.get("error"): for error in result["error"]["errors"]: - raise Exception("GraphQL error: {}".format(error)) + raise Exception(f"GraphQL error: {error}") if result.get("data"): return result.get("data") elif response.status_code == 401: log.error( - "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder") + "[ERROR][GraphQL] HTTP Error 401, Unauthorised. You can add a API Key in 'config.py' in the 'py_common' folder" + ) + return None + elif response.status_code == 404: + if config.STASH["url"] == "http://localhost:9999": + log.error( + "[ERROR][GraphQL] HTTP Error 404, Not Found. Your local stash server is your endpoint, but port 9999 did not respond. Did you change stash's port? 
Edit 'config.py' in the 'py_common' folder to point at the correct port for stash!" + ) + else: + log.error( + "[ERROR][GraphQL] HTTP Error 404, Not Found. Make sure 'config.py' in the 'py_common' folder points at the correct address and port!" + ) return None else: raise ConnectionError( - "GraphQL query failed:{} - {}".format(response.status_code, response.content)) + f"GraphQL query failed:{response.status_code} - {response.content}" + ) except Exception as err: log.error(err) return None @@ -454,9 +470,7 @@ def getScene(scene_id): } """ - variables = { - "id": scene_id - } + variables = {"id": scene_id} result = callGraphQL(query, variables) if result: return result.get('findScene') @@ -474,9 +488,7 @@ def getSceneScreenshot(scene_id): } } """ - variables = { - "id": scene_id - } + variables = {"id": scene_id} result = callGraphQL(query, variables) if result: return result.get('findScene') @@ -1164,9 +1176,7 @@ def getGallery(gallery_id): """ - variables = { - "id": gallery_id - } + variables = {"id": gallery_id} result = callGraphQL(query, variables) if result: return result.get('findGallery') @@ -1181,9 +1191,7 @@ def getGalleryPath(gallery_id): } } """ - variables = { - "id": gallery_id - } + variables = {"id": gallery_id} result = callGraphQL(query, variables) if result: return result.get('findGallery') From a73a1dfab0efe9cedadb5ec599ddc1edd00c02a2 Mon Sep 17 00:00:00 2001 From: aussiehuddo Date: Wed, 8 Feb 2023 07:15:14 +1100 Subject: [PATCH 045/624] Added momcum.com to AMAMultimedia.yml + map studio names (#1265) --- SCRAPERS-LIST.md | 1 + scrapers/AMAMultimedia.yml | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index f51a3db31..60e775663 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -811,6 +811,7 @@ mofos.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- mofosnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
mom4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momcomesfirst.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momisamilf.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momlover.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- mommyblowsbest.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/AMAMultimedia.yml b/scrapers/AMAMultimedia.yml index dfc636901..cdf338430 100644 --- a/scrapers/AMAMultimedia.yml +++ b/scrapers/AMAMultimedia.yml @@ -32,6 +32,7 @@ sceneByURL: - holed.com/video/ - lubed.com/video/ - mom4k.com/video/ + - momcum.com/video/ - myveryfirsttime.com/video/ - nannyspy.com/video/ - passion-hd.com/video/ @@ -73,10 +74,22 @@ xPathScrapers: - regex: ([?&]img[wh]=\d+)+$ with: Studio: - Name: //div[@id="navigation"]/h5/a/@alt + Name: + selector: //div[@id="navigation"]/h5/a/@alt + postProcess: + - map: + Baeb: BAEB + Casting Couch X: Casting Couch-X + Cum4K: Cum 4K + Exotic4k: Exotic 4K + GirlCum: Girl Cum + MomCum: Mom Cum + NannySpy: Nanny Spy + SpyFam: Spy Fam + Tiny4K: Tiny 4K + WetVR: Wet VR Date: selector: //div[contains(text(), 'RELEASED')]/span/text() postProcess: - parseDate: January 02, 2006 - -# Last Updated March 26, 2022 +# Last Updated February 06, 2023 From 66bfc225a04f8f11cf00ea2f54d6997c57770b2c Mon Sep 17 00:00:00 2001 From: DoctorD Date: Tue, 7 Feb 2023 12:21:30 -0800 Subject: [PATCH 046/624] Fix Clips4Sale images on scene scraper and search (#1263) --- scrapers/Clips4Sale.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index c6d53e806..024eb0628 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -29,11 +29,12 @@ xPathScrapers: URL: selector: $card//a[contains(@class, "search-clip__title")]/@href Image: - selector: $card//img[@draggable="false"]/@data-src + selector: $card//a[contains(@class, 
"search-clip__title")]/@href postProcess: - replace: - - regex: ^// - with: https:// + - regex: https:\/\/www.clips4sale.com\/studio\/(\d+)\/(\d+)\/.*$ + with: https://imagecdn.clips4sale.com/accounts99/$1/clip_images/previewlg_$2.jpg + c4sSceneScraper: common: $studio: //span[contains(text(),"From:")]/following-sibling::a @@ -79,7 +80,7 @@ xPathScrapers: # often included in the video tags. So we attempt to find matches there. Name: //div/span[contains(text(),"Keywords:")]/..//a Image: - selector: //div[contains(@class, "clipImage")]/img/@data-src + selector: //div[contains(@class, "clipImage")]/div/video/@data-poster postProcess: &ppPrependScheme - replace: - regex: ^// @@ -112,4 +113,4 @@ xPathScrapers: # Clips4Sale doesn't have an explict performer field, but performers are # often included in the video tags. So we attempt to find matches there. Name: //span[contains(text(),"Keywords:")]/following-sibling::a -# Last Updated August 21, 2022 +# Last Updated February 03, 2023 From 22cee2efe5d39f3112bdf7a7a6eda2164c815678 Mon Sep 17 00:00:00 2001 From: DoctorD Date: Tue, 7 Feb 2023 13:01:15 -0800 Subject: [PATCH 047/624] Update GoddessSnow.com scraper (add search by name) (#1258) --- scrapers/GoddessSnow.yml | 100 ++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 38 deletions(-) diff --git a/scrapers/GoddessSnow.yml b/scrapers/GoddessSnow.yml index c0027d2f4..0b9b7c63d 100644 --- a/scrapers/GoddessSnow.yml +++ b/scrapers/GoddessSnow.yml @@ -1,57 +1,81 @@ -name: GoddessSnow +name: GoddessSnow.com + +sceneByName: + action: scrapeXPath + scraper: sceneSearch + queryURL: "https://www.goddesssnow.com/vod/search.php?query={}" + +# We don't want the /updates URL here because it has the wrong release date by a year (scenes get released a year early on /scenes) +# And also the descriotion is truncated often on /updates +# /scenes also has two versions, one that ends in "_vids.html" and one that ends in ".html" +# We want to make sure we get the 
_vids.html version as that is the one with the images +# We take care of both issues above in the queryURLReplace section sceneByURL: - - action: scrapeXPath - url: - - goddesssnow.com/vod/scenes/ - scraper: vodScraper - action: scrapeXPath url: - goddesssnow.com/updates/ - scraper: updateScraper + - goddesssnow.com/vod/scenes + queryURL: "{url}" + queryURLReplace: + url: + # convert /updates URLs to /vod/scenes + - regex: (.+)(\/updates\/)(.+)(\.html) + with: $1/vod/scenes/$3.html + # fix up the /vod/scenes urls that people may have that do not end in _vids.html. First get rid of it for everyone, then add it back in. + # This both adds it to the /updates urls from above, leaves the urls that have the correct form alone, and fix /vod/scenes urls that are the "bad" ones + # We are doing this two step process because Go regex does not support backreferences which would have let us cleanly do this in one regex + - regex: _vids\.html + with: ".html" + - regex: \.html + with: "_vids.html" + scraper: sceneScraper + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: - vodScraper: + sceneScraper: scene: - Title: - selector: //div[@class="title_bar"]/span/text() + Title: //div[@class="title_bar"]/span/text() + URL: + selector: //div/@data-redirect + postProcess: + - replace: + - regex: \.html + with: _vids.html Date: - selector: //span[@class="release-date"]/text() + selector: //span[@class="release-date"]/text()|//div[@class="cell update_date"]/text() postProcess: - replace: - - regex: 'Release Date: (\d{2}/\d{2}/\d{4})' - with: $1 + - regex: ^Release Date:\s + with: - parseDate: 01/02/2006 - Details: &details - selector: //span[@class="update_description"]/text() - Performers: - Name: //span[@class="update_models"]/a/text() + Details: //span[@class="update_description"] Tags: - Name: //span[@class="update_tags"]/a/text() - Studio: &studio Name: - fixed: Goddess Alexandra Snow - Image: &image - selector: 
//meta[@name="twitter:image"]/@content + selector: //span[@class="update_tags"]/a/text() + Image: + selector: //div[@class="VOD_update"]/img/@src0_4x + postProcess: + - replace: + - regex: ^ + with: https://www.goddesssnow.com + Studio: + Name: + fixed: Alexandra Snow + Performers: + Name: //span[@class="update_models"]/a - updateScraper: + sceneSearch: scene: - Title: - selector: //h2[@class="update-title"] - Date: - selector: //span[@class="update-date"] - postProcess: - - parseDate: 01/02/2006 - Details: - selector: //div[@class="update-join"]/a[2]/@href + Title: //div[@class="update_details"]/div/@data-title + URL: //a[@class="update-details-image"]/@href + Image: + selector: //a[@class="update-details-image"]/img/@src0_1x postProcess: - replace: - regex: ^ with: https://www.goddesssnow.com - - subScraper: *details - Performers: - Name: //span[@class="tour_update_models"]/a - Tags: - Name: //div[@class="update-tags"]/a - Studio: *studio - Image: *image -# Last Updated June 07, 2021 +# Last Updated January 31, 2023 From abfd58d82a522d3e6615a85c9e7029144089e8b7 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Mon, 13 Feb 2023 20:43:52 +0000 Subject: [PATCH 048/624] Add more sites to Trans500.yml (#1233) --- SCRAPERS-LIST.md | 5 +++++ scrapers/Trans500.yml | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 60e775663..1ec527b50 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -158,6 +158,7 @@ beauty-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- beauty4k.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- beaverhunt.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- becomingfemme.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +behindtrans500.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans beingphoenixmarie.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
belamionline.com|BelAmi.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay bellahd.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -625,6 +626,7 @@ iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay idols69.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV ifeelmyself.com|IFeelMyself.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- ihuntmycunt.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ikillitts.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans ikissgirls.com|IKissGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian iknowthatgirl.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- imdb.com|IMDB.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database @@ -1188,6 +1190,7 @@ sugardaddyporn.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- sunnylanelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sunnyleone.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- superbemodels.com|superbemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +superramon.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans susanayn.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swallowbay.com|SwallowBay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swallowed.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1292,6 +1295,7 @@ trans500.com/tour/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans trans500.com/tour3/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans trans500.com/tourespanol|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transangels.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transatplay.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transbella.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans @@ -1317,6 +1321,7 @@ truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans +tsgirlfriendexperience.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tspov.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- diff --git a/scrapers/Trans500.yml b/scrapers/Trans500.yml index 13250a28a..4f3b4a995 100644 --- a/scrapers/Trans500.yml +++ b/scrapers/Trans500.yml @@ -7,7 +7,12 @@ sceneByURL: scraper: sceneScraper - action: scrapeXPath url: + - behindtrans500.com/tour/ + - ikillitts.com/tour/ + - superramon.com/tour/ - trans500.com/tourespanol + - transatplay.com/tour/ + - tsgirlfriendexperience.com/tour/ scraper: sceneScraperEspanol xPathScrapers: @@ -66,4 +71,4 @@ xPathScrapers: with: "Behind Trans500" Tags: # Either //meta[@name="keywords"]/@content OR: //div[@class="scene-infobrick"][contains(text(), "Categories:")]/a/text() Name: //div[@class="scene-infobrick"][contains(text(), "Categories:")]/a/text() -# Last Updated November 10, 2020 +# Last Updated January 03, 2023 From 6e638c1998cb5e4cbb4cef6d6abc4fa9d517bc54 Mon Sep 17 00:00:00 2001 From: bnkai <48220860+bnkai@users.noreply.github.com> Date: Mon, 13 Feb 2023 22:47:26 +0200 Subject: [PATCH 049/624] Fix algolia search (#1268) --- scrapers/Algolia.py | 22 +++++++++++++--------- scrapers/Algolia_Biphoria.yml | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index f4b2a1a5c..4b21a7209 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -592,7 +592,9 @@ def parse_scene_json(scene_json, url=None): log.warning("Can't locate image.") # URL try: - hostname = scene_json['sitename'] + hostname = 
scene_json.get('sitename') + if hostname is None: + hostname = SITE # Movie if scene_json.get('movie_title'): scrape['movies'] = [{ @@ -606,17 +608,19 @@ def parse_scene_json(scene_json, url=None): if URL_DOMAIN and MOVIE_SITES.get(URL_DOMAIN): scrape['movies'][0][ 'url'] = f"{MOVIE_SITES[URL_DOMAIN]}/{scene_json['url_movie_title']}/{scene_json['movie_id']}" - net_name = scene_json['network_name'] - if net_name.lower() == "21 sextury": - hostname = "21sextury" - elif net_name.lower() == "21 naturals": - hostname = "21naturals" + net_name = scene_json.get('network_name') + if net_name: + if net_name.lower() == "21 sextury": + hostname = "21sextury" + elif net_name.lower() == "21 naturals": + hostname = "21naturals" scrape[ - 'url'] = f"https://{hostname.lower()}.com/en/video/{scene_json['sitename'].lower()}/{scene_json['url_title']}/{scene_json['clip_id']}" - except: + 'url'] = f"https://{hostname.lower()}.com/en/video/{hostname.lower()}/{scene_json['url_title']}/{scene_json['clip_id']}" + except Exception as exc: + log.debug(f"{exc}") if url: scrape['url'] = url - #debug(f"{scrape}") + #log.debug(f"{scrape}") return scrape def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: diff --git a/scrapers/Algolia_Biphoria.yml b/scrapers/Algolia_Biphoria.yml index 3076e6800..94d6441c2 100644 --- a/scrapers/Algolia_Biphoria.yml +++ b/scrapers/Algolia_Biphoria.yml @@ -36,4 +36,4 @@ galleryByURL: - Algolia.py - biphoria - gallery -# Last Updated December 22, 2022 +# Last Updated February 07, 2023 From def9c06c316dfbe42ed6b565a1e7e6cb16248af7 Mon Sep 17 00:00:00 2001 From: Valmox <122233426+Valmox@users.noreply.github.com> Date: Sun, 19 Feb 2023 18:55:56 +0000 Subject: [PATCH 050/624] HelixStudios.yml - Scene Scraper - increase image resolution, add director, studio code and fix date parsing (#1255) --- scrapers/HelixStudios.yml | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/scrapers/HelixStudios.yml 
b/scrapers/HelixStudios.yml index 982f77cdf..45f1fc7c4 100644 --- a/scrapers/HelixStudios.yml +++ b/scrapers/HelixStudios.yml @@ -56,15 +56,7 @@ xPathScrapers: Date: selector: //div[@class="info-items"]/span[@class="info-item date"]/text() postProcess: - - replace: - - regex: "th|st|nd|rd" - with: - - regex: '(\d+)\sdays\sago' - with: $1 - - regex: '^([a-zA-Z]+)\s(\d+)$' - with: "$1 $2, 2021" - - subtractDays: true - - parseDate: Jan 2, 2006 + - parseDate: January 2, 2006 Details: selector: //div[contains(@class, "description-content")]/p/text() concat: "#LINEBREAK#" @@ -82,6 +74,21 @@ xPathScrapers: URL: //link[@rel="canonical"][1]/@href Image: selector: //video/@poster + postProcess: + - replace: + - regex: 960w + with: 1500w + Director: + selector: //span[contains(@class, "info-item director")]/text() + Code: + selector: //*[@id="titleImage"]/@src + postProcess: + - replace: + - regex: ^.*\/\s* + with: + - replace: + - regex: \_1600.*$ + with: Studio: Name: fixed: Helix @@ -137,4 +144,4 @@ xPathScrapers: - replace: - regex: $ with: " " -# Last Updated December 29, 2021 +# Last Updated February 20, 2023 From 9f59ae4c111a6e60bc78995ddbaabbb8ecdbc230 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Sun, 19 Feb 2023 19:55:55 +0000 Subject: [PATCH 051/624] Fix gallery scraping via algolia (#1272) --- scrapers/Algolia.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index 4b21a7209..296d8ed15 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -236,12 +236,13 @@ def api_search_movie_id(m_id, url): return req def api_search_gallery_id(p_id, url): - gallery_id = [f"set_id:{p_id}"] + gallery_id = [[f"set_id:{p_id}"]] request_api = { "requests": [{ "indexName": "all_photosets", "params": "query=&hitsPerPage=20&page=0", - "facetFilters": gallery_id + "facetFilters": gallery_id, + "facets": [] }] } req = send_request(url, HEADERS, request_api) @@ 
-629,7 +630,9 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: """ scrape = {} # Title - if gallery_json.get('title'): + if gallery_json.get('clip_title'): + scrape['title'] = gallery_json['clip_title'].strip() + elif gallery_json.get('title'): scrape['title'] = gallery_json['title'].strip() # Date scrape['date'] = gallery_json.get('date_online') @@ -693,8 +696,8 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: hostname = "21sextury" elif net_name.lower() == "21 naturals": hostname = "21naturals" - scrape[ - 'url'] = f"https://{hostname.lower()}.com/en/video/{gallery_json['sitename'].lower()}/{gallery_json['url_title']}/{gallery_json['set_id']}" + scrape['url'] = f"https://www.{hostname.lower()}.com/en/photo/" \ + f"{gallery_json['url_title']}/{gallery_json['set_id']}" except: if url: scrape['url'] = url From 6223b8a854c18ca8558c7157d0b02e2087f6db4f Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Sat, 25 Feb 2023 21:39:14 +0000 Subject: [PATCH 052/624] Add scene scraper for ladyboygold.com and tsraw.com (#1214) --- SCRAPERS-LIST.md | 3 ++- scrapers/LadyboyGold.yml | 45 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 1ec527b50..130bb93eb 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -709,7 +709,7 @@ kpopping.com|Kpopping.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- kristenbjorn.com|KristenBjorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Gay ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -ladyboygold.com|LadyboyGold.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans +ladyboygold.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans ladydee.xxx|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
lanakendrick.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lanesisters.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1324,6 +1324,7 @@ tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_ tsgirlfriendexperience.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tspov.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsraw.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- tuktukpatrol.com|TukTukPatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- tushy.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- diff --git a/scrapers/LadyboyGold.yml b/scrapers/LadyboyGold.yml index 16b8a3842..966aa6c3c 100644 --- a/scrapers/LadyboyGold.yml +++ b/scrapers/LadyboyGold.yml @@ -4,6 +4,12 @@ performerByURL: url: - ladyboygold.com/index.php scraper: performerScraper +sceneByURL: + - action: scrapeXPath + url: + - ladyboygold.com/tour + - tsraw.com + scraper: sceneScraper xPathScrapers: performerScraper: common: @@ -42,4 +48,41 @@ xPathScrapers: - regex: ^ with: https://www.ladyboygold.com Details: //div[@class="profileBio"]/text() -# Last Updated May 17, 2022 + sceneScraper: + scene: + Title: + selector: //div[contains(@class, "show_video")]//h2/text() + postProcess: + - replace: + - regex: \ 4[Kk]$ + with: "" + Details: + selector: //div[contains(@class, "setDescription")]/p[contains(@class, "d-none")]/text() + concat: "\n\n" + Tags: + Name: //div[contains(@class, "tags")]//a/text() + Performers: + Name: + selector: //div[contains(@class, "show_video")]//h3/text() + postProcess: + - replace: + - regex: .*Ladyboy (.*) + with: $1 + split: ", " + Studio: + Name: + selector: //footer//p[contains(text(), 'Copyright')]/text()[2] + postProcess: + - replace: + - regex: ^(\d+\ )?(.+)\.\s+.* + with: 
$2 + - map: + TSRAW.com: TSRaw + LadyboyGold.com: LadyboyGold + Image: + selector: //div[contains(@class, "show_video")]//img/@style + postProcess: + - replace: + - regex: (background:\ ?url\()(.+)(?:\).+) + with: https://ladyboygold.com/$2 +# Last Updated December 29, 2022 From b3b7a7010f839e70104382eed5ee687f86440b53 Mon Sep 17 00:00:00 2001 From: Stephan Date: Sat, 25 Feb 2023 22:43:33 +0100 Subject: [PATCH 053/624] Improve Xvideos scraper (#1275) --- scrapers/Xvideos.yml | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/scrapers/Xvideos.yml b/scrapers/Xvideos.yml index dab555994..2fef8a494 100644 --- a/scrapers/Xvideos.yml +++ b/scrapers/Xvideos.yml @@ -1,10 +1,29 @@ -name: xvideos +name: Xvideos sceneByURL: - action: scrapeXPath url: - xvideos.com + - xvideos2.com + #- xvideos.es - xvideos.red scraper: sceneScraper + +sceneByFragment: + action: scrapeXPath + scraper: sceneScraper + queryURL: https://www.xvideos.com/video{filename}/x + queryURLReplace: + filename: + # expects an id in square brackets before extension, as saved by yt-dlp by default + - regex: '.*\[([0-9a-zA-Z]{4,})\]\.[^\.]+' + with: $1 + # or expects an id + - regex: '(?i)^.+-(\d+)\..+$' + with: $1 + # if no id is found in the filename + - regex: .*\.[^\.]+$ + with: # clear the filename so that it doesn't leak + xPathScrapers: sceneScraper: scene: @@ -14,11 +33,24 @@ xPathScrapers: Performers: Name: //li[@class="model"]/a/span[1]/text() Studio: - Name: //a[@class="btn btn-default label main uploader-tag hover-name"]/span[@class="name"]/text() + Name: //li[@class="main-uploader"]/a[ + starts-with(@href,'/channels') or + starts-with(@href,'/amateur-channels') or + starts-with(@href,'/model-channels') or + starts-with(@href,'/pornstar-channels') + ]/span[@class="name"]/text() Image: selector: //script[contains(text(), 
"setThumbUrl169")]/text()|//div[@id="html5video"][span[@class="premium-log-overlay"]]/div[@class="embed-responsive-item"]/@style postProcess: - replace: - regex: ^.+(?:setThumbUrl169|url)\('(.+?\.jpg).+$ with: $1 -# Last Updated January 23, 2023 + URL: //link[@rel="alternate" and @hreflang="x-default"]/@href + Date: + selector: //script[@type="application/ld+json"]/text() + postProcess: + - replace: + - regex: '[\S\s]+"uploadDate"\s*:\s*"(\d+-\d{2}-\d{2})[^"]+"[\S\s]+' + with: $1 + - parseDate: 2006-01-02 +# Last Updated February 15, 2023 From 3362c5b5384301aae0274171d097c22d94cc50f7 Mon Sep 17 00:00:00 2001 From: l1xander <125724412+l1xander@users.noreply.github.com> Date: Sun, 26 Feb 2023 04:14:11 +0530 Subject: [PATCH 054/624] Add DoTheWife xPath scene scraper (#1276) --- SCRAPERS-LIST.md | 1 + scrapers/DoTheWife.yml | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 scrapers/DoTheWife.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 130bb93eb..6ffebd334 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -381,6 +381,7 @@ dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- domai.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- dorcelclub.com|DorcelClub.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- dorcelvision.com|DorcelVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +dothewife.com|DoTheWife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- doubleteamedteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- diff --git a/scrapers/DoTheWife.yml b/scrapers/DoTheWife.yml new file mode 100644 index 000000000..7128a5623 --- /dev/null +++ b/scrapers/DoTheWife.yml @@ -0,0 +1,33 @@ +name: Do The Wife +sceneByURL: + - action: scrapeXPath + url: + - dothewife.com/update + scraper: sceneScraper +xPathScrapers: + sceneScraper: 
+ scene: + Title: //div[@class="section-title"]/h4 + Performers: + Name: //div[@class="model-rich"]/h4[@class="theme-color"]/a + Details: + selector: //p[@class="read-more"]/text() + postProcess: + - replace: + - regex: '^\s*:\s*' + with: "" + Tags: + Name: + selector: //div[@class="model-categories"]/a/text() + Studio: + Name: + fixed: Do The Wife + Image: + selector: //img[@class="video-banner"]/@src|//video/@poster + postProcess: + - replace: + - regex: (?:.+)(\/usermedia\/.+\.jpg)(?:.+) + with: $1 + - regex: "^/usermedia/" + with: "https://dothewife.com/usermedia/" +# Last Updated February 18, 2023 From 94e1c43009094b8087c6f9f7fa92a6a1bdc0a9ce Mon Sep 17 00:00:00 2001 From: jersprin <111305811+jersprin@users.noreply.github.com> Date: Sat, 25 Feb 2023 22:59:46 +0000 Subject: [PATCH 055/624] Fix Oldje scene performers/image selector (#1280) --- scrapers/Oldje.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/Oldje.yml b/scrapers/Oldje.yml index 9a75a37b6..11834b2c8 100644 --- a/scrapers/Oldje.yml +++ b/scrapers/Oldje.yml @@ -25,11 +25,11 @@ xPathScrapers: - parseDate: 2006-01-02 Performers: Name: - selector: //a[contains(@href,'/models/preview/')] + selector: //a[@class="act_name_h"] Details: selector: //p[@class='text']|//div[@class="preview_desc"] Image: - selector: (//div[@id="content" or @id="prev_m"]/a)[1]/img/@src + selector: //div[@id="content" or @id="prev_m"]/a[1]/img/@src oldje3someScraper: scene: Studio: @@ -51,4 +51,4 @@ xPathScrapers: - replace: - regex: ^ with: "https://www.oldje-3some.com/" -# Last Updated April 15, 2021 +# Last Updated February 25, 2023 From 5ad4f65826f396e3d0c9e8c607c39c462f2cf815 Mon Sep 17 00:00:00 2001 From: imagineimaginingthings <87026461+imagineimaginingthings@users.noreply.github.com> Date: Sat, 25 Feb 2023 23:07:01 +0000 Subject: [PATCH 056/624] Add ThisVid xPath scene scraper (#1274) --- SCRAPERS-LIST.md | 1 + scrapers/ThisVid.yml | 25 +++++++++++++++++++++++++ 2 files changed, 26 
insertions(+) create mode 100644 scrapers/ThisVid.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6ffebd334..0e164b716 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1275,6 +1275,7 @@ thickandbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- thickumz.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- thirdsexxxx.com|ThirdSexXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans thirdworldxxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Asian + Latin + Trans +thisvid.com|ThisVid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site throated.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- timtales.com|TimTales.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay tiny4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/ThisVid.yml b/scrapers/ThisVid.yml new file mode 100644 index 000000000..b28eb7a54 --- /dev/null +++ b/scrapers/ThisVid.yml @@ -0,0 +1,25 @@ +name: "ThisVid" +sceneByURL: + - action: scrapeXPath + url: + - thisvid.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $container: //div[@class='container'] + $videowrap: //div[@class='column-centre column-video']//div[@class='wrap'] + $desc: //ul[@class="description"] + scene: + Title: $container//div[@class='headline']//h1/text() + Image: + selector: $videowrap//div[@class='video-holder']//div//img/@src + postProcess: + - replace: + - regex: "//" + with: https:// + Tags: + Name: $desc//li//a[contains(@href,"/tags/")]/text() + Details: $desc//li//p/text() + +# Last Updated February 15, 2023 From ec0768b6d4be9053014d205ac0fc6e2c9f72545e Mon Sep 17 00:00:00 2001 From: quantumandwoody <124731992+quantumandwoody@users.noreply.github.com> Date: Sat, 25 Feb 2023 15:24:13 -0800 Subject: [PATCH 057/624] Add xPath scraper for zishy.com (#1271) --- SCRAPERS-LIST.md | 1 + scrapers/Zishy.yml | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 
scrapers/Zishy.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 0e164b716..6efc9a6f5 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1450,6 +1450,7 @@ z-filmz-originals.com|Z-Filmz-Originals.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- zentaidolls.com|ZentaiDolls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +zishy.com|Zishy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ## Non url scrapers diff --git a/scrapers/Zishy.yml b/scrapers/Zishy.yml new file mode 100644 index 000000000..d6c696dda --- /dev/null +++ b/scrapers/Zishy.yml @@ -0,0 +1,54 @@ +name: Zishy +sceneByURL: + - action: scrapeXPath + url: + - zishy.com/albums + scraper: sceneScraper + +galleryByURL: + - action: scrapeXPath + url: + - zishy.com/albums + scraper: galleryScraper +xPathScrapers: + sceneScraper: + scene: + Title: &title + selector: //head/title + postProcess: + - replace: + - regex: ([\w ]*) - Zishy + with: $1 + Details: &details + selector: //div[@id='descrip'] + Date: &date + selector: //div[@id='headline']/span[last()] + postProcess: + - replace: + - regex: added on (\w{3}) (\d{2}). 
(\d{4}) + with: $3-$1-$2 + - parseDate: 2006-Jan-02 + Image: + selector: //div[@class="player"]//@style[contains(.,"background")] + postProcess: + - replace: + - regex: .+url\((/uploads/.+)\);.+ + with: https://www.zishy.com/$1 + Studio: &studio + Name: + fixed: Zishy + Performers: &performers + Name: + selector: //span[@class='moreof']/a[text()] + postProcess: + - replace: + - regex: \#([\w\s]+) + with: $1 + galleryScraper: + gallery: + Title: *title + Details: *details + Date: *date + Studio: *studio + Performers: *performers +# Last Updated February 26, 2023 From b3fa4f59c4c9861f8454b785a255c06cd232989a Mon Sep 17 00:00:00 2001 From: quantumandwoody <124731992+quantumandwoody@users.noreply.github.com> Date: Sat, 25 Feb 2023 15:28:14 -0800 Subject: [PATCH 058/624] Added ability to scrape paywalled urls with PlayboyPlus scraper (#1270) --- scrapers/PlayboyPlus.yml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/scrapers/PlayboyPlus.yml b/scrapers/PlayboyPlus.yml index 94d0cd642..4a634b06d 100644 --- a/scrapers/PlayboyPlus.yml +++ b/scrapers/PlayboyPlus.yml @@ -3,12 +3,24 @@ sceneByURL: - action: scrapeXPath url: - playboyplus.com + - pbp-ma.playboy.com scraper: sceneScraper + queryURL: "{url}" + queryURLReplace: + url: + - regex: (?:https://)?(pbp-ma.playboy.com)/gallery/([^/]*)(?:/vip)? + with: https://playboyplus.com/gallery/$2 galleryByURL: - action: scrapeXPath url: - playboyplus.com + - pbp-ma.playboy.com scraper: galleryScraper + queryURL: "{url}" + queryURLReplace: + url: + - regex: (?:https://)?(pbp-ma.playboy.com)/gallery/([^/]*)(?:/vip)? 
+ with: https://playboyplus.com/gallery/$2 xPathScrapers: sceneScraper: scene: @@ -39,4 +51,4 @@ xPathScrapers: Performers: *performers Details: *details Studio: *studio -# Last Updated April 13, 2021 +# Last Updated February 07, 2023 From 2c5a687d90603d913bab94d00615e338e66d91b5 Mon Sep 17 00:00:00 2001 From: Timo <24251362+timo95@users.noreply.github.com> Date: Sun, 26 Feb 2023 00:42:56 +0100 Subject: [PATCH 059/624] Add scraper for Anime-DB (#1248) --- SCRAPERS-LIST.md | 1 + scrapers/Anime-DB.yml | 129 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 scrapers/Anime-DB.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6efc9a6f5..98c0c27c7 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -34,6 +34,7 @@ activeduty.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_ma adamandevepictures.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- addicted2girls.com|Algolia_Addicted2Girls.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- addisonstreet.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +adultanime.dbsearch.net|Anime-DB.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Japanese Hentai adultdvdempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- adultdvdmarketplace.com|AdultDvdMarketPlace.yml|:x:|:x:|:heavy_check_mark:|:x:|-|- adultempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- diff --git a/scrapers/Anime-DB.yml b/scrapers/Anime-DB.yml new file mode 100644 index 000000000..743cd5933 --- /dev/null +++ b/scrapers/Anime-DB.yml @@ -0,0 +1,129 @@ +name: Anime-DB + +sceneByName: + action: scrapeXPath + queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url=%2F%2Fadultanime.dbsearch.net%2Fsearch%2F%3Fkeyword%3D{}&check=true" + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: 
"https://adultanime.dbsearch.net/black_curtain_redirect.php?url={url}&check=true" + queryURLReplace: + url: + - regex: "https:" + with: + - regex: \/ + with: "%2F" + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - adultanime.dbsearch.net + queryURL: "https://adultanime.dbsearch.net/black_curtain_redirect.php?url={url}&check=true" + queryURLReplace: + url: + - regex: "https:" + with: + - regex: \/ + with: "%2F" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + scene: + Title: //div[@class="item-info"]/h4/a/text() + URL: + selector: //div[@class="item-info"]/h4/a/@href + postProcess: + - replace: + - regex: "^" + with: "https:" + Image: + selector: //section[@class="item-box"]/div[@class="item-img"]/a/img/@data-src + postProcess: + - replace: + - regex: "^" + with: "https:" + - regex: "/basic/" + with: "/small/" + - regex: '\.jpg' + with: "_s.jpg" + Date: + selector: //div[@class="item-info"]/p[@class="ndate"]/span/text() + postProcess: + - replace: # 2006年1月2日 + - regex: "\u5E74|\u6708" + with: "-" + - regex: "\u65E5" + with: + - regex: -(\d)- + with: -0$1- + - regex: -(\d)$ + with: -0$1 + Studio: + Name: //div[@class="item-info"]/p[@class="maker"]/a/text() + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + URL: //meta[@property="og:url"]/@content + Details: + selector: //section[@class="iteminfo-box"]/blockquote/p[@class="pq"]//text() + concat: "\n\n" + Code: //dt[text()="規格品番"]/following-sibling::dd[1]/p/text() + Image: //section[@id="sample-image"]/img/@data-src + Tags: + Name: + selector: //nav[@id="tag-list"]/ul/li/a/text() + postProcess: + - map: # remove all 作品形式 (format) tags + DVD: "" + オリジナルアニメ作品: "" + PCゲーム原作アニメ: "" + コミック原作アニメ: "" + ライトノベル・ノベル原作アニメ: "" + 同人原作アニメ: "" + アダルトコミック原作アニメ: "" + ボーイズラブアニメ作品: "" + 廉価版アニメ: "" + BD-BOX・DVD-BOX: "" + 3D: "" + RPG: "" + アクション: "" + 麻雀・テーブルゲーム: "" + 3Dポリゴン: "" + 廉価版・新装版: "" + 萌えゲーアワード受賞: "" + 4時間以上作品: "" + ベスト・総集編: "" + サンプル動画: "" + アドベンチャー: "" + シミュレーション: "" + 
Blu-ray(ブルーレイ): "" + DVDPG: "" + UMD: "" + VFT: "" + フルボイス: "" + 廉価版: "" + BDPG: "" + 売り尽くしセール: "" # other unneeded tags + Yahooコメント掲載禁止: "" + アニメ: "" + 特典付き・セット商品: "" + Studio: + Name: //dt[text()="レーベル"]/following-sibling::dd[1]/p/text() + Date: + selector: //dt[text()="発売日"]/following-sibling::dd[1]/p/text() + postProcess: + - replace: # 2006年1月2日 + - regex: "\u5E74|\u6708" + with: "-" + - regex: "\u65E5" + with: + - regex: -(\d)- + with: -0$1- + - regex: -(\d)$ + with: -0$1 + +driver: + useCDP: true # needed for the age confirmation redirect - cookies only work temporarily + +# Last Updated January 22, 2023 From 804aa63b36865a647eac7c29baba555f838fad56 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Sun, 26 Feb 2023 00:12:32 +0000 Subject: [PATCH 060/624] Improve Grooby Network scrapers (#1216) --- SCRAPERS-LIST.md | 31 ++++++++-------- scrapers/Algolia_EvilAngel.yml | 2 +- scrapers/GroobyNetwork-Brazilian.yml | 3 +- scrapers/GroobyNetwork-Partial.yml | 53 ++++++++++++++++++++++------ 4 files changed, 62 insertions(+), 27 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 98c0c27c7..3a935c4bf 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -102,7 +102,7 @@ asian18.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- asianamericantgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans asianfever.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- asiansexdiary.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans assholefever.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- assmeat.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -177,7 +177,7 @@ biguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
billiestar.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- biphoria.com|Algolia_Biphoria.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- bjraw.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- -black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -185,14 +185,14 @@ blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blakemason.com|BlakeMason.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay blownbyrone.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blowpass.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bobbiedenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -bobstgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +bobstgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans bondagecafe.com|BondageCafe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bondageliberation.com|BondageLiberation.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- boobpedia.com|Boobpedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database @@ -446,7 +446,7 @@ 
femdomempire.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:| feminized.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans femjoy.com|FemJoy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- femlatex.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans ferame.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored fetishnetwork.com|FetishNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish fetishpros.com|FetishPro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish @@ -479,7 +479,7 @@ fostertapes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fourfingerclub.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- foxxedup.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fragileslave.com|FragileSlave.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans fratx.com|FratX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay freakmobmedia.com|FreakMobMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- free.premiumbukkake.com|PremiumBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -544,9 +544,9 @@ goddesssnow.com|GoddessSnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gostuckyourself.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gotfilled.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- grannyghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Granny -grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- grooby.club|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans 
-groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans groobyvr.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans gymnastic.xxx|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gymrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -709,8 +709,8 @@ kinkyspa.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kissmefuckme.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- kpopping.com|Kpopping.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- kristenbjorn.com|KristenBjorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Gay -ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans ladyboygold.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans ladydee.xxx|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lanakendrick.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1255,9 +1255,10 @@ tgirljapan.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tgirljapanhardcore.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tgirlpornstar.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tgirlpostop.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans 
+tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirlsfuck.com|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- theartporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -1302,10 +1303,10 @@ transatplay.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transbella.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans transfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans -transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans transgressivexxx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans transmodeldatabase.com|TransModelDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans diff --git a/scrapers/Algolia_EvilAngel.yml b/scrapers/Algolia_EvilAngel.yml index c4494fa29..c13bbdb3e 100644 --- a/scrapers/Algolia_EvilAngel.yml +++ b/scrapers/Algolia_EvilAngel.yml @@ -57,4 +57,4 @@ galleryByURL: - Algolia.py - evilangel - gallery -# Last Updated December 26, 2022 \ No newline at end of file +# Last Updated December 26, 2022 diff --git a/scrapers/GroobyNetwork-Brazilian.yml 
b/scrapers/GroobyNetwork-Brazilian.yml index d5af835cf..ea1ad4733 100644 --- a/scrapers/GroobyNetwork-Brazilian.yml +++ b/scrapers/GroobyNetwork-Brazilian.yml @@ -4,6 +4,7 @@ sceneByURL: url: - brazilian-transsexuals.com - braziltgirls.xxx + - tgirlsfuck.com scraper: sceneScraper xPathScrapers: sceneScraper: @@ -35,4 +36,4 @@ xPathScrapers: - regex: ^\/\/ with: "https://" -# Last Updated December 16, 2022 +# Last Updated January 09, 2023 diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 4e1b19662..333df7287 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -1,8 +1,8 @@ name: "GroobyNetwork-Partial" sceneByURL: - action: scrapeXPath - url: - # Gone through list at: grooby.com/about/websites/ + url: &urls + # Gone through list at: grooby.com/about/websites/ - asiantgirl.com - blacktgirlshardcore.com - black-tgirls.com @@ -11,7 +11,6 @@ sceneByURL: - franks-tgirlworld.com - grooby-archives.com - groobygirls.com - - groobyvr.com - ladyboy-ladyboy.com - ladyboy.xxx - tgirlsex.xxx @@ -20,18 +19,26 @@ sceneByURL: - transexpov.com - transgasm.com scraper: sceneScraper + - action: scrapeXPath + url: + - groobyvr.com + scraper: sceneScraperGroobyVR +galleryByURL: + - action: scrapeXPath + url: *urls + scraper: galleryScraper xPathScrapers: sceneScraper: scene: - Title: //p[@class="trailertitle"]/text()|//div[@class="trailer_toptitle_left"]/text() - Date: + Title: &title //p[@class="trailertitle"]/text()|//div[@class="trailer_toptitle_left"]/text() + Date: &date selector: //div[@class="setdesc"]//b[contains(.,"Added")]/following-sibling::text()[1] postProcess: - parseDate: January 2, 2006 - Details: //div[@class="trailerpage_info"]/p[not(@class)]/text() - Performers: + Details: &details //div[@class="trailerpage_info"]/p[not(@class)]/text() + Performers: &performers Name: //div[@class="setdesc"]//a/text() - Studio: + Studio: &studio Name: //meta[@name="author"]/@content Image: selector: 
//meta[@property="og:image"]/@content @@ -39,5 +46,31 @@ xPathScrapers: - replace: - regex: ^// # bobstgirls with: "https://" - -# Last Updated July 20, 2021 + galleryScraper: + gallery: + Title: *title + Date: *date + Details: *details + Performers: *performers + Studio: *studio + sceneScraperGroobyVR: + scene: + Title: *title + Date: + selector: //div[@class="set_meta"]//b[contains(.,"Added")]/following-sibling::text()[1] + postProcess: + - parseDate: January 2, 2006 + Details: //div[@class="trailerblock"]/p[not(@class)]/text() + Performers: + Name: //div[@class="trailer_toptitle_left"]//a/text() + Studio: *studio + Image: + selector: //dl8-video/@poster + postProcess: + - replace: + - regex: content// # errant double slash + with: content/ + - replace: + - regex: ^/ + with: https://www.groobyvr.com/ +# Last Updated December 19, 2022 From 3f3c3b16a44198ac3972591db149d6359ecba0b3 Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Mon, 27 Feb 2023 03:55:29 +0000 Subject: [PATCH 061/624] use video id for Studio Code --- scrapers/ThisVid.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scrapers/ThisVid.yml b/scrapers/ThisVid.yml index b28eb7a54..57a7c8cca 100644 --- a/scrapers/ThisVid.yml +++ b/scrapers/ThisVid.yml @@ -21,5 +21,11 @@ xPathScrapers: Tags: Name: $desc//li//a[contains(@href,"/tags/")]/text() Details: $desc//li//p/text() + Code: + selector: //meta[@property="og:video:url"]/@content + postProcess: + - replace: + - regex: .+/(\d+)/?$ + with: $1 # Last Updated February 15, 2023 From d71775f06c7a34ec81704c9cd908963ed3445ab9 Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Mon, 27 Feb 2023 04:00:27 +0000 Subject: [PATCH 062/624] add performerByURL scraper --- scrapers/ThisVid.yml | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/scrapers/ThisVid.yml b/scrapers/ThisVid.yml index 57a7c8cca..bb07f45c8 100644 --- a/scrapers/ThisVid.yml +++ b/scrapers/ThisVid.yml @@ -4,6 +4,11 @@ sceneByURL: 
url: - thisvid.com scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - thisvid.com + scraper: performerScraper xPathScrapers: sceneScraper: common: @@ -27,5 +32,23 @@ xPathScrapers: - replace: - regex: .+/(\d+)/?$ with: $1 + performerScraper: + common: + $profileCaseL: //div[@class="profile"]//div[@class="case"]//div[@class="case-left"] + $profileCaseR: //div[@class="profile"]//div[@class="case"]//div[@class="case-right"] + performer: + Name: $profileCaseL//span[contains(text(),"Name")]/strong|//div[@class="profile-menu"]//div[@class="headline"]//h2/text() + Birthdate: + selector: $profileCaseL//span[contains(text(),"Birth")]/strong + postProcess: + - parseDate: 02 January, 2006 + - parseDate: 2006-01-02 + Country: + selector: $profileCaseL//span[contains(text(),"Country")]/strong + postProcess: + - map: + United States: "USA" + Gender: $profileCaseR//span[contains(text(),"Gender")]/strong + Image: //div[@class="avatar"]/img[not(contains(@src,"no-avatar"))]/@src -# Last Updated February 15, 2023 +# Last Updated February 26, 2023 From e7524bfe3b36a5c1cf08bce9277fb1f1db012663 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Mon, 27 Feb 2023 19:39:13 +0000 Subject: [PATCH 063/624] Fix VRBangers scraper (#1282) --- scrapers/VRBangers.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scrapers/VRBangers.yml b/scrapers/VRBangers.yml index 0978a9308..791f87ed5 100644 --- a/scrapers/VRBangers.yml +++ b/scrapers/VRBangers.yml @@ -35,7 +35,7 @@ sceneByFragment: xPathScrapers: sceneScraper: common: - $info: &info //div[starts-with(@class,"video-item__info ")] + $info: &info //div[starts-with(@class,"video-item__info ")]|//div[@class="single-video-info"] scene: Title: &titleSel //h1 Date: &dateAttr @@ -43,12 +43,12 @@ xPathScrapers: postProcess: - parseDate: Jan 2, 2006 Details: &detailsAttr - selector: //div[contains(@class,"second-text")]/div//text() + selector: 
//div[contains(@class,"second-text")]/div//text()|//div[contains(@class,"single-video-description")]/div//text() concat: " " Tags: - Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text() + Name: //div[@data-testid="video-categories-list"]/a[contains(@href,"category/")]/text()|//div[@class="single-video-categories"]//a[contains(@href,"category/")]/text() Performers: - Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text() + Name: //div[starts-with(@class, 'video-item__info-starring')]//a/text()|//div[contains(@class, "single-video-info__starring")]//a/text() Studio: Name: &studioName selector: &studioURLSel //meta[@name="dl8-customization-brand-url"]/@content @@ -92,4 +92,4 @@ xPathScrapers: Name: *studioName URL: *studioURL FrontImage: *imageSel -# Last Updated February 06, 2023 +# Last Updated February 27, 2023 From 7a8d9d3e57f9ca03d53e4b2ef7bf7703d243b8ae Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Mon, 27 Feb 2023 19:50:16 +0000 Subject: [PATCH 064/624] Fix ShinyBound scraper (#1279) --- SCRAPERS-LIST.md | 2 +- scrapers/ShinyBound.yml | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 3a935c4bf..b9c5f434b 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1118,7 +1118,7 @@ shesbrandnew.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- sheseducedme.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shewillcheat.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shinybound.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -shinysboundsluts.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shinysboundsluts.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans shiofuky.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored shoplyfter.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
shoplyftermylf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/ShinyBound.yml b/scrapers/ShinyBound.yml index dcb2cfbcc..302268f35 100644 --- a/scrapers/ShinyBound.yml +++ b/scrapers/ShinyBound.yml @@ -8,24 +8,22 @@ sceneByURL: xPathScrapers: sceneScraper: - common: - $logo: //div[@class="logo"]/a/@href scene: - Title: //span[@class='update_title'] + Title: //h1 Details: - selector: //span[@class='latest_update_description'] + selector: //div[contains(@class, "videoDescription")]/p Date: - selector: //span[@class='availdate'] + selector: //div[@class="contentT"]/ul[@class="contentInfo"]/li[./i[contains(@class, "fa-calendar")]]/text() postProcess: - - parseDate: 01/02/2006 + - parseDate: Jan 2, 2006 Tags: - Name: //span[@class='update_tags']/a + Name: //div[@class="tags"]//a/text() Performers: Name: - selector: //span[@class='tour_update_models']/a + selector: //div[@class="models"]//a/text() Studio: Name: - selector: $logo + selector: //div[@class="logo"]/a/@href postProcess: - replace: - regex: https://([^.]+)\..+ @@ -34,6 +32,9 @@ xPathScrapers: shinysboundsluts: ShinysBoundSluTS shinybound: ShinyBound Image: - selector: $logo|//img[@class='stdimage promo_thumb left thumbs']/@src - concat: "/" -# Last Updated May 23, 2022 + selector: //iframe/@src + postProcess: + - replace: + - regex: .*\?poster=(.*\.jpg).* + with: $1 +# Last Updated February 23, 2023 From 41af3ec2e89201b61a3a77552edd73d7e3183a27 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Mon, 27 Feb 2023 20:16:59 +0000 Subject: [PATCH 065/624] Add lordaardvark.com xPath scraper (#1278) --- SCRAPERS-LIST.md | 1 + scrapers/LordAardvark.yml | 58 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 scrapers/LordAardvark.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index b9c5f434b..052f874a4 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -745,6 +745,7 @@ 
littlehellcat.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lollyhardcore.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lonestarangel.com|LoneStarAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lookathernow.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +lordaardvark.com|LordAardvark.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|CDP|- lovehairy.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- loveherass.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- loveherboobs.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish diff --git a/scrapers/LordAardvark.yml b/scrapers/LordAardvark.yml new file mode 100644 index 000000000..a60d16415 --- /dev/null +++ b/scrapers/LordAardvark.yml @@ -0,0 +1,58 @@ +name: "LordAardvark" +galleryByURL: + - action: scrapeXPath + url: + - lordaardvark.com/html/galleries.html + scraper: galleryScraper +sceneByURL: + - action: scrapeXPath + url: + - lordaardvark.com/films/ + scraper: sceneScraper +xPathScrapers: + galleryScraper: + common: + $content: //div[contains(@class, "viewer-content-controls")] + gallery: + Date: + selector: $content/h2 + postProcess: + - parseDate: January 2, 2006 + Title: $content/h1 + Details: + selector: $content/p + Studio: + Name: + fixed: LordAardvark + sceneScraper: + scene: + Title: //div[@class="player-overlay-title"]/h1 + Details: + selector: //section[@class="player-overlay-description"]//div[@class="row"]/div[@class="col"]/* + concat: "\n\n" + Date: + selector: //meta[@property="video:release_date"]/@content + postProcess: + - replace: + - regex: .*(\d{4}-\d{2}-\d{2}).* + with: $1 + Image: //meta[@property="og:image"]/@content + Studio: + Name: + fixed: LordAardvark + Code: + selector: //script[contains(text(), "_filmOrigin")] + postProcess: + - replace: + - regex: '.*id: (\d+).*' + with: $1 + Movies: + Name: //p[contains(text(), "Series:")]/following-sibling::a/text() + Tags: + Name: 
//div[contains(@class, "col")]/a[@class="player-tag"]/text() + Performers: + Name: //p[contains(text(), "Characters:")]/following-sibling::a/text() + +driver: + useCDP: true +# Last Updated February 23, 2023 From 488427e2e3375f0234aa814a57dc1f15d25241f6 Mon Sep 17 00:00:00 2001 From: nrg101 <37197306+nrg101@users.noreply.github.com> Date: Mon, 27 Feb 2023 21:13:08 +0000 Subject: [PATCH 066/624] Fix filthyfamily scraper (scrape from the mobile site) (#1277) --- scrapers/FilthyFamily.yml | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/scrapers/FilthyFamily.yml b/scrapers/FilthyFamily.yml index 90ce5cb26..a6acc3b54 100644 --- a/scrapers/FilthyFamily.yml +++ b/scrapers/FilthyFamily.yml @@ -3,30 +3,41 @@ sceneByURL: - action: scrapeXPath url: - filthyfamily.com + - mobile.filthyfamily.com scraper: sceneScraper + queryURL: "{url}" + queryURLReplace: + url: + - regex: https://(www\.)?filthyfamily\.com + with: https://mobile.filthyfamily.com xPathScrapers: sceneScraper: + common: + $videoinfo: //div[@id="video-player-meta"]/div[@class="card-info"] scene: - Title: //div[@class='hideWhilePlaying']/img/@alt - Details: //p[@class='videoDetail']/text() + Title: $videoinfo/h1/text() + Details: + selector: $videoinfo/p[@class[contains(.,"desc")]] Image: - selector: //div[@class="hideWhilePlaying"]/img/@src + selector: //video/@data-poster-url postProcess: - replace: - - regex: ^ - with: "https:" + - regex: \[resolution\] + with: ipadbig.jpg + - regex: ^// + with: https:// Tags: Name: - selector: //meta[@http-equiv='keywords']/@content - split: ", " + selector: $videoinfo/div[@class="tags"]//a/text() + Performers: + Name: //section[@class="group"]/div[@data-scrollbar="#model-scroll"]/ul//li//div[@class="model-info"]/h2/a/text() Studio: Name: fixed: Filthy Family URL: - selector: //link[@rel='canonical']/@href + selector: $videoinfo/div[contains(@class, "act")]/a[@id="ovrl-share-btn"]/@*[name()="addthis:url"] postProcess: - 
replace: - - regex: ^ - with: "https:" - -# Last Updated October 07, 2020 + - regex: mobile\.bangbros\.com + with: mobile.filthyfamily.com +# Last Updated February 27, 2023 From f0a2363a2c2476b8804b63f2dfecc7bb4ff17cea Mon Sep 17 00:00:00 2001 From: SnZ <3882467+EsEnZeT@users.noreply.github.com> Date: Wed, 22 Mar 2023 00:30:26 +0100 Subject: [PATCH 067/624] Torrent scraper rewrite v2 (#1198) --- scrapers/torrent.py | 186 +++++++---- scrapers/torrent.yml | 21 +- scrapers/torrent_parser.py | 646 ------------------------------------- 3 files changed, 146 insertions(+), 707 deletions(-) delete mode 100644 scrapers/torrent_parser.py diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 2e5e283d2..73b4862dc 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -1,66 +1,138 @@ -import os import sys import json +from os.path import basename +from pathlib import Path +import re +from datetime import datetime +import difflib try: - import torrent_parser as tp + from bencoder import bdecode except ModuleNotFoundError: - print("You need to download the file 'torrent_parser.py' from the community repo! (CommunityScrapers/tree/master/scrapers/torrent_parser.py)", file=sys.stderr) + print("You need to install the 'bencoder.pyx' module. (https://pypi.org/project/bencoder.pyx/)", file=sys.stderr) sys.exit() -''' This script parses all torrent files in the specified directory for embedded metadata. - The title can either be a filename or the filename of the .torrent file - - This requires python3. - This uses the torrent_parser library to parse torrent files from: https://github.com/7sDream/torrent_parser - This library is under the MIT Licence. 
- - ''' - -path='./torrents/' - -def query(title): -# print(f"Test",file=sys.stderr) - for root,dirs,files in os.walk(path): - for name in files: - if '.torrent' in name: - query_torrent(title,os.path.join(root,name)) - -def query_torrent(title,path,found=False): - data=tp.parse_torrent_file(path) - # does the torrent contain more than one file and check if the file name we want is in the list - if not found and 'files' in data['info']: - for d in data['info']['files']: - for f in d['path']: - if title in f: - found=True - elif title in data['info']['name']: - found=True - if found: - res={'title':title} - if 'metadata' in data: - if 'title' in data['metadata']: - res['title']=data['metadata']['title'] - if 'cover url' in data['metadata']: - res['image']=data['metadata']['cover url'] - if 'description' in data['metadata']: - res['details']=data['metadata']['description'] - if 'taglist' in data['metadata']: - res['tags']=[{"name":x} for x in data['metadata']['taglist']] - - print(json.dumps(res)) - exit(0) -def lookup_torrent(title): - for root,dirs,files in os.walk(path): - if title in files: - query_torrent(title,os.path.join(root,title),found=True) - +try: + from py_common import graphql +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! 
" + "(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() + +TORRENTS_PATH = Path("torrents") + + +def get_scene_data(fragment_data): + scene_id = fragment_data["id"] + scene_title = fragment_data["title"] + scene_files = [] + + response = graphql.callGraphQL(""" + query FileInfoBySceneId($id: ID) { + findScene(id: $id) { + files { + path + size + } + } + }""", {"id": scene_id}) + + if response and response["findScene"]: + for f in response["findScene"]["files"]: + scene_files.append({"filename": basename(f["path"]), "size": f["size"]}) + return {"id": scene_id, "title": scene_title, "files": scene_files} + return {} + +def process_tags_performers(tagList): + return map(lambda tag: decode_bytes(tag).replace('.', ' '), tagList) + +def process_description_bbcode(description): + res = re.sub(r'\[(?:b|i|u|s|url|quote)?\](.*)?\[\/(?:b|i|u|s|url|quote)\]',r"\1", description ) + res = re.sub(r'\[.*?\].*?\[\/.*?\]',r'',res) + res = re.sub(r'\[.*?\]',r'',res) + return res.strip() + +def get_torrent_metadata(torrent_data): + res = {} + + if b"metadata" in torrent_data: + if b"title" in torrent_data[b"metadata"]: + res["title"] = decode_bytes(torrent_data[b"metadata"][b"title"]) + if b"cover url" in torrent_data[b"metadata"]: + res["image"] = decode_bytes(torrent_data[b"metadata"][b"cover url"]) + if b"description" in torrent_data[b"metadata"]: + res["details"] = process_description_bbcode(decode_bytes(torrent_data[b"metadata"][b"description"])) + if b"taglist" in torrent_data[b"metadata"]: + res["tags"] = [{"name": decode_bytes(t)} for t in torrent_data[b"metadata"][b"taglist"]] + if b"taglist" in torrent_data[b"metadata"]: + res["performers"]=[{"name":x} for x in process_tags_performers(torrent_data[b"metadata"][b"taglist"])] + if b"comment" in torrent_data: + res["url"] = decode_bytes(torrent_data[b"comment"]) + if b"creation date" in torrent_data: + res["date"] = datetime.fromtimestamp(torrent_data[b"creation date"]).strftime("%Y-%m-%d") + 
return res + + +def decode_bytes(s, encodings=("utf-8", "latin-1")): + for enc in encodings: + try: + return s.decode(enc) + except UnicodeDecodeError: + pass + return s.decode("utf-8", "ignore") + + +def scene_in_torrent(scene_data, torrent_data): + for scene in scene_data["files"]: + if b"length" in torrent_data[b"info"]: + if scene["filename"] in decode_bytes(torrent_data[b"info"][b"name"]) and torrent_data[b"info"][b"length"] == scene["size"]: + return True + elif b"files" in torrent_data[b"info"]: + for file in torrent_data[b"info"][b"files"]: + if scene["filename"] in decode_bytes(file[b"path"][-1]) and file[b"length"] == scene["size"]: + return True + + +def process_torrents(scene_data): + if scene_data: + for name in TORRENTS_PATH.glob("*.torrent"): + with open(name, "rb") as f: + torrent_data = bdecode(f.read()) + if scene_in_torrent(scene_data, torrent_data): + return get_torrent_metadata(torrent_data) + return {} + +def similarity_file_name(search, fileName): + result = difflib.SequenceMatcher(a=search.lower(), b=fileName.lower()) + return result.ratio() + +def cleanup_name(name): + ret = str(name) + ret = ret.removeprefix("torrents\\").removesuffix(".torrent") + return ret + + if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) - title=fragment['title'] - if '.torrent' in title: - lookup_torrent(title) - else: - query(title) - print(json.dumps(fragment)) -# Last Updated February 18, 2021 + print(json.dumps(process_torrents(get_scene_data(fragment)))) +elif sys.argv[1] == "fragment": + filename = json.loads(sys.stdin.read()).get('url') + with open(filename, 'rb') as f: + torrent_data = bdecode(f.read()) + print(json.dumps(get_torrent_metadata(torrent_data))) +elif sys.argv[1] == "search": + search = json.loads(sys.stdin.read()).get('name') + torrents = list(TORRENTS_PATH.rglob('*.torrent')) + ratios = {} + for t in torrents: + clean_t = cleanup_name(t) + ratios[round(10000*(1-similarity_file_name(search, clean_t)))] = {'url': 
str(t.absolute()), 'title': clean_t} + + # Order ratios and return the top 5 results + if len(ratios) > 0: + ratios_sorted = list(ratios.keys()) + ratios_sorted.sort() + ratios_filtered = (ratios[i] for i in ratios_sorted[:5]) + print(json.dumps(list(ratios_filtered))) + +# Last Updated December 16, 2022 diff --git a/scrapers/torrent.yml b/scrapers/torrent.yml index 3eca5cbc8..785ce2473 100644 --- a/scrapers/torrent.yml +++ b/scrapers/torrent.yml @@ -1,10 +1,23 @@ -name: "Torrent" +name: Torrent sceneByFragment: action: script script: - - python - # use python3 instead if needed + - python3 - torrent.py - query + +sceneByName: + action: script + script: + - python3 + - torrent.py + - search -# Last Updated February 04, 2021 +sceneByQueryFragment: + action: script + script: + - python3 + - torrent.py + - fragment + +# Last Updated December 16, 2022 \ No newline at end of file diff --git a/scrapers/torrent_parser.py b/scrapers/torrent_parser.py deleted file mode 100644 index bc6e0e7b7..000000000 --- a/scrapers/torrent_parser.py +++ /dev/null @@ -1,646 +0,0 @@ -#!/usr/bin/env python -# coding: utf-8 - -""" -A .torrent file parser for both Python 2 and 3 - -Usage: - - data = parse_torrent_file(filename) - - # or - - with open(filename, 'rb') as f: # the binary mode 'b' is necessary - data = TorrentFileParser(f).parse() - - # then you can edit the data - - data['announce-list'].append(['http://127.0.0.1:8080']) - - # and create a new torrent file from data - - create_torrent_file('new.torrent', data) - - # or - - with open('new.torrent', 'wb') as f: - f.write(TorrentFileCreator(data).encode()) - - # or you don't deal with file, just object in memory - - data = decode(b'i12345e') # data = 12345 - content = encode(data) # content = b'i12345e' - -""" - -from __future__ import print_function, unicode_literals - -import argparse -import binascii -import collections -import io -import json -import sys -import warnings - -try: - FileNotFoundError -except NameError: - # Python 
2 do not have FileNotFoundError, use IOError instead - # noinspection PyShadowingBuiltins - FileNotFoundError = IOError - -try: - # noinspection PyPackageRequirements - from chardet import detect as _detect -except ImportError: - def _detect(_): - warnings.warn("No chardet module installed, encoding will be utf-8") - return {'encoding': 'utf-8', 'confidence': 1} - -try: - # noinspection PyUnresolvedReferences - # For Python 2 - str_type = unicode -except NameError: - # For Python 3 - str_type = str - -__all__ = [ - 'InvalidTorrentDataException', - 'BEncoder', - 'BDecoder', - 'encode', - 'decode', - 'TorrentFileParser', - 'create_torrent_file', - 'parse_torrent_file', -] - -__version__ = '0.3.0' - - -def detect(content): - return _detect(content)['encoding'] - - -class InvalidTorrentDataException(Exception): - def __init__(self, pos, msg=None): - msg = msg or "Invalid torrent format when read at pos {pos}" - msg = msg.format(pos=pos) - super(InvalidTorrentDataException, self).__init__(msg) - - -class __EndCls(object): - pass - - -_END = __EndCls() - - -def _check_hash_field_params(name, value): - return isinstance(name, str_type) \ - and isinstance(value, tuple) and len(value) == 2 \ - and isinstance(value[0], int) and isinstance(value[1], bool) - - -class TorrentFileParser(object): - - TYPE_LIST = 'list' - TYPE_DICT = 'dict' - TYPE_INT = 'int' - TYPE_STRING = 'string' - TYPE_END = 'end' - - LIST_INDICATOR = b'l' - DICT_INDICATOR = b'd' - INT_INDICATOR = b'i' - END_INDICATOR = b'e' - STRING_INDICATOR = b'' - STRING_DELIMITER = b':' - - HASH_FIELD_PARAMS = { - # field length need_list - 'pieces': (20, True), - 'ed2k': (16, False), - 'filehash': (20, False), - } - - TYPES = [ - (TYPE_LIST, LIST_INDICATOR), - (TYPE_DICT, DICT_INDICATOR), - (TYPE_INT, INT_INDICATOR), - (TYPE_END, END_INDICATOR), - (TYPE_STRING, STRING_INDICATOR), - ] - - def __init__( - self, fp, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, - ): - """ - 
:param fp: a **binary** file-like object to parse, - which means need 'b' mode when use built-in open function - :param bool use_ordered_dict: Use collections.OrderedDict as dict - container default False, which mean use built-in dict - :param str encoding: file content encoding, default utf-8, use 'auto' - to enable charset auto detection (need 'chardet' package installed) - :param str errors: how to deal with encoding error when try to parse - string from content with ``encoding`` - :param Dict[str, Tuple[int, bool]] hash_fields: extra fields should - be treated as hash value. dict key is the field name, value is a - two-element tuple of (hash_block_length, as_a_list). - See :any:`hash_field` for detail - """ - if getattr(fp, 'read', ) is None \ - or getattr(fp, 'seek') is None: - raise ValueError('Parameter fp needs a file like object') - - self._pos = 0 - self._encoding = encoding - self._content = fp - self._use_ordered_dict = use_ordered_dict - self._error_handler = errors - self._hash_fields = dict(TorrentFileParser.HASH_FIELD_PARAMS) - if hash_fields is not None: - for k, v in hash_fields.items(): - if _check_hash_field_params(k, v): - self._hash_fields[k] = v - else: - raise ValueError( - "Invalid hash field parameter, it should be type of " - "Dict[str, Tuple[int, bool]]" - ) - self._hash_raw = bool(hash_raw) - - def hash_field(self, name, block_length=20, need_list=False): - """ - Let field with the `name` to be treated as hash value, don't decode it - as a string. 
- - :param str name: field name - :param int block_length: hash block length for split - :param bool need_list: if True, when the field only has one block( - or even empty) its parse result will be a one-element list( - or empty list); If False, will be a string in 0 or 1 block condition - :return: return self, so you can chained call - """ - v = (block_length, need_list) - if _check_hash_field_params(name, v): - self._hash_fields[name] = v - else: - raise ValueError("Invalid hash field parameter") - return self - - def parse(self): - """ - :rtype: dict|list|int|str|bytes - :raise: :any:`InvalidTorrentDataException` when parse failed or error - happened when decode string using specified encoding - """ - self._restart() - data = self._next_element() - - try: - c = self._read_byte(1, True) - raise InvalidTorrentDataException( - 0, 'Expect EOF, but get [{}] at pos {}'.format(c, self._pos) - ) - except EOFError: # expect EOF - pass - - return data - - def _read_byte(self, count=1, raise_eof=False): - assert count >= 0 - gotten = self._content.read(count) - if count != 0 and len(gotten) == 0: - if raise_eof: - raise EOFError() - raise InvalidTorrentDataException( - self._pos, - 'Unexpected EOF when reading torrent file' - ) - self._pos += count - return gotten - - def _seek_back(self, count): - self._content.seek(-count, 1) - self._pos = self._pos - count - - def _restart(self): - self._content.seek(0, 0) - self._pos = 0 - - def _dict_items_generator(self): - while True: - k = self._next_element() - if k is _END: - return - if not isinstance(k, str_type): - raise InvalidTorrentDataException( - self._pos, "Type of dict key can't be " + type(k).__name__ - ) - if k in self._hash_fields: - v = self._next_hash(*self._hash_fields[k]) - else: - v = self._next_element(k) - if k == 'encoding': - self._encoding = v - yield k, v - - def _next_dict(self): - data = collections.OrderedDict() if self._use_ordered_dict else dict() - for key, element in self._dict_items_generator(): - 
data[key] = element - return data - - def _list_items_generator(self): - while True: - element = self._next_element() - if element is _END: - return - yield element - - def _next_list(self): - return [element for element in self._list_items_generator()] - - def _next_int(self, end=END_INDICATOR): - value = 0 - char = self._read_byte(1) - neg = False - while char != end: - if not neg and char == b'-': - neg = True - elif not b'0' <= char <= b'9': - raise InvalidTorrentDataException(self._pos - 1) - else: - value = value * 10 + int(char) - int(b'0') - char = self._read_byte(1) - return -value if neg else value - - def _next_string(self, need_decode=True, field=None): - length = self._next_int(self.STRING_DELIMITER) - raw = self._read_byte(length) - if need_decode: - encoding = self._encoding - if encoding == 'auto': - self.encoding = encoding = detect(raw) - try: - string = raw.decode(encoding, self._error_handler) - except UnicodeDecodeError as e: - msg = [ - "Fail to decode string at pos {pos} using encoding ", - e.encoding - ] - if field: - msg.extend([ - ' when parser field "', field, '"' - ', maybe it is an hash field. 
', - 'You can use self.hash_field("', field, '") ', - 'to let it be treated as hash value, ', - 'so this error may disappear' - ]) - raise InvalidTorrentDataException( - self._pos - length + e.start, - ''.join(msg) - ) - return string - return raw - - def _next_hash(self, p_len, need_list): - raw = self._next_string(need_decode=False) - if len(raw) % p_len != 0: - raise InvalidTorrentDataException( - self._pos - len(raw), "Hash bit length not match at pos {pos}" - ) - if self._hash_raw: - return raw - res = [ - binascii.hexlify(chunk).decode('ascii') - for chunk in (raw[x:x+p_len] for x in range(0, len(raw), p_len)) - ] - if len(res) == 0 and not need_list: - return '' - if len(res) == 1 and not need_list: - return res[0] - return res - - @staticmethod - def _next_end(): - return _END - - def _next_type(self): - for (element_type, indicator) in self.TYPES: - indicator_length = len(indicator) - char = self._read_byte(indicator_length) - if indicator == char: - return element_type - self._seek_back(indicator_length) - raise InvalidTorrentDataException(self._pos) - - def _type_to_func(self, t): - return getattr(self, '_next_' + t) - - def _next_element(self, field=None): - element_type = self._next_type() - if element_type is TorrentFileParser.TYPE_STRING and field is not None: - element = self._type_to_func(element_type)(field=field) - else: - element = self._type_to_func(element_type)() - return element - - -class BEncoder(object): - - TYPES = { - (dict,): TorrentFileParser.TYPE_DICT, - (list,): TorrentFileParser.TYPE_LIST, - (int,): TorrentFileParser.TYPE_INT, - (str_type, bytes): TorrentFileParser.TYPE_STRING, - } - - def __init__(self, data, encoding='utf-8', hash_fields=None): - """ - :param dict|list|int|str data: data will be encoded - :param str encoding: string field output encoding - :param List[str] hash_fields: see - :any:`TorrentFileParser.__init__` - """ - self._data = data - self._encoding = encoding - self._hash_fields = 
list(TorrentFileParser.HASH_FIELD_PARAMS.keys()) - if hash_fields is not None: - self._hash_fields.extend(str_type(hash_fields)) - - def hash_field(self, name): - """ - see :any:`TorrentFileParser.hash_field` - - :param str name: - :return: return self, so you can chained call - """ - return self._hash_fields.append(str_type(name)) - - def encode(self): - """ - Encode to bytes - - :rtype: bytes - """ - return b''.join(self._output_element(self._data)) - - def encode_to_filelike(self): - """ - Encode to a file-like(BytesIO) object - - :rtype: BytesIO - """ - return io.BytesIO(self.encode()) - - def _output_string(self, data): - if isinstance(data, str_type): - data = data.encode(self._encoding) - yield str(len(data)).encode('ascii') - yield TorrentFileParser.STRING_DELIMITER - yield data - - @staticmethod - def _output_int(data): - yield TorrentFileParser.INT_INDICATOR - yield str(data).encode('ascii') - yield TorrentFileParser.END_INDICATOR - - def _output_decode_hash(self, data): - if isinstance(data, str_type): - data = [data] - result = [] - for hash_line in data: - if not isinstance(hash_line, str_type): - raise InvalidTorrentDataException( - None, - "Hash must be " + str_type.__name__ + " not " + - type(hash_line).__name__, - ) - if len(hash_line) % 2 != 0: - raise InvalidTorrentDataException( - None, - "Hash(" + hash_line + ") length(" + str(len(hash_line)) + - ") is a not even number", - ) - try: - raw = binascii.unhexlify(hash_line) - except binascii.Error as e: - raise InvalidTorrentDataException( - None, str(e), - ) - result.append(raw) - for x in self._output_string(b''.join(result)): - yield x - - def _output_dict(self, data): - yield TorrentFileParser.DICT_INDICATOR - for k, v in data.items(): - if not isinstance(k, str_type): - raise InvalidTorrentDataException( - None, "Dict key must be " + str_type.__name__, - ) - for x in self._output_element(k): - yield x - if k in self._hash_fields: - for x in self._output_decode_hash(v): - yield x - else: - for 
x in self._output_element(v): - yield x - yield TorrentFileParser.END_INDICATOR - - def _output_list(self, data): - yield TorrentFileParser.LIST_INDICATOR - for v in data: - for x in self._output_element(v): - yield x - yield TorrentFileParser.END_INDICATOR - - def _type_to_func(self, t): - return getattr(self, '_output_' + t) - - def _output_element(self, data): - for types, t in self.TYPES.items(): - if isinstance(data, types): - # noinspection PyCallingNonCallable - return self._type_to_func(t)(data) - raise InvalidTorrentDataException( - None, - "Invalid type for torrent file: " + type(data).__name__, - ) - - -class BDecoder(object): - def __init__( - self, data, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, - ): - """ - See :any:`TorrentFileParser.__init__` for parameter description. - - :param bytes data: raw data to be decoded - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - """ - self._parser = TorrentFileParser( - io.BytesIO(bytes(data)), - use_ordered_dict, - encoding, - errors, - hash_fields, - hash_raw, - ) - - def hash_field(self, name, block_length=20, need_dict=False): - """ - See :any:`TorrentFileParser.hash_field` for parameter description - - :param name: - :param block_length: - :param need_dict: - :return: return self, so you can chained call - """ - self._parser.hash_field(name, block_length, need_dict) - return self - - def decode(self): - return self._parser.parse() - - -def encode(data, encoding='utf-8', hash_fields=None): - """ - Shortcut function for encode python object to torrent file format(bencode) - - See :any:`BEncoder.__init__` for parameter description - - :param dict|list|int|str|bytes data: data to be encoded - :param str encoding: - :param List[str] hash_fields: - :rtype: bytes - """ - return BEncoder(data, encoding, hash_fields).encode() - - -def decode( - data, 
use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, -): - """ - Shortcut function for decode bytes as torrent file format(bencode) to python - object - - See :any:`BDecoder.__init__` for parameter description - - :param bytes data: raw data to be decoded - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - :rtype: dict|list|int|str|bytes|bytes - """ - return BDecoder( - data, use_ordered_dict, encoding, errors, hash_fields, hash_raw, - ).decode() - - -def parse_torrent_file( - filename, use_ordered_dict=False, encoding='utf-8', errors='strict', - hash_fields=None, hash_raw=False, -): - """ - Shortcut function for parse torrent object using TorrentFileParser - - See :any:`TorrentFileParser.__init__` for parameter description - - :param str filename: torrent filename - :param bool use_ordered_dict: - :param str encoding: - :param str errors: - :param Dict[str, Tuple[int, bool]] hash_fields: - :param bool hash_raw: - :rtype: dict|list|int|str|bytes - """ - with open(filename, 'rb') as f: - return TorrentFileParser( - f, use_ordered_dict, encoding, errors, hash_fields, hash_raw, - ).parse() - - -def create_torrent_file(filename, data, encoding='utf-8', hash_fields=None): - """ - Shortcut function for create a torrent file using BEncoder - - see :any:`BDecoder.__init__` for parameter description - - :param str filename: output torrent filename - :param dict|list|int|str|bytes data: - :param str encoding: - :param List[str] hash_fields: - """ - with open(filename, 'wb') as f: - f.write(BEncoder(data, encoding, hash_fields).encode()) - - -def __main(): - parser = argparse.ArgumentParser() - parser.add_argument('file', nargs='?', default='', - help='input file, will read form stdin if empty') - parser.add_argument('--dict', '-d', action='store_true', default=False, - help='use built-in dict, default will be OrderedDict') - 
parser.add_argument('--sort', '-s', action='store_true', default=False, - help='sort output json item by key') - parser.add_argument('--indent', '-i', type=int, default=None, - help='json output indent for every inner level') - parser.add_argument('--ascii', '-a', action='store_true', default=False, - help='ensure output json use ascii char, ' - 'escape other char use \\u') - parser.add_argument('--coding', '-c', default='utf-8', - help='string encoding, default "utf-8"') - parser.add_argument('--errors', '-e', default='strict', - help='decoding error handler, default "strict", you can' - ' use "ignore" or "replace" to avoid exception') - parser.add_argument('--version', '-v', action='store_true', default=False, - help='print version and exit') - args = parser.parse_args() - - if args.version: - print(__version__) - exit(0) - - try: - if args.file == '': - target_file = io.BytesIO( - getattr(sys.stdin, 'buffer', sys.stdin).read() - ) - else: - target_file = open(args.file, 'rb') - except FileNotFoundError: - sys.stderr.write('File "{}" not exist\n'.format(args.file)) - exit(1) - - # noinspection PyUnboundLocalVariable - data = TorrentFileParser( - target_file, not args.dict, args.coding, args.errors - ).parse() - - data = json.dumps( - data, ensure_ascii=args.ascii, - sort_keys=args.sort, indent=args.indent - ) - - print(data) - - -if __name__ == '__main__': - __main() -# Last Updated February 18, 2021 From 40ff6f47f5ffd16629b8542d8dfc4438c6d92232 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Mon, 27 Mar 2023 17:55:10 -0400 Subject: [PATCH 068/624] Added URL for Noel Alejandro Films scene scraper New url added for NoelAladandro.yml, a scene scraper for Noel Alajandro Films. 
--- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 052f874a4..978e2e63d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -891,6 +891,7 @@ nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +noelalejandrofilms.com.com|NoelAlejandro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 3bc01263b5af3ccd25228c296783591e6e7d4a8d Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Mon, 27 Mar 2023 17:57:54 -0400 Subject: [PATCH 069/624] New scene scraper for Noel Alejandro Films New scene scraper for Noel Alejandro Films. Site only has dates to the year, and does have tags for scenes. 
--- scrapers/NoelAlejandro.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 scrapers/NoelAlejandro.yml diff --git a/scrapers/NoelAlejandro.yml b/scrapers/NoelAlejandro.yml new file mode 100644 index 000000000..b22e8a91d --- /dev/null +++ b/scrapers/NoelAlejandro.yml @@ -0,0 +1,37 @@ +name: NoelAlejandro +sceneByURL: + - action: scrapeXPath + url: + - noelalejandrofilms.com/product + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //h1[@id="title"] + Image: + selector: //video[@id="product-img"]/@poster + Date: + selector: //li[contains(.,"Year:")] + postProcess: + - replace: + - regex: ^\s*Year:\s* + with: '' + - parseDate: 2006 + Director: + selector: //li[contains(.,"Director:")] + postProcess: + - replace: + - regex: ^\s*Director:\s* + with: '' + Details: + selector: //div[@class="film-prologue"]/p + concat: "\n\n" + Performers: + Name: + selector: //li[contains(.,"Cast:")]/a + Studio: + Name: + selector: //meta[@name="title"]/@content + +# Last Updated March 27, 2023 From 6714f950dc21e472210a314357b208609724aad0 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Mon, 27 Mar 2023 18:00:45 -0400 Subject: [PATCH 070/624] Fixed typo for noelalejandro URL Fixed silly typo (repeated .com in URL). 
--- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 978e2e63d..70bc1e3c2 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -891,7 +891,7 @@ nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -noelalejandrofilms.com.com|NoelAlejandro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +noelalejandrofilms.com|NoelAlejandro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From c64912bf0116125b0c1df00ab50b13cd76a0145b Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Mon, 27 Mar 2023 21:26:35 -0400 Subject: [PATCH 071/624] Scraping full date Changed way date is scraped to grab full date instead of just the year, after review by DogmaDragon. 
--- scrapers/NoelAlejandro.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scrapers/NoelAlejandro.yml b/scrapers/NoelAlejandro.yml index b22e8a91d..28df3e48a 100644 --- a/scrapers/NoelAlejandro.yml +++ b/scrapers/NoelAlejandro.yml @@ -12,12 +12,11 @@ xPathScrapers: Image: selector: //video[@id="product-img"]/@poster Date: - selector: //li[contains(.,"Year:")] + selector: //script[@class="yoast-schema-graph"] postProcess: - replace: - - regex: ^\s*Year:\s* - with: '' - - parseDate: 2006 + - regex: .+datePublished\"\:\"(\d{4}-\d{2}-\d{2}).+ + with: $1 Director: selector: //li[contains(.,"Director:")] postProcess: From c511895a1a40c854d8e8dd6d299f7ea5ee47a2fa Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Tue, 28 Mar 2023 01:08:24 -0400 Subject: [PATCH 072/624] Small fixes in scrapers list (#1304) --- SCRAPERS-LIST.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 052f874a4..fe188d370 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -771,7 +771,7 @@ manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- manyvids.com|ManyVids|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|python|- mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -masqulin.com.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay massage-parlor.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- massagebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mature.nl|MatureNL.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -891,7 +891,7 @@ nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nubilefilms.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -1087,7 +1087,7 @@ schoolpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- scoreland.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- scoreland2.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- screwmetoo.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay seductive18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- seehimfuck.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- seehimsolo.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From cee777e352aaba17c026e7df41b2847b71c6344e Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Tue, 28 Mar 2023 19:25:39 -0400 Subject: [PATCH 073/624] Add director field to DorcelClub scene scraper (#1296) --- scrapers/DorcelClub.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scrapers/DorcelClub.yml b/scrapers/DorcelClub.yml index f3c84d8aa..649087ece 100644 --- a/scrapers/DorcelClub.yml +++ b/scrapers/DorcelClub.yml @@ -33,8 +33,14 @@ xPathScrapers: selector: //div[@class="left"]/span[@class="movie"]/a/@href postProcess: - replace: - - regex: (.*) - with: https://dorcelclub.com$1 + - regex: ^/ + with: https://dorcelclub.com/ + Director: + selector: //div[@class="left"]/span[@class='director']/text() + postProcess: + - replace: + - regex: Director\s*:\s*(.*) + with: $1 movieScraper: movie: @@ -60,7 +66,7 @@ 
xPathScrapers: selector: //span[@class='director']/text() postProcess: - replace: - - regex: Director\s:\s(.*) + - regex: Director\s*:\s*(.*) with: $1 FrontImage: selector: //img[contains(@class, "cover")]/@data-src @@ -69,4 +75,4 @@ xPathScrapers: - regex: ([^\s]*)\s1x with: $1 Synopsis: //span[@class="full"]/p|/div[@class="content-text"]/p -# Last Updated June 27, 2022 +# Last Updated March 22, 2023 From decda97beb72802f8315602f4a6cca971664cd5b Mon Sep 17 00:00:00 2001 From: quantumandwoody <124731992+quantumandwoody@users.noreply.github.com> Date: Tue, 28 Mar 2023 16:45:10 -0700 Subject: [PATCH 074/624] Update PlayboyPlus.yml (#1302) --- scrapers/PlayboyPlus.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/scrapers/PlayboyPlus.yml b/scrapers/PlayboyPlus.yml index 4a634b06d..e4e22ac56 100644 --- a/scrapers/PlayboyPlus.yml +++ b/scrapers/PlayboyPlus.yml @@ -1,26 +1,25 @@ name: PlayboyPlus sceneByURL: - action: scrapeXPath - url: + url: &urls - playboyplus.com - pbp-ma.playboy.com + - pbp-ma-legacy.playboy.com scraper: sceneScraper queryURL: "{url}" queryURLReplace: url: - - regex: (?:https://)?(pbp-ma.playboy.com)/gallery/([^/]*)(?:/vip)? - with: https://playboyplus.com/gallery/$2 + - regex: (?:https://)?(pbp-ma(-legacy)?.playboy.com)/gallery/([^/]*)(?:/vip)? + with: https://playboyplus.com/gallery/$3 galleryByURL: - action: scrapeXPath - url: - - playboyplus.com - - pbp-ma.playboy.com + url: *urls scraper: galleryScraper queryURL: "{url}" queryURLReplace: url: - - regex: (?:https://)?(pbp-ma.playboy.com)/gallery/([^/]*)(?:/vip)? - with: https://playboyplus.com/gallery/$2 + - regex: (?:https://)?(pbp-ma(-legacy)?.playboy.com)/gallery/([^/]*)(?:/vip)? 
+ with: https://playboyplus.com/gallery/$3 xPathScrapers: sceneScraper: scene: @@ -51,4 +50,4 @@ xPathScrapers: Performers: *performers Details: *details Studio: *studio -# Last Updated February 07, 2023 +# Last Updated March 27, 2023 From bc7f784c6e24205222cb617f7582d5ca1217e574 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Tue, 28 Mar 2023 20:07:44 -0400 Subject: [PATCH 075/624] Add BoyFun xPath scene scraper (#1297) --- SCRAPERS-LIST.md | 1 + scrapers/BoyFun.yml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 scrapers/BoyFun.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index fe188d370..c56de0cb8 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -206,6 +206,7 @@ boundlife.com|Boundlife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bountyhunterporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- boxtrucksex.com|BoxTruckSex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- boyfriendsharing.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boyfun.com|BoyFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay boysdestroyed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay boysfuckmilfs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/BoyFun.yml b/scrapers/BoyFun.yml new file mode 100644 index 000000000..276219a7d --- /dev/null +++ b/scrapers/BoyFun.yml @@ -0,0 +1,30 @@ +name: BoyFun +sceneByURL: + - action: scrapeXPath + url: + - boyfun.com/video + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $perf: //span[@class="models"]/span[@class="content"]/a + scene: + Title: //span[@class="title"] + Image: //video/@poster + Date: + selector: //span[@class="date"]/span[@class="content"] + postProcess: + - replace: + - regex: (st|[nr]d|th) + with: "" + - parseDate: Jan 2, 2006 + Details: + selector: //div[@class="heading"]/following-sibling::text() + concat: 
"\n\n" + Performers: + Name: $perf/text() + URL: $perf/@href + Studio: + Name: + fixed: BoyFun +# Last Updated March 19, 2023 From d0cf61ffcc7515fab48958d2960487c32dbfa574 Mon Sep 17 00:00:00 2001 From: hphpanon <102124543+hphpanon@users.noreply.github.com> Date: Tue, 28 Mar 2023 20:12:37 -0400 Subject: [PATCH 076/624] Add support for sidechick.com to KBProductions scraper (#1289) --- SCRAPERS-LIST.md | 1 + scrapers/KBProductions.yml | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index c56de0cb8..6d302a62f 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1127,6 +1127,7 @@ shoplyftermylf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- showerbait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- showybeauty.com|ShowyBeauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- shylaj.com|ShylaJ.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +sidechick.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- silverstonedvd.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- silviasaint.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- simplyanal.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/KBProductions.yml b/scrapers/KBProductions.yml index b6199c7dd..cdb300a2d 100644 --- a/scrapers/KBProductions.yml +++ b/scrapers/KBProductions.yml @@ -8,6 +8,7 @@ sceneByURL: - inserted.com/videos/ - rickysroom.com/videos/ + - sidechick.com/videos/ action: script script: @@ -19,9 +20,10 @@ performerByURL: - inserted.com/tour/models/ - inserted.com/models/ - rickysroom.com/models/ + - sidechick.com/models/ action: script script: - python3 - KBProductions.py - performer -# Last Updated December 24, 2022 +# Last Updated March 14, 2023 From bd9238080f56393479cee3709519a2b3da17a3fc Mon Sep 17 00:00:00 2001 From: puc9 <51006296+puc9@users.noreply.github.com> Date: Tue, 28 Mar 2023 18:20:53 -0700 Subject: [PATCH 077/624] 
Fix Pornbox scraper to return male performers. Add URL to Legalporno scraper (#1284) --- scrapers/LegalPorno.yml | 22 ++++++++++++---------- scrapers/Pornbox.yml | 29 ++++++++++++++++------------- scrapers/pornworld.yml | 24 ++++++++++++++++++------ 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/scrapers/LegalPorno.yml b/scrapers/LegalPorno.yml index 2a88ad65c..92264091b 100644 --- a/scrapers/LegalPorno.yml +++ b/scrapers/LegalPorno.yml @@ -1,3 +1,5 @@ +# yaml-language-server: $schema=../validator/scraper.schema.json + name: "LegalPorno" sceneByURL: - action: scrapeXPath @@ -10,12 +12,12 @@ performerByURL: - https://www.analvids.com scraper: performerScraper sceneByFragment: - action: script - script: - - python - # use python3 instead if needed - - LegalPorno.py - - query + action: script + script: + - python + # use python3 instead if needed + - LegalPorno.py + - query xPathScrapers: sceneScraper: @@ -30,7 +32,7 @@ xPathScrapers: postProcess: - parseDate: 2006-01-02 Details: $description/div[3]/dd/text() - Code: + Code: selector: //h1[@class="watchpage-title"] postProcess: - replace: @@ -48,11 +50,11 @@ xPathScrapers: - replace: - regex: .+(https[^"]+).+ with: $1 + URL: //meta[@property="og:url"]/@content performerScraper: performer: Name: //h2 Country: //td[@class='text-danger']//a[contains(@href,'nationality')]/text() - Image: - selector: //div[@class='model--avatar']//img/@src -# Last Updated December 16, 2022 + Image: //div[@class='model--avatar']//img/@src +# Last Updated March 03, 2023 diff --git a/scrapers/Pornbox.yml b/scrapers/Pornbox.yml index 01f9702da..b7c275f63 100644 --- a/scrapers/Pornbox.yml +++ b/scrapers/Pornbox.yml @@ -1,3 +1,5 @@ +# yaml-language-server: $schema=../validator/scraper.schema.json + name: Pornbox sceneByURL: @@ -28,19 +30,22 @@ sceneByQueryFragment: jsonScrapers: sceneScraper: scene: - Title: - selector: scene_name + Title: scene_name Date: selector: publish_date postProcess: - parseDate: 
2006-01-02T15:04:05.999999Z Tags: - Name: - selector: niches.#.niche + Name: niches.#.niche Performers: - Name: models.#.model_name - Image: - selector: player_poster + Name: "[models.#.model_name,male_models.#.model_name].@flatten" + Gender: + selector: "[models.#.sex,male_models.#.sex].@flatten" + postProcess: + - map: + female: FEMALE + male: MALE + Image: player_poster Details: selector: small_description postProcess: @@ -60,10 +65,8 @@ jsonScrapers: common: $contents: content.contents.# scene: - Title: - selector: $contents.scene_name - Image: - selector: $contents.thumbnail.large + Title: $contents.scene_name + Image: $contents.thumbnail.large Date: selector: $contents.publish_date postProcess: @@ -73,5 +76,5 @@ jsonScrapers: postProcess: - replace: - regex: ^ - with: https://pornbox.com/contents/$1 -# Last Updated December 10, 2021 + with: https://pornbox.com/contents/ +# Last Updated March 03, 2023 diff --git a/scrapers/pornworld.yml b/scrapers/pornworld.yml index 24e3baa83..9bd4a92d9 100644 --- a/scrapers/pornworld.yml +++ b/scrapers/pornworld.yml @@ -1,31 +1,43 @@ +# yaml-language-server: $schema=../validator/scraper.schema.json + name: Pornworld + sceneByURL: - action: scrapeXPath url: - pornworld.com/watch/ scraper: sceneScraper + sceneByFragment: action: scrapeJson queryURL: https://pornworld.com/autocomplete?query={filename} queryURLReplace: filename: - - regex: .+(GP\d+).* + - regex: .*(GP\d+).* with: $1 scraper: sceneQueryScraper + jsonScrapers: sceneQueryScraper: scene: Title: terms.Scene.0.name URL: terms.Scene.0.url + xPathScrapers: sceneScraper: scene: - Title: + Title: selector: //h1 postProcess: - replace: - - regex: featuring.+ - with: "" + - regex: featuring.+ + with: "" + Code: + selector: //h1 + postProcess: + - replace: + - regex: .*(GP\d+).* + with: $1 Date: //i[contains(@class,"calendar3")]/text() Details: selector: //meta[@name="description"]/@content @@ -41,5 +53,5 @@ xPathScrapers: Name: fixed: Porn World Image: //video/@data-poster 
- -# Last Updated August 10, 2021 + URL: //div[@class="pagination"]/a[1]/@href +# Last Updated March 03, 2023 From 7d55d2856dd9b94ab48ec7c5c2b898c8e2000269 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Tue, 28 Mar 2023 22:59:32 -0400 Subject: [PATCH 078/624] Add url for cocksuremen.com Added URL for cocsuremen.com for new scraper. --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..229c60690 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -258,6 +258,7 @@ clubinfernodungeon.com|Algolia_ClubInfernoDungeon.yml|:heavy_check_mark:|:heavy_ clubseventeen.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 7061543b684a8075e4edbfbf1a3f454783369eb5 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Tue, 28 Mar 2023 23:01:45 -0400 Subject: [PATCH 079/624] Add scraper for CocksureMen New scraper for CocksureMen. No tags section, as the studio does not provide tags on their scenes pages. 
--- scrapers/CocksureMen.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 scrapers/CocksureMen.yml diff --git a/scrapers/CocksureMen.yml b/scrapers/CocksureMen.yml new file mode 100644 index 000000000..6fb628b60 --- /dev/null +++ b/scrapers/CocksureMen.yml @@ -0,0 +1,32 @@ +name: CocksureMen +sceneByURL: + - action: scrapeXPath + url: + - cocksuremen.com/tour/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //meta[@property="og:title"]/@content + Performers: + Name: //ul[@class="featuredModels"]/li/a/span + Date: + selector: //span[@class="post-date"]/text() + postProcess: + - replace: + - regex: (st|[nr]d|th) + with: "" + - parseDate: 2 Jan 2006 + Image: + selector: //script[contains(text(),"poster=")]/text() + postProcess: + - replace: + - regex: ^.+?poster="([^"]+)".+$ + with: https://cocksuremen.com$1 + Details: + selector: //div[@class="aboutvideo"]/p + Studio: + Name: + fixed: "Cocksure Men" +# Last Updated March 28, 2023 From 7fd699e58ba3509ba972b360b66f99cd5b49e4b7 Mon Sep 17 00:00:00 2001 From: Emilo2 <99644577+Emilo2@users.noreply.github.com> Date: Wed, 29 Mar 2023 12:50:49 +0300 Subject: [PATCH 080/624] Fix RealJamVR scraper Dates are all over the place, but this should catch them all. Added gallery scraping also. 
--- scrapers/RealJamVR.yml | 50 ++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index d3002c9ef..579c36a26 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -1,30 +1,48 @@ name: RealJamVR -sceneByURL: +sceneByURL: &byURL - action: scrapeXPath url: - realjamvr.com scraper: sceneScraper + +galleryByURL: *byURL + xPathScrapers: sceneScraper: scene: - Title: + Title: &title selector: //h1 - Date: - selector: //div[contains(@class, "date")] + Date: &date + selector: //div[@class="ms-4"]/text() postProcess: - replace: - - regex: ',' - with: "" - - parseDate: January 2 2006 - Performers: - Name: //span[text()="Featuring:"]/following-sibling::a - Tags: - Name: //span[text()="TAGS:"]/following-sibling::a - Details: - selector: //div[contains(@class, "c-video-item-desc")] + - regex: 'Sept\.' + with: "Sep." + - parseDate: Jan. 2, 2006 + - parseDate: January 2, 2006 + Performers: &performers + Name: //div[contains(@class,"text-sm-start")]/a[contains(@href,"/actor/")] + Tags: &tags + Name: //a[@class="tag"] + Details: &details + selector: //div[@class="opacity-75 my-2"] Image: - selector: //meta[@property="og:image"]/@content - Studio: + selector: //dl8-video/@poster + Code: + selector: //source[1]/@src + postProcess: + - replace: + - regex: '.*\/(\d+)_\d+p.mp4.*' + with: $1 + Studio: &studio Name: fixed: RealJamVR -# Last Updated June 25, 2021 + gallery: + Title: *title + Date: *date + Performers: *performers + Tags: *tags + Details: *details + Studio: *studio + +# Last Updated March 29, 2023 From a0f3285c6ce7cf6b5395d85c1182c0d65dffe646 Mon Sep 17 00:00:00 2001 From: Emilo2 <99644577+Emilo2@users.noreply.github.com> Date: Wed, 29 Mar 2023 12:51:48 +0300 Subject: [PATCH 081/624] Update SCRAPERS-LIST.md --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..ee217d3a4 
100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1055,7 +1055,7 @@ reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- -realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR realsensual.com|RealSensual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR From 5ea34aa103df1439136a631f4d50f4ed5cde8c7e Mon Sep 17 00:00:00 2001 From: Emilo2 <99644577+Emilo2@users.noreply.github.com> Date: Fri, 31 Mar 2023 08:31:00 +0300 Subject: [PATCH 082/624] Remove Code parsing Apparently the trailer URL isn't available all the time --- scrapers/RealJamVR.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index 579c36a26..f26b811fd 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -28,12 +28,6 @@ xPathScrapers: selector: //div[@class="opacity-75 my-2"] Image: selector: //dl8-video/@poster - Code: - selector: //source[1]/@src - postProcess: - - replace: - - regex: '.*\/(\d+)_\d+p.mp4.*' - with: $1 Studio: &studio Name: fixed: RealJamVR From bda520b4ea8597825c0f9897af5a867d9fa86464 Mon Sep 17 00:00:00 2001 From: Emilo2 <99644577+Emilo2@users.noreply.github.com> Date: Fri, 31 Mar 2023 08:40:17 +0300 Subject: [PATCH 083/624] Include VR specs to tags --- scrapers/RealJamVR.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index f26b811fd..e7cf034e4 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -23,7 +23,7 @@ xPathScrapers: Performers: 
&performers Name: //div[contains(@class,"text-sm-start")]/a[contains(@href,"/actor/")] Tags: &tags - Name: //a[@class="tag"] + Name: //a[@class="tag"] | //div[not(@class)]/div[@class="specs-icon"] Details: &details selector: //div[@class="opacity-75 my-2"] Image: From 2ae3cae9f78cd61bc561fc7a60e4007f17d2f8b8 Mon Sep 17 00:00:00 2001 From: litcum22 <129493982+litcum22@users.noreply.github.com> Date: Fri, 31 Mar 2023 19:37:04 +1100 Subject: [PATCH 084/624] add studio code --- scrapers/LoveHerFeet.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scrapers/LoveHerFeet.yml b/scrapers/LoveHerFeet.yml index cc023f657..2204bc797 100644 --- a/scrapers/LoveHerFeet.yml +++ b/scrapers/LoveHerFeet.yml @@ -34,6 +34,12 @@ xPathScrapers: selector: //div[@class='date']/text() postProcess: - parseDate: January 2, 2006 + Code: + selector: //*/div[@class='photos vide-section']/a[1]/img/@data-src + postProcess: + - replace: + - regex: ^https?.+/[a-zA-Z_]+(\d+)_.+/.+ + with: $1 Image: selector: //div[@class='video']//img[contains(@class,'mainThumb') or contains(@class,'update_thumb')]/@src0_3x|//video/@poster|//base/@href concat: "|" @@ -156,4 +162,4 @@ xPathScrapers: with: "" Gender: fixed: "Female" -# Last Updated August 28, 2022 +# Last Updated March 31, 2023 From 7a4b7ab8f4fd2d3a796c770beefdaed0af68795c Mon Sep 17 00:00:00 2001 From: litcum22 <129493982+litcum22@users.noreply.github.com> Date: Thu, 13 Apr 2023 20:43:39 +1000 Subject: [PATCH 085/624] adjust regex to better match all urls --- scrapers/LoveHerFeet.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scrapers/LoveHerFeet.yml b/scrapers/LoveHerFeet.yml index 2204bc797..daed233c2 100644 --- a/scrapers/LoveHerFeet.yml +++ b/scrapers/LoveHerFeet.yml @@ -38,8 +38,11 @@ xPathScrapers: selector: //*/div[@class='photos vide-section']/a[1]/img/@data-src postProcess: - replace: - - regex: ^https?.+/[a-zA-Z_]+(\d+)_.+/.+ - with: $1 + - regex: 
^https?.+/(?:[a-zA-Z]+_(\d+)_.+|(\d+)_[a-zA-Z0-9]+(?:\.jpg)?.*) + with: $1$2 + - replace: + - regex: ^\D.* + with: Image: selector: //div[@class='video']//img[contains(@class,'mainThumb') or contains(@class,'update_thumb')]/@src0_3x|//video/@poster|//base/@href concat: "|" @@ -162,4 +165,4 @@ xPathScrapers: with: "" Gender: fixed: "Female" -# Last Updated March 31, 2023 +# Last Updated April 13, 2023 From e1b2a54bb1dcd93e6b63d069e075f5d0da4c0e54 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Sat, 15 Apr 2023 18:19:26 -0400 Subject: [PATCH 086/624] Add entry for new Jacquie et Michel Elite scraper Added entry for new Jacquie et Michel Elite scene and movie scraper. --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..5e46679ba 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -651,6 +651,7 @@ itscleolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- itspov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- iwantclips.com|IWantClips.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- iwara.tv|Iwara.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jacquieetmichelelite.com|JacquieEtMichelElite.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- jacquieetmicheltv.net|JacquieEtMichelTV.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- jamesdeen.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- janafox.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 33f227a41bc9ec3829011cd792edcf743c3e9304 Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Sat, 15 Apr 2023 21:16:38 -0400 Subject: [PATCH 087/624] New scraper for Jacquie et Michel Elite Added new URL scraper for scenes and movies for Jacquie et Michel Elite (https://www.jacquieetmichelelite.com/en/). 
After seeing that some class values had trailing spaces within the double quotes, it was decided to be super paranoid and do all @class matching using "contains". The Movie title on the Scenes scraper can only pull the value in lower case, since the movie itself is only available once as part of a URL. This can be corrected easily by scraping the movie itself once it's created. No tags available on the studio site, unfortunately. The way that the site lays out their movies is the same as how they lay out their scenes, so the scene scraper can also be used on the URL for a movie. This is handy if you happen to have the movie as one file. It's for this reason that the scene scraper URL is more general than one would initially believe is necessary. --- scrapers/JacquieEtMichelElite.yml | 87 +++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 scrapers/JacquieEtMichelElite.yml diff --git a/scrapers/JacquieEtMichelElite.yml b/scrapers/JacquieEtMichelElite.yml new file mode 100644 index 000000000..9fb4d050b --- /dev/null +++ b/scrapers/JacquieEtMichelElite.yml @@ -0,0 +1,87 @@ +name: JacquieEtMichelElite + +movieByURL: + - action: scrapeXPath + url: + - jacquieetmichelelite.com/en/porn-movie + scraper: movieScraper +sceneByURL: + - action: scrapeXPath + url: + - jacquieetmichelelite.com + scraper: sceneScraper + +xPathScrapers: + movieScraper: + movie: + Name: //h1[contains(@class,"video-detail__title")] + Director: + selector: //li[contains(@class,"video-detail__info") and contains(.,'Director:')] + postProcess: + - replace: + - regex: "^Director:" + with: + Duration: + selector: //li[contains(@class,"video-detail__info") and contains(.,'Duration:')] + postProcess: + - replace: + - regex: "^Duration:" + with: + - regex: " min.+" + with: ":00" + Date: + selector: //script[contains(.,"datePublished")] + postProcess: + - replace: + - regex: ^.+datePublished":\s*"(\d\d\d\d-\d\d-\d\d).+$ + with: $1 + Synopsis: + selector: 
//div[contains(@class,"video-detail__description")]//text() + Studio: + Name: //li[contains(@class,"video-detail__info")]/strong[contains(.,'Studio:')]/following-sibling::a + FrontImage: //img[contains(@class,"video-detail__poster__img")]/@src + sceneScraper: + scene: + Title: + selector: //h1[contains(@class,"video-detail__title")] + Details: + selector: //div[contains(@class,"video-detail__description")]//text() + Date: + selector: //script[contains(.,"datePublished")] + postProcess: + - replace: + - regex: ^.+datePublished":\s*"(\d\d\d\d-\d\d-\d\d).+$ + with: $1 + Image: + selector: //div[contains(@class,"video-detail__handler")]/@style + postProcess: + - replace: + - regex: ^[^']*'([^']*).* + with: $1 + Studio: + Name: //li[contains(@class,"video-detail__info")]/strong[contains(.,'Studio:')]/following-sibling::a + Director: + selector: //li[contains(@class,"video-detail__info") and contains(.,'Director:')] + postProcess: + - replace: + - regex: "^Director:" + with: + Performers: + Name: //p[contains(@class,"actor-item__title")] + Movies: + Name: + selector: //a[contains(@class,"video-detail__poster")]/@href + postProcess: + - replace: + - regex: .+?([^/]+)\.html + with: $1 + - regex: "-" + with: " " + URL: + selector: //a[contains(@class,"video-detail__poster")]/@href + postProcess: + - replace: + - regex: ^ + with: https://www.jacquieetmichelelite.com + +# Last Updated April 15, 2023 From dbb522c49d0b5afb7932843602bd31b811c25abe Mon Sep 17 00:00:00 2001 From: grabolento <131325048+grabolento@users.noreply.github.com> Date: Wed, 19 Apr 2023 23:43:04 -0300 Subject: [PATCH 088/624] Add VRPorn.com --- scrapers/VRPorn.com.yml | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 scrapers/VRPorn.com.yml diff --git a/scrapers/VRPorn.com.yml b/scrapers/VRPorn.com.yml new file mode 100644 index 000000000..d1ffe4620 --- /dev/null +++ b/scrapers/VRPorn.com.yml @@ -0,0 +1,36 @@ +name: "VRPorn.com" + +sceneByURL: + - action: 
scrapeXPath + url: + - vrporn.com/ + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + common: + $header: //header[@class="entry-header"] + $footer: //footer[@class="entry-footer"] + $videoarea: //footer[@class="entry-footer"]/div[@class="layout_area_video"] + scene: + Title: $header//h1[@class="content-title"]/text() + Date: + selector: $videoarea/div[2]/span[@class="footer-titles"]/text() + postProcess: + - replace: + - regex: Posted on (?:Premium on )?(.*) + with: $1 + - parseDate: January 02, 2006 + Details: + selector: //main/article/div//p/text() + concat: "\n\n" + Tags: + Name: $footer//a[@rel="tag"]/text() + Performers: + Name: $header//div[@class="name_pornstar"]/text() + Studio: + Name: $header//a[@id="studio-logo"]//span[@class="footer-titles"]/text() + Image: //main/article/header//dl8-video/@poster + URL: &sceneUrl //link[@rel="canonical"]/@href + +# Last Updated April 05, 2023 \ No newline at end of file From a6806b799de9d740b03cfa353c322da2a9d23b12 Mon Sep 17 00:00:00 2001 From: grabolento <131325048+grabolento@users.noreply.github.com> Date: Wed, 19 Apr 2023 23:47:40 -0300 Subject: [PATCH 089/624] Added VRPorn.com.yml to list --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..6b210569d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1382,6 +1382,7 @@ vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_chec vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +vrporn.com|VRPorn.com.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 9f6eb8579d65477dc5d0219f4e640d60b72bb2be 
Mon Sep 17 00:00:00 2001 From: grabolento <131325048+grabolento@users.noreply.github.com> Date: Thu, 20 Apr 2023 08:15:07 -0300 Subject: [PATCH 090/624] VRPorn instead of VRPorn.com --- scrapers/{VRPorn.com.yml => VRPorn.yml} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename scrapers/{VRPorn.com.yml => VRPorn.yml} (95%) diff --git a/scrapers/VRPorn.com.yml b/scrapers/VRPorn.yml similarity index 95% rename from scrapers/VRPorn.com.yml rename to scrapers/VRPorn.yml index d1ffe4620..b2bcfe576 100644 --- a/scrapers/VRPorn.com.yml +++ b/scrapers/VRPorn.yml @@ -1,4 +1,4 @@ -name: "VRPorn.com" +name: "VRPorn" sceneByURL: - action: scrapeXPath @@ -33,4 +33,4 @@ xPathScrapers: Image: //main/article/header//dl8-video/@poster URL: &sceneUrl //link[@rel="canonical"]/@href -# Last Updated April 05, 2023 \ No newline at end of file +# Last Updated April 05, 2023 From e51ca5f9f62a5d673cfa4403611a22c60f75b763 Mon Sep 17 00:00:00 2001 From: grabolento <131325048+grabolento@users.noreply.github.com> Date: Thu, 20 Apr 2023 08:18:14 -0300 Subject: [PATCH 091/624] Update last updated date --- scrapers/VRPorn.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/VRPorn.yml b/scrapers/VRPorn.yml index b2bcfe576..4887cee80 100644 --- a/scrapers/VRPorn.yml +++ b/scrapers/VRPorn.yml @@ -33,4 +33,4 @@ xPathScrapers: Image: //main/article/header//dl8-video/@poster URL: &sceneUrl //link[@rel="canonical"]/@href -# Last Updated April 05, 2023 +# Last Updated April 20, 2023 From 1ee98619195d30f72cb23a67720bed37db0a5d51 Mon Sep 17 00:00:00 2001 From: grabolento <131325048+grabolento@users.noreply.github.com> Date: Thu, 20 Apr 2023 08:19:34 -0300 Subject: [PATCH 092/624] VRPorn instead of VRPorn.com --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6b210569d..b4d8bfa42 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1382,7 +1382,7 @@ 
vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_chec vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR -vrporn.com|VRPorn.com.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrporn.com|VRPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 6a1860c40d196417b8166044f4f46f819f7142fd Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 20 Apr 2023 14:33:51 +0100 Subject: [PATCH 093/624] add gallery scraper to dreamtranny.com --- scrapers/DreamTranny.yml | 42 +++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/scrapers/DreamTranny.yml b/scrapers/DreamTranny.yml index d134ca377..dfdc51cb7 100644 --- a/scrapers/DreamTranny.yml +++ b/scrapers/DreamTranny.yml @@ -4,17 +4,21 @@ sceneByURL: url: - dreamtranny.com scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - dreamtranny.com + scraper: galleryScraper xPathScrapers: sceneScraper: scene: - Title: //div[@class="section-title"]/h4/text() - Details: - selector: //p[@class="read-more"]/text() - Date: + Title: &titleSel //div[@class="section-title"]/h4/text() + Details: &detailsSel //p[@class="read-more"]/text() + Date: &dateAttr selector: //small[@class="updated-at"]/text() postProcess: - parseDate: Jan 2, 2006 - Performers: + Performers: &performersAttr Name: //a[@class="model-name no-text-decoration"] Image: selector: //video[contains(@class,"video-js")]/@poster|//div[contains(@class,"model-player")]//img/@src @@ -22,15 +26,31 @@ xPathScrapers: - replace: - regex: ^ with: "https://dreamtranny.com" - Studio: + Studio: &studioAttr Name: fixed: "Dream Tranny" - Tags: + 
Tags: &tagsAttr Name: //div[@class="model-categories"]/a/text() - URL: - selector: //script[contains(.,"API_VIEW_URLS")]/text() + URL: &urlAttr + selector: &urlSel //script[contains(.,"API_VIEW_URLS")]/text() postProcess: - replace: - regex: .*/api(/update/\d+)/view_count.* - with: "https://dreamtranny.com$1" -# Last Updated January 06, 2023 + with: "https://dreamtranny.com$1/" + Code: &codeAttr + selector: *urlSel + postProcess: + - replace: + - regex: .*/api/update/(\d+)/view_count.* + with: "$1" + galleryScraper: + gallery: + Title: *titleSel + Details: *detailsSel + Date: *dateAttr + Tags: *tagsAttr + Performers: *performersAttr + Studio: *studioAttr + Code: *codeAttr + URL: *urlAttr +# Last Updated April 20, 2023 From 93419f0314d77d4aa51aa6a2411a6d22a44a4839 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 20 Apr 2023 15:12:20 +0100 Subject: [PATCH 094/624] add performerByURL --- scrapers/DreamTranny.yml | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/scrapers/DreamTranny.yml b/scrapers/DreamTranny.yml index dfdc51cb7..7584cec6b 100644 --- a/scrapers/DreamTranny.yml +++ b/scrapers/DreamTranny.yml @@ -9,6 +9,11 @@ galleryByURL: url: - dreamtranny.com scraper: galleryScraper +performerByURL: + - action: scrapeXPath + url: + - dreamtranny.com/models/ + scraper: performerScraper xPathScrapers: sceneScraper: scene: @@ -37,7 +42,7 @@ xPathScrapers: - replace: - regex: .*/api(/update/\d+)/view_count.* with: "https://dreamtranny.com$1/" - Code: &codeAttr + Code: selector: *urlSel postProcess: - replace: @@ -51,6 +56,24 @@ xPathScrapers: Tags: *tagsAttr Performers: *performersAttr Studio: *studioAttr - Code: *codeAttr URL: *urlAttr + performerScraper: + performer: + Name: //h1[@class="model-title"]/text() + Gender: + fixed: transgender_female + Image: + selector: //div[@class="model-img"]/a/img[@class="img"]/@src + postProcess: + - replace: + - regex: ^ + with: https://dreamtranny.com/ + Country: 
//div[@class="model-content"]/p/span[text()="NATIONALITY"]/following-sibling::span[1] + Birthdate: + selector: //div[@class="model-content"]/p/span[text()="DATE OF BIRTH"]/following-sibling::span[1] + postProcess: + - replace: + - regex: (st|[nr]d|th) + with: "" + - parseDate: January 2, 2006 # Last Updated April 20, 2023 From 344f501d838898a2d4151d082921f2f48ab405cf Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 20 Apr 2023 15:12:31 +0100 Subject: [PATCH 095/624] update scrapers list --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..7219b722e 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -389,7 +389,7 @@ downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- dreamsofspanking.com|DreamsOfSpanking.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- dreamteenshd.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Trans drilledchicks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- driverxxx.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- dtfsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 21c031c56a4b72054fa61d11cdebaf06ba0836dc Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 20 Apr 2023 15:42:21 +0100 Subject: [PATCH 096/624] fix regex (incorrectly changing August to Augu) --- scrapers/DreamTranny.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/DreamTranny.yml b/scrapers/DreamTranny.yml index 7584cec6b..85130eb3f 100644 --- a/scrapers/DreamTranny.yml +++ b/scrapers/DreamTranny.yml @@ -73,7 +73,7 @@ xPathScrapers: selector: //div[@class="model-content"]/p/span[text()="DATE OF BIRTH"]/following-sibling::span[1] postProcess: - 
replace: - - regex: (st|[nr]d|th) - with: "" + - regex: (\d)(st|[nr]d|th) + with: "$1" - parseDate: January 2, 2006 # Last Updated April 20, 2023 From a910aa5ea41daeaa97642679e38edd1dd1a07024 Mon Sep 17 00:00:00 2001 From: RyoSaeba75 Date: Mon, 24 Apr 2023 12:06:14 +0200 Subject: [PATCH 097/624] Changed the details selector --- scrapers/FapHouse.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/FapHouse.yml b/scrapers/FapHouse.yml index b1785ade3..b1ace0560 100644 --- a/scrapers/FapHouse.yml +++ b/scrapers/FapHouse.yml @@ -13,11 +13,11 @@ xPathScrapers: postProcess: - parseDate: "02.01.2006" Details: - selector: //div[contains(@class,"video-info-details__description")]/span + selector: //div[contains(@class,"video-info-details")]/div[@class="fh-text-with-collapse-multiline"]/p[@class="fh-text-with-collapse-multiline__text"] concat: " " Tags: Name: //div[@class="video-info-details__categories"]/a Studio: Name: //a[@class="video-info-details__studio-link"] Image: //meta[@property="og:image"]/@content -# Last Updated November 24, 2021 +# Last Updated April 24, 2023 From 53b4527bdc5c7a15791f796d020a0c1788cda186 Mon Sep 17 00:00:00 2001 From: ryosaeba75 <68193713+ryosaeba75@users.noreply.github.com> Date: Mon, 24 Apr 2023 20:50:38 +0200 Subject: [PATCH 098/624] Pornhub : Added cookie to bypass age verification prompt (#1322) --- scrapers/Pornhub.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/scrapers/Pornhub.yml b/scrapers/Pornhub.yml index 9bac8d946..28dd41d78 100644 --- a/scrapers/Pornhub.yml +++ b/scrapers/Pornhub.yml @@ -142,4 +142,13 @@ xPathScrapers: - replace: - regex: "Description: (.*)" with: $1 -# Last Updated July 08, 2022 + +driver: + cookies: + - CookieURL: "https://www.pornhub.com" + Cookies: + - Name: "accessAgeDisclaimerPH" + Domain: ".pornhub.com" + Value: "1" + Path: "/" +# Last Updated April 23, 2023 From 0ab3da51a159f17c98660d8a2059f0d1fc55b1b5 Mon Sep 17 00:00:00 2001 From: nrg101 
<37197306+nrg101@users.noreply.github.com> Date: Mon, 24 Apr 2023 20:13:08 +0100 Subject: [PATCH 099/624] Use `availableAt` instead of `createdAt` for date in Arx scraper (#1317) --- scrapers/Arx.py | 66 +++++++++++++++++++++++++++--------------------- scrapers/Arx.yml | 2 +- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/scrapers/Arx.py b/scrapers/Arx.py index e7cee3306..70dff548a 100644 --- a/scrapers/Arx.py +++ b/scrapers/Arx.py @@ -1,8 +1,9 @@ -import requests import sys import json from urllib.parse import urlparse +import requests + # Static definition, used in the GraphQL request site_ids = { 'japanlust.com': 2, @@ -15,8 +16,11 @@ 'transroommates.com': 12 } +# Timeout (seconds) to prevent indefinite hanging +API_TIMEOUT = 10 + # GraphQL API endpoint -endpoint = "https://arwest-api-production.herokuapp.com/graphql" +ENDPOINT = "https://arwest-api-production.herokuapp.com/graphql" # Request headers headers = { @@ -30,11 +34,11 @@ "Referer": "https://lesworship.com" } -def __prefix(levelChar): - startLevelChar = b'\x01' - endLevelChar = b'\x02' +def __prefix(level_char): + start_level_char = b'\x01' + end_level_char = b'\x02' - ret = startLevelChar + levelChar + endLevelChar + ret = start_level_char + level_char + end_level_char return ret.decode() def __log(levelChar, s): @@ -43,36 +47,39 @@ def __log(levelChar, s): print(__prefix(levelChar) + s + "\n", file=sys.stderr, flush=True) -def LogTrace(s): +def log_trace(s): __log(b't', s) -def LogDebug(s): +def log_debug(s): __log(b'd', s) -def LogInfo(s): +def log_info(s): __log(b'i', s) -def LogWarning(s): +def log_warning(s): __log(b'w', s) -def LogError(s): +def log_error(s): __log(b'e', s) -def readJSONInput(): - input = sys.stdin.read() - return json.loads(input) +def read_json_input(): + json_input = sys.stdin.read() + return json.loads(json_input) -def callGraphQL(query, variables=None): - json = {'query': query} +def call_graphql(query, variables=None): + graphql_json = {'query': query} if 
variables is not None: - json['variables'] = variables + graphql_json['variables'] = variables - response = requests.post(endpoint, json=json, headers=headers) + response = requests.post(ENDPOINT, json=graphql_json, headers=headers, timeout=API_TIMEOUT) if response.status_code == 200: result = response.json() + + log_debug(json.dumps(result)) + if result.get("errors", None): for error in result["errors"]["errors"]: raise Exception("GraphQL error: {}".format(error)) @@ -85,7 +92,7 @@ def callGraphQL(query, variables=None): ) -def getScene(url): +def get_scene(url): # Sending the full query that gets used in the regular frontend query = """ query @@ -144,16 +151,16 @@ def getScene(url): site_id = site_ids.get(urlparse(url).netloc) if site_id is None: - LogError(f"Could not determine id for site {urlparse(url).netloc}") + log_error(f"Could not determine id for site {urlparse(url).netloc}") return None - + try: scene_id = int(urlparse(url).path.split('/')[2]) except ValueError: - LogError(f"No scene id found in url {url}") + log_error(f"No scene id found in url {url}") return None - - LogInfo(f"Scraping scene {scene_id}") + + log_info(f"Scraping scene {scene_id}") variables = { 'id': int(scene_id), @@ -161,9 +168,9 @@ def getScene(url): } try: - result = callGraphQL(query, variables) + result = call_graphql(query, variables) except ConnectionError as e: - LogError(e) + log_error(e) return None result = result.get('scene') @@ -176,12 +183,13 @@ def getScene(url): ret['tags'] = [{'name': x.get('name')} for x in result.get('genres')] ret['performers'] = [{'name': x.get('stageName')} for x in result.get('actors')] ret['image'] = result.get('primaryPhotoUrl') - ret['date'] = result.get('createdAt')[:10] + ret['date'] = result.get('availableAt') and result.get('availableAt')[:10] \ + or result.get('createdAt') and result.get('createdAt')[:10] return ret if sys.argv[1] == 'scrapeByURL': - i = readJSONInput() - ret = getScene(i.get('url')) + i = read_json_input() + ret = 
get_scene(i.get('url')) print(json.dumps(ret)) diff --git a/scrapers/Arx.yml b/scrapers/Arx.yml index 3a5cd7318..3b970eebf 100644 --- a/scrapers/Arx.yml +++ b/scrapers/Arx.yml @@ -15,4 +15,4 @@ sceneByURL: - Arx.py - scrapeByURL -# Last Updated October 01, 2021 +# Last Updated April 24, 2023 From 1675d5e272508dfc29c77aef7f04524bdd3c0a03 Mon Sep 17 00:00:00 2001 From: stg-annon <14135675+stg-annon@users.noreply.github.com> Date: Mon, 24 Apr 2023 15:34:51 -0400 Subject: [PATCH 100/624] Add studio Code in Traxxx scraper (#1314) --- scrapers/Traxxx.yml | 2 +- scrapers/traxxx_interface.py | 42 ++++++++++-------------------------- 2 files changed, 12 insertions(+), 32 deletions(-) diff --git a/scrapers/Traxxx.yml b/scrapers/Traxxx.yml index c63407e7b..2186c9a18 100644 --- a/scrapers/Traxxx.yml +++ b/scrapers/Traxxx.yml @@ -46,4 +46,4 @@ performerByURL: - Traxxx.py - performer_url -# Last Updated December 14, 2021 +# Last Updated April 24, 2023 diff --git a/scrapers/traxxx_interface.py b/scrapers/traxxx_interface.py index f3c9d1421..d37d9a2e5 100644 --- a/scrapers/traxxx_interface.py +++ b/scrapers/traxxx_interface.py @@ -245,6 +245,9 @@ def parse_to_stash_scene_search(self, s): if s.get("slug"): fragment["url"] = f'https://traxxx.me/scene/{s["id"]}/{s["slug"]}/' + if s.get("shootId"): + fragment["code"] = s["shootId"] + if s.get("date"): fragment["date"] = s["date"].split("T")[0] @@ -260,29 +263,19 @@ def parse_to_stash_scene_search(self, s): # #tags take too much space in the results page #if s.get("tags"): - # tags = [] - # for t in s["tags"]: - # if t.get("tag"): - # if t["tag"].get("name"): - # tags.append({ - # "name": t["tag"]["name"] - # }) - # fragment["tags"] = tags + # fragment["tags"] = [{"name": t["tag"]["name"]} for t in s.get("tags",{}) if t["tag"] and t["tag"].get("name")] if s.get("actors"): - performers = [] - for a in s["actors"]: - if a["actor"].get("name"): - performers.append({ - "name": a["actor"]["name"] - }) - fragment["performers"] = performers + 
fragment["performers"] = [{"name": a["actor"]["name"]} for a in s["actors"] if a["actor"] and a["actor"].get("name")] return fragment def parse_to_stash_scene(self, s): fragment = {} + if s.get("shootId"): + fragment["code"] = s["shootId"] + if s.get("title"): fragment["title"] = s["title"] @@ -301,28 +294,15 @@ def parse_to_stash_scene(self, s): if s.get("tags"): - tags = [] - for t in s["tags"]: - if t.get("tag"): - if t["tag"].get("name"): - tags.append({ - "name": t["tag"]["name"] - }) - fragment["tags"] = tags + fragment["tags"] = [{"name": t["tag"]["name"]} for t in s.get("tags",{}) if t["tag"] and t["tag"].get("name")] if s.get("actors"): - performers = [] - for a in s["actors"]: - if a["actor"].get("name"): - performers.append({ - "name": a["actor"]["name"] - }) - fragment["performers"] = performers + fragment["performers"] = [{"name": a["actor"]["name"]} for a in s["actors"] if a["actor"] and a["actor"].get("name")] if s.get("movies"): movies = [] for m in s["movies"]: - m = m.movie + m = m["movie"] if m.get("title"): movie = { From b280b32551c85219e67639f968c2c8c5876f95d0 Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Mon, 24 Apr 2023 22:39:10 +0300 Subject: [PATCH 101/624] SARJ-LLC minor imporovements (#1313) --- scrapers/SARJ-LLC.py | 10 ++++++---- scrapers/SARJ-LLC.yml | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scrapers/SARJ-LLC.py b/scrapers/SARJ-LLC.py index 88b3e13c1..1dedfb7c4 100644 --- a/scrapers/SARJ-LLC.py +++ b/scrapers/SARJ-LLC.py @@ -168,6 +168,7 @@ def scrape_model(base_url, name): def map_media(data, studio, base_url): url = "" + studio_code = data["UUID"] studio_name = {'Name': ""} if studio is not None: studio_url = studio[1] @@ -199,6 +200,7 @@ def map_media(data, studio, base_url): 'Tags': list(map(lambda t: {'Name': t}, data['tags'])), 'Performers': list(map(lambda m: map_model(base_url, m), data['models'])), 'Studio': studio_name, + 'Code': 
studio_code, "Director": director } @@ -290,11 +292,11 @@ def add_tag(key, tag_format): '706DF46B88884F7BB226097952427754': ('Eternal Desire', 'eternaldesire.com'), '5592E33324211E3FF640800200C93111': ('Goddess Nudes', 'goddessnudes.com'), '5A68E1D7B6E69E7401226779D559A10A': ('Love Hairy', 'lovehairy.com'), - 'E6B595104E3411DF98790800200C9A66': ('Met Art', 'metart.com'), - '5C38C84F55841824817C19987F5447B0': ('Met Art Intimate', 'metart.com'), - 'E7DFB70DF31C45B3B5E0BF10D733D349': ('Met Art X', 'metartx.com'), + 'E6B595104E3411DF98790800200C9A66': ('MetArt', 'metart.com'), + '5C38C84F55841824817C19987F5447B0': ('MetArt Intimate', 'metart.com'), + 'E7DFB70DF31C45B3B5E0BF10D733D349': ('MetArt X', 'metartx.com'), 'D99236C04DD011E1B86C0800200C9A66': ('Rylsky Art', 'rylskyart.com'), - '94DB3D0036FC11E1B86C0800200C9A66': ('Sex Art', 'sexart.com'), + '94DB3D0036FC11E1B86C0800200C9A66': ('SexArt', 'sexart.com'), '3D345D1E156910B44DB5A80CDD746318': ('Straplez', 'straplez.com'), '18A2E47EAEFD45F29033A5FCAF1F5B91': ('Stunning 18', 'stunning18.com'), 'FDAFDF209DC311E0AA820800200C9A66': ('The Life Erotic', 'thelifeerotic.com'), diff --git a/scrapers/SARJ-LLC.yml b/scrapers/SARJ-LLC.yml index 0620ae429..cf7b5fe60 100644 --- a/scrapers/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC.yml @@ -90,4 +90,4 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated November 29, 2022 +# Last Updated April 24, 2023 From 43f1a61f3585f28e8e8d24dbd2341effb1b945e3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Apr 2023 23:39:50 +0300 Subject: [PATCH 102/624] Bump yaml from 1.10.0 to 2.2.2 in /validator (#1324) --- .github/workflows/validate.yml | 2 +- validator/package.json | 2 +- validator/yarn.lock | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 644c1caae..a6db84194 100644 --- a/.github/workflows/validate.yml +++ 
b/.github/workflows/validate.yml @@ -13,6 +13,6 @@ jobs: - uses: actions/checkout@v2 - uses: actions/setup-node@v1 with: - node-version: '12.x' + node-version: '14.x' - run: cd ./validator && yarn install --frozen-lockfile - run: node ./validate.js --ci diff --git a/validator/package.json b/validator/package.json index 6e42778c4..5fc3e8e38 100644 --- a/validator/package.json +++ b/validator/package.json @@ -10,6 +10,6 @@ "ajv": "^6.12.6", "better-ajv-errors": "^0.6.7", "chalk": "^4.1.1", - "yaml": "^1.10.0" + "yaml": "^2.2.2" } } diff --git a/validator/yarn.lock b/validator/yarn.lock index 59c876e43..6dbea9c4c 100644 --- a/validator/yarn.lock +++ b/validator/yarn.lock @@ -207,7 +207,7 @@ uri-js@^4.2.2: dependencies: punycode "^2.1.0" -yaml@^1.10.0: - version "1.10.0" - resolved "https://registry.yarnpkg.com/yaml/-/yaml-1.10.0.tgz#3b593add944876077d4d683fee01081bd9fff31e" - integrity sha512-yr2icI4glYaNG+KWONODapy2/jDdMSDnrONSjblABjD9B4Z5LgiircSt8m8sRZFNi08kG9Sm0uSHtEmP3zaEGg== +yaml@^2.2.2: + version "2.2.2" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.2.2.tgz#ec551ef37326e6d42872dad1970300f8eb83a073" + integrity sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA== From f1b6b8bf8e8d0eb25cfadd4c624765cb36219100 Mon Sep 17 00:00:00 2001 From: RyoSaeba75 Date: Tue, 25 Apr 2023 13:13:36 +0200 Subject: [PATCH 103/624] Update the detail selector to something less restrictive --- scrapers/FapHouse.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/FapHouse.yml b/scrapers/FapHouse.yml index b1ace0560..aff15363f 100644 --- a/scrapers/FapHouse.yml +++ b/scrapers/FapHouse.yml @@ -13,11 +13,11 @@ xPathScrapers: postProcess: - parseDate: "02.01.2006" Details: - selector: //div[contains(@class,"video-info-details")]/div[@class="fh-text-with-collapse-multiline"]/p[@class="fh-text-with-collapse-multiline__text"] + selector: //div[contains(@class,"video-info-details")]//p concat: " " Tags: Name: 
//div[@class="video-info-details__categories"]/a Studio: Name: //a[@class="video-info-details__studio-link"] Image: //meta[@property="og:image"]/@content -# Last Updated April 24, 2023 +# Last Updated April 25, 2023 From 8de30342334eef0ef419d581be4e3aef03308e32 Mon Sep 17 00:00:00 2001 From: Michael Fitzgerald Date: Fri, 28 Apr 2023 10:53:05 +0000 Subject: [PATCH 104/624] Add Support for parasited.com to existing Hentaied.yml scraper (tested using local instance appears working) --- SCRAPERS-LIST.md | 1 + scrapers/Hentaied.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..3eb9ad607 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -946,6 +946,7 @@ pansexualx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_ch pantyjobs.com|pantyjobs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pantypops.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- paradisegfs.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +parasited.com|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- parodypass.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- passion-hd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- passionxxx.com|Passionxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Hentaied.yml b/scrapers/Hentaied.yml index 7ac946183..b377aff27 100644 --- a/scrapers/Hentaied.yml +++ b/scrapers/Hentaied.yml @@ -4,6 +4,7 @@ sceneByURL: url: - futanari.xxx - hentaied.com + - parasited.com scraper: sceneScraper xPathScrapers: sceneScraper: From 97f09023f88daebe04ce625e8b8769de0082e739 Mon Sep 17 00:00:00 2001 From: Michael Fitzgerald Date: Fri, 28 Apr 2023 11:05:00 +0000 Subject: [PATCH 105/624] Fix Last Update Date on Hentaied.yml --- scrapers/Hentaied.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Hentaied.yml b/scrapers/Hentaied.yml index b377aff27..935dd400e 100644 --- a/scrapers/Hentaied.yml +++ b/scrapers/Hentaied.yml @@ -41,4 
+41,4 @@ xPathScrapers: postProcess: - map: Real Life Hentai: Hentaied -# Last Updated June 29, 2022 +# Last Updated April 28, 2023 From 56ba66ab74be4989396043f053ac849ce2218df4 Mon Sep 17 00:00:00 2001 From: plz12345 <132735020+plz12345@users.noreply.github.com> Date: Sat, 6 May 2023 01:42:01 -0400 Subject: [PATCH 106/624] URL scraper for apovstory.com --- scrapers/APOVStory.yml | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 scrapers/APOVStory.yml diff --git a/scrapers/APOVStory.yml b/scrapers/APOVStory.yml new file mode 100644 index 000000000..680d157db --- /dev/null +++ b/scrapers/APOVStory.yml @@ -0,0 +1,39 @@ +name: A POV Story + +sceneByURL: + - action: scrapeXPath + url: + - apovstory.com/trailers/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + fixed: 'A POV Story' + Title: + selector: //div[@class = 'trailerArea centerwrap']/h3 + Details: + selector: //div[@class = 'trailerContent']//*//text() + concat: "\n\n" + postProcess: &ppStripDescription + - replace: + - regex: ^Description:\s* + with: + Tags: + Name: //li/span[contains(text(),'CATEGORIES')]/parent::li//a//text() + Performers: + Name: //li/span[contains(text(),'FEATURING')]/parent::li//a//text() + Image: + selector: //div[@class="player-thumb"]/img/@src0_3x + postProcess: + - replace: + - regex: ^ + with: 'https://apovstory.com' + Date: + selector: //span[contains(text(),'RELEASE DATE')]/parent::li/text() + postProcess: &ppParseDate + - replace: + - regex: \s+ + with: "" + - parseDate: "January2,2006" \ No newline at end of file From ad0b5eab30ad23e44cec6b622515cacea4d15d27 Mon Sep 17 00:00:00 2001 From: plz12345 <132735020+plz12345@users.noreply.github.com> Date: Sat, 6 May 2023 01:51:01 -0400 Subject: [PATCH 107/624] add apovstory.com --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..43836cc58 100644 --- a/SCRAPERS-LIST.md +++ 
b/SCRAPERS-LIST.md @@ -93,6 +93,7 @@ angelawhite.com|AngelaWhite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- angelinacastrolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- anilos.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- animecharactersdatabase.com|AnimeCharactersDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|Database +apovstory.com|APOVStory.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- archangelvideo.com|ArchAngelVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ariellynn.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ashemaletube.com|AShemaleTube.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- From 2d84b6aa43d57f64ed84ea1a52b7ced4c03efcc8 Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Sun, 7 May 2023 18:56:12 +0100 Subject: [PATCH 108/624] Update Hypnotube.yml (Added search functionality, fixed issue with URL scraping) (#1330) --- scrapers/Hypnotube.yml | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/scrapers/Hypnotube.yml b/scrapers/Hypnotube.yml index 7aba32b71..6e0def19b 100644 --- a/scrapers/Hypnotube.yml +++ b/scrapers/Hypnotube.yml @@ -4,7 +4,29 @@ sceneByURL: url: - hypnotube.com scraper: sceneScraper + +sceneByName: + action: scrapeXPath + queryURL: https://hypnotube.com/search/{}/ + scraper: hypnotubeSearch + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + xPathScrapers: + hypnotubeSearch: + common: + $card: //div[@class='item-inner-col inner-col'] + scene: + Title: + selector: $card//a[contains(@href, '/video')]/@title + URL: + selector: $card//a[contains(@href, '/video')]/@href + Image: + selector: $card//img[contains(@data-mb, "shuffle-thumbs")]/@src + sceneScraper: common: $studio: //a[starts-with(@href,'https://hypnotube.com/user/')] @@ -23,5 +45,5 @@ xPathScrapers: with: "" Tags: Name: //div[@class='tags-block']/a/text() - URL: //link[rel='canonical']/@href -# Last Updated January 27, 
2023 + URL: //link[@rel='canonical']/@href +# Last Updated April 29, 2023 From 8c0810dc73f8028fa43c9b393ce93d222fee2d52 Mon Sep 17 00:00:00 2001 From: DrTongKwokKwan <79197977+DrTongKwokKwan@users.noreply.github.com> Date: Sun, 7 May 2023 21:48:07 +0200 Subject: [PATCH 109/624] Added scraper for visit-x.net (#1325) --- SCRAPERS-LIST.md | 1 + scrapers/Visit-X.yml | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 scrapers/Visit-X.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6d302a62f..38c6bb75e 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1368,6 +1368,7 @@ virtualrealpassion.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_m virtualrealporn.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR virtualrealtrans.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR virtualtaboo.com|VirtualTaboo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +visit-x.net|Visit-X.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- vivid.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vividclassic.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vivthomas.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- diff --git a/scrapers/Visit-X.yml b/scrapers/Visit-X.yml new file mode 100644 index 000000000..3fcf546f6 --- /dev/null +++ b/scrapers/Visit-X.yml @@ -0,0 +1,26 @@ +name: Visit-X +sceneByURL: + - action: scrapeXPath + url: + - visit-x.net + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class="profile-video-details"]//h2 + Tags: + Name: //a[@class="profile-video-details--description-tags-list-item-tag"]/text() + Performers: + Name: //a[contains(@class, 'profile-video-details--description-complex-actors-link')]/text() + Details: //div[contains(@class, 'profile-video-details--description-text')]/text() + Date: + selector: //span[contains(@class, 'profile-video-details--description-complex-meta-item')][1]/text() + 
postProcess: + - parseDate: 02.01.2006 + Image: //div[contains(@class, 'gallery__preview-image-container')]/img/@src + Studio: + Name: + fixed: Visit-X +driver: + useCDP: true +# Last Updated April 24, 2023 From 047fe0ca89f5677ea92b25db2365b331bb68c358 Mon Sep 17 00:00:00 2001 From: ryosaeba75 <68193713+ryosaeba75@users.noreply.github.com> Date: Sun, 7 May 2023 22:56:15 +0200 Subject: [PATCH 110/624] Add sceneByName search functionality to ManyVids scraper (#1321) --- scrapers/ManyVids/ManyVids.py | 48 +++++++++++++++++++++++++++++++--- scrapers/ManyVids/ManyVids.yml | 18 ++++++++++++- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/scrapers/ManyVids/ManyVids.py b/scrapers/ManyVids/ManyVids.py index fb10c7476..780e6a14a 100644 --- a/scrapers/ManyVids/ManyVids.py +++ b/scrapers/ManyVids/ManyVids.py @@ -44,6 +44,17 @@ def get_request(url: str) -> requests.Response(): } return requests.get(url, headers=mv_headers, timeout=(3, 10)) +def post_request(url: str, jsonBody:str) -> requests.Response(): + """ + wrapper function over requests.post to set common options + """ + mv_headers = { + "User-Agent": + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0', + "Referer": "https://www.manyvids.com/" + } + return requests.post(url, json=jsonBody, headers=mv_headers, timeout=(3, 10)) + def get_model_name(model_id: str) -> str: """ @@ -112,11 +123,16 @@ def get_scene(scene_id: str) -> dict: scrape = {} scrape['title'] = meta.get('title') scrape['details'] = meta.get('description') + scrape['code'] = scene_id + sceneURLPartial = meta.get('url') + if sceneURLPartial: + scrape["url"] = f'https://www.manyvids.com{sceneURLPartial}' if meta.get('modelId'): model_name = get_model_name(meta['modelId']) if model_name: scrape['performers'] = [] scrape['performers'].append({'name': model_name}) + scrape['studio'] = {"name": model_name} image = meta.get('screenshot') if image is None: # fallback to thumbnail image = meta.get('thumbnail') @@ 
-274,26 +290,52 @@ def performer_by_name(name: str, max_results: int = 25) -> None: for i in range(0, max_results): performers.append({"name": names[i].strip(), "url": urls[i]}) except Exception as search_exc: - log.error(f"Failed to search for {name}: {search_exc}") + log.error(f"Failed to search for performer {name}: {search_exc}") print(json.dumps(performers)) + +def scene_by_name(name: str) -> None: + if name: + search_url = f'https://api.journey-bff.kiwi.manyvids.com/api/v1/search/all' + try: + response = post_request(search_url, jsonBody={'keywords':name}) + # log.debug(response.request.body) + # log.debug(response.content) + meta = response.json() + vids = meta.get('vids') + scrapes = [] + if vids: + for vid in vids: + scrape = {} + scrape['Title'] = vid.get('username') + scrape['URL'] = 'https://www.manyvids.com'+vid.get('url') + scrapes.append(scrape) + print(json.dumps(scrapes)) + except Exception as search_exc: + log.error(f"Failed to search for scene {name}: {search_exc}") + print(json.dumps("{}")) def main(): fragment = json.loads(sys.stdin.read()) url = fragment.get("url") + queryURL = fragment.get("queryURL") name = fragment.get("name") - if url is None and name is None: + if url is None and name is None and queryURL is None: log.error("No URL/Name provided") sys.exit(1) - if url and "performer_by_url" in sys.argv: + if name and "scene_by_name" in sys.argv: + scene_by_name(name) + elif url and "performer_by_url" in sys.argv: scrape_performer(url) elif name and "performer_by_name" in sys.argv: search_name = quote_plus(name) performer_by_name(search_name) elif url: scrape_scene(url) + elif queryURL: + scrape_scene(queryURL) if __name__ == "__main__": diff --git a/scrapers/ManyVids/ManyVids.yml b/scrapers/ManyVids/ManyVids.yml index 06bb0127d..57404e58a 100644 --- a/scrapers/ManyVids/ManyVids.yml +++ b/scrapers/ManyVids/ManyVids.yml @@ -20,4 +20,20 @@ performerByName: - python3 - ManyVids.py - performer_by_name -# Last Updated December 30, 2022 \ No 
newline at end of file + +sceneByName: + action: script + script: + - python3 + - ManyVids.py + - scene_by_name + +sceneByQueryFragment: + action: script + queryURL: "{url}" + script: + - python3 + - ManyVids.py + - scene_by_query_fragment + +# Last Updated April 22, 2023 \ No newline at end of file From ea09ad4ede0936eca827807c8d195fc14594ab0d Mon Sep 17 00:00:00 2001 From: l1xander <125724412+l1xander@users.noreply.github.com> Date: Wed, 10 May 2023 10:31:13 +0530 Subject: [PATCH 111/624] Update PaperStreetMedia.yml --- scrapers/PaperStreetMedia.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/PaperStreetMedia.yml b/scrapers/PaperStreetMedia.yml index 1d2a60f49..f4cab4c8e 100755 --- a/scrapers/PaperStreetMedia.yml +++ b/scrapers/PaperStreetMedia.yml @@ -43,7 +43,7 @@ xPathScrapers: sceneScraper: scene: Title: //p[@class="video-title"]/text() - Details: //p[@class="video-description"]/text() + Details: //div[contains(@class, 'video-description')]//p/text() Performers: Name: //p[@class="model-name"]/a Image: //div[@class="video-area"]//stream/@poster From a3e6c78541829cc6999ffcf28acb1b7d0b487a85 Mon Sep 17 00:00:00 2001 From: l1xander <125724412+l1xander@users.noreply.github.com> Date: Wed, 10 May 2023 10:42:57 +0530 Subject: [PATCH 112/624] Updated xPath for backward compatiblity Updated the xPath to make it compatible with some older scene pages --- scrapers/PaperStreetMedia.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/PaperStreetMedia.yml b/scrapers/PaperStreetMedia.yml index f4cab4c8e..79b785117 100755 --- a/scrapers/PaperStreetMedia.yml +++ b/scrapers/PaperStreetMedia.yml @@ -43,7 +43,7 @@ xPathScrapers: sceneScraper: scene: Title: //p[@class="video-title"]/text() - Details: //div[contains(@class, 'video-description')]//p/text() + Details: //div[contains(@class, 'video-description')]/descendant-or-self::*/text() Performers: Name: //p[@class="model-name"]/a Image: 
//div[@class="video-area"]//stream/@poster From 32cbe4c45fc1ebf905f4556b29f2381f8e87fc72 Mon Sep 17 00:00:00 2001 From: grabolento <131325048+grabolento@users.noreply.github.com> Date: Tue, 16 May 2023 23:40:00 -0300 Subject: [PATCH 113/624] RealJamVR update Update to changes in RealJamVR website --- scrapers/RealJamVR.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index d3002c9ef..2a7a6a478 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -2,7 +2,7 @@ name: RealJamVR sceneByURL: - action: scrapeXPath url: - - realjamvr.com + - realjamvr.com/scene/ scraper: sceneScraper xPathScrapers: sceneScraper: @@ -10,21 +10,23 @@ xPathScrapers: Title: selector: //h1 Date: - selector: //div[contains(@class, "date")] + selector: //div[@class="ms-4 text-nowrap"]/strong postProcess: - replace: - regex: ',' with: "" - parseDate: January 2 2006 Performers: - Name: //span[text()="Featuring:"]/following-sibling::a + Name: //div[@class="scene-view mx-auto"]/a Tags: - Name: //span[text()="TAGS:"]/following-sibling::a + # Tags + Technical specs, excluding video duration + Name: //div[@class="lh-lg"]/a | //div[@class="d-flex justify-content-between my-2"]/div[1]/div[position() Date: Sun, 21 May 2023 10:37:02 +0000 Subject: [PATCH 114/624] Updated Details scraper to pull from all sites --- scrapers/Hentaied.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Hentaied.yml b/scrapers/Hentaied.yml index 935dd400e..f0a981f88 100644 --- a/scrapers/Hentaied.yml +++ b/scrapers/Hentaied.yml @@ -27,7 +27,7 @@ xPathScrapers: - regex: "Exclude" with: "" Details: - selector: //div[@class="cont" or @class="column"]/$desc + selector: //div[@class="fullss"]/p concat: "\n\n" postProcess: - replace: @@ -41,4 +41,4 @@ xPathScrapers: postProcess: - map: Real Life Hentai: Hentaied -# Last Updated April 28, 2023 +# Last Updated May 21, 2023 From 
718fa24cad6f1fb7fcc5772de32819d9d64119d9 Mon Sep 17 00:00:00 2001 From: Prakash Date: Mon, 22 May 2023 16:04:00 -0700 Subject: [PATCH 115/624] Update hanime scraper to scrape by filename --- scrapers/hanime.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scrapers/hanime.yml b/scrapers/hanime.yml index 0610201c2..20b479e88 100644 --- a/scrapers/hanime.yml +++ b/scrapers/hanime.yml @@ -1,4 +1,12 @@ name: hanime +sceneByFragment: + action: scrapeXPath + queryURL: https://hanime.tv/videos/hentai/{filename} + queryURLReplace: + filename: + - regex: ^([a-z\-0-9]+)(\-[0-9]{3,4}p).* + with: $1 + scraper: sceneScraper sceneByURL: - action: scrapeXPath url: @@ -35,4 +43,4 @@ xPathScrapers: - regex: "https://" with: "https://i1.wp.com/" -# Last Updated August 18, 2020 +# Last Updated May 22, 2023 From 655271698cd1553788ba7a76e192068bca291a7e Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Wed, 24 May 2023 17:23:14 +0100 Subject: [PATCH 116/624] Added scene and performer XPath scraper for PornHex --- SCRAPERS-LIST.md | 1 + scrapers/PornHex.yml | 49 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 scrapers/PornHex.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..b19d07e5f 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -991,6 +991,7 @@ porndudecasting.com|PornDudeCasting.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_ pornfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pornforce.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- porngoespro.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornhex.com|PornHex.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Tube Site pornhub.com|Pornhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- pornhubpremium.com|PornhubPremium.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- pornlandvideos.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/PornHex.yml 
b/scrapers/PornHex.yml new file mode 100644 index 000000000..d370936f6 --- /dev/null +++ b/scrapers/PornHex.yml @@ -0,0 +1,49 @@ +name: "PornHex" +sceneByURL: + - action: scrapeXPath + url: + - pornhex.com + scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - pornhex.com + scraper: performerScraper +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@itemprop="name"]/@content + Image: //video[@id="player"]/@poster + Date: + selector: //meta[@itemprop="uploadDate"]/@content + postProcess: + - parseDate: 2006-01-02 15:04:05 + - parseDate: 2006-01-02 + Tags: + Name: //div[@class="meta-bar col-12 padd-top-small"]//a[contains(@href,"/search?q=")]/@title | //div[@class="meta-bar col-12"]//a[contains(@href,"/categories/")]/text() + Performers: + Name: //div[@class="meta-bar col-12 padd-top-small"]//a[contains(@href,"/pornstars/")]/text() + Details: //meta[@itemprop="description"]/@content + performerScraper: + common: + $pornstar: //section[@class="pornstar-header"]//div[@class="col-6"]//p + performer: + Name: $pornstar//span[contains(text(),"Alias -")]/following::span/text() + Country: $pornstar//span[contains(text(),"Home Country -")]/following::span/text() + Ethnicity: $pornstar//span[contains(text(),"Ethnicity -")]/following::span/text() + HairColor: $pornstar//span[contains(text(),"Hair -")]/following::span/text() + EyeColor: $pornstar//span[contains(text(),"Eye Colour -")]/following::span/text() + Height: + selector: $pornstar//span[contains(text(),"Height -")]/following::span/text() + postProcess: + - feetToCm: true + Measurements: $pornstar//span[contains(text(),"Measurements -")]/following::span/text() + URL: $pornstar//span[contains(text(),"Official site -")]/following::a/@href + Image: + selector: //section[@class="pornstar-header"]//div[@class="thumbnail-col-2 col-2"]/img[not(contains(@src,"default_portrait"))]/@src + postProcess: + - replace: + - regex: ^ + with: "https://pornhex.com" + +# Last Updated May 24, 2023 \ No newline at 
end of file From 817f1a469c21092114c7e2b818541752cc56bdf2 Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Wed, 24 May 2023 19:40:58 +0100 Subject: [PATCH 117/624] Added XPath scene scraper for EroProfile --- SCRAPERS-LIST.md | 1 + scrapers/EroProfile.yml | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 scrapers/EroProfile.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..0dc4ab662 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -401,6 +401,7 @@ elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +eroprofile.com|EroProfile.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site eroticax.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- eroticbeauty.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- eroticspice.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- diff --git a/scrapers/EroProfile.yml b/scrapers/EroProfile.yml new file mode 100644 index 000000000..bf0cb1f31 --- /dev/null +++ b/scrapers/EroProfile.yml @@ -0,0 +1,22 @@ +name: "EroProfile" +sceneByURL: + - action: scrapeXPath + url: + - eroprofile.com/m/videos/view/ + - m.eroprofile.com/p/videos/view/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class="capMultiLine"]/text() | //div[@class="pageTitle"]/text() + Image: //video[@id="vidVideoViewPlayer"]/@poster | //video[@id="vidPlayer"]/@poster + Date: + selector: //div[@class="col-xs-3 col-sm-2 colEmp"][contains(text(),"Upload date")]/following-sibling::div/text() | //table[@class="data marT"]//tbody//tr//th[contains(text(),"Uploaded:")]/following::td/text() + postProcess: + - replace: + - regex: (.+)\s\((.+)ago\) + with: $1 + - parseDate: 2 January 2006 
+ Details: //h1[@class="capMultiLine"]/following::p/text() | //table[@class="data marT"]//tbody//tr//th[contains(text(),"Description:")]/following::td/text() + +# Last Updated May 24, 2023 From 180090651e123db437ebadd2655641c4c5ac8788 Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Thu, 25 May 2023 14:44:15 +1000 Subject: [PATCH 118/624] Add performer scraping to Kink scraper; --- scrapers/Kink.yml | 52 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/scrapers/Kink.yml b/scrapers/Kink.yml index 9bda22f0a..2717e13e3 100644 --- a/scrapers/Kink.yml +++ b/scrapers/Kink.yml @@ -17,6 +17,15 @@ sceneByFragment: - regex: .*\((\d+)\)\.[a-zA-Z\d]+$ #support filenames in the form scene - date - performer (12345).mp4 with: $1 scraper: sceneScraper +performerByName: + action: scrapeXPath + queryURL: https://www.kink.com/search?type=performers&q={} + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - kink.com/model + scraper: performerScraper xPathScrapers: sceneScraper: @@ -119,6 +128,47 @@ xPathScrapers: whipped-ass: Whipped Ass wired-pussy: Wired Pussy URL: //link[@rel="canonical"]/@href + performerSearch: + common: + $result: //div/a[contains(@href, "/model") and contains(concat(" ", normalize-space(@class), " "), " model-link ")] + + performer: + Name: $result/img/@alt + URL: + selector: $result/@href + postProcess: + - replace: + - regex: ^ + with: https://www.kink.com + performerScraper: + performer: + Name: + selector: //h1/text() # //div[@font-size][number(translate(@font-size,"px",""))>=35]/text() + concat: " " + postProcess: + - replace: + - regex: ^\s+ + with: "" + - regex: \s+$ + with: + Twitter: + selector: '//div/a[contains(concat(" ", normalize-space(@class), " "), " social-link ") and contains(@href, "twitter.com")]/@href' + Image: + selector: //div/img[contains(@src, "imagedb")][1]/@src + Tattoos: + selector: 
'//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/tattoo")]//text()' + postProcess: + - map: + Tattoo: "Yes" + Piercings: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/pierced")]/span' + concat: "\n" + Tags: + Name: '//div/span[text()=" tags: "]/following-sibling::a/span/text()' + Details: + selector: '//div/span/p[@class="bio"]/following-sibling::p//text()' + concat: "\n" + URL: //link[@rel="canonical"]/@href driver: useCDP: true -# Last Updated December 16, 2022 +# Last Updated May 25, 2023 From eb32b8be617b2bce73860e385bd06fe9493d39c6 Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Thu, 25 May 2023 14:53:20 +1000 Subject: [PATCH 119/624] Update scrapers list; --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..65bee43dd 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -700,7 +700,7 @@ killergram.com|Killergram.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kimberleelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kin8tengoku.com|Kin8tengoku.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kingsoffetish.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -kink.com|Kink.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +kink.com|Kink.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- kink305.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kinkbomb.com|KinkBomb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kinkvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR From 07f3851c85072fe52fa7556760ce2cdbd334dafb Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 29 May 2023 18:04:55 +0200 Subject: [PATCH 120/624] Add scene scraper for YesGirlz.com --- SCRAPERS-LIST.md | 1 + scrapers/YesGirlz.yml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 scrapers/YesGirlz.yml diff --git a/SCRAPERS-LIST.md 
b/SCRAPERS-LIST.md index 38c6bb75e..99a7eab42 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1443,6 +1443,7 @@ xvirtual.com|XVirtual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xxxnj.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xxxpawn.com|XXXPawn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yanks.com|Yanks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yesgirlz.com|YesGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yngr.com|YNGR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- younganaltryouts.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- youngerloverofmine.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/YesGirlz.yml b/scrapers/YesGirlz.yml new file mode 100644 index 000000000..2376cef3d --- /dev/null +++ b/scrapers/YesGirlz.yml @@ -0,0 +1,30 @@ +name: Yes Girlz +sceneByURL: + - action: scrapeXPath + url: + - yesgirlz.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $siteContent: //div[@class="site-content"] + scene: + Title: $siteContent//h2 + Performers: + Name: + selector: $siteContent//h2[contains(text(), "Starring")] + postProcess: + - replace: + - regex: ^\s*Starring:\s* + with: "" + - regex: \s*$ + with: "" + split: " & " + Details: + selector: $siteContent//div[@class="elementor-widget-container"]/p + Image: + selector: $siteContent//video/@data-poster + Studio: + Name: + fixed: Yes Girlz +# Last Updated May 29, 2023 From 665b082dfc8a4e8491628585a4eeadcfe793fa2a Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 30 May 2023 01:30:02 +0200 Subject: [PATCH 121/624] Improve studio matching for WoodmanCastingX --- SCRAPERS-LIST.md | 2 +- scrapers/WoodmancastingX.yml | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..07b92b60d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1417,7 +1417,7 @@ wicked.com|Algolia_Wicked.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check 
wildoncam.com|trafficpimps.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- williamhiggins.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay willtilexxx.com|WillTileXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -woodmancastingx.com|WoodmancastingX.y,l|:heavy_check_mark:|:x:|:x:|:x:|-|- +woodmancastingx.com|WoodmancastingX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowgirls.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowgirlsblog.com|WOWGirlsBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowporn.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/WoodmancastingX.yml b/scrapers/WoodmancastingX.yml index 65e4a3615..dfa4b2937 100644 --- a/scrapers/WoodmancastingX.yml +++ b/scrapers/WoodmancastingX.yml @@ -35,7 +35,13 @@ xPathScrapers: with: "https://www.woodmancastingx.com/" Studio: Name: - fixed: Woodman Casting X + selector: //div[@id="breadcrumb"] + postProcess: + - replace: + - regex: ^[^>]*> (Casting|Sthuf).*$ + with: "Woodman Casting X" + - regex: ^[^>]*> (Scenes|Backstage|Live).*$ + with: "Pierre Woodman" Tags: Name: //a[@class="tag"] Image: //meta[@property="og:image"]/@content @@ -52,4 +58,4 @@ xPathScrapers: URL: selector: $res/@href postProcess: *pp -# Last Updated September 28, 2022 +# Last Updated May 30, 2023 From 4910deb9e34e5f601957f9aac022aff35c113729 Mon Sep 17 00:00:00 2001 From: Flashy78 <90150289+Flashy78@users.noreply.github.com> Date: Tue, 30 May 2023 13:42:19 -0700 Subject: [PATCH 122/624] Add search to Plushies.tv --- scrapers/Plushies.yml | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/scrapers/Plushies.yml b/scrapers/Plushies.yml index d1ed37572..112d53ac4 100644 --- a/scrapers/Plushies.yml +++ b/scrapers/Plushies.yml @@ -1,10 +1,37 @@ name: Plushies +sceneByName: + action: scrapeXPath + queryURL: "https://plushies.tv/tour/search.php?query={}" + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper sceneByURL: - action: 
scrapeXPath url: - plushies.tv scraper: sceneScraper xPathScrapers: + sceneSearch: + common: + $searchItem: //div[@class='updateItem']/div[@class='updateDetails'] + $searchThumb: //div[@class='updateItem']/a/img + scene: + Title: + selector: $searchItem/h4/a + Date: + selector: $searchItem/p/span[2] + postProcess: + - parseDate: 01/02/2006 + URL: + selector: $searchItem/h4/a/@href + Image: + selector: $searchThumb/@src + postProcess: + - replace: + - regex: ^ + with: "https://plushies.tv/tour/" sceneScraper: common: $ub: //div[@class="update_block_info"] @@ -22,7 +49,7 @@ xPathScrapers: Details: selector: $ub/span[@class="latest_update_description"] Image: - selector: //img[@class="stdimage large_update_thumb left thumbs"]/@src + selector: //img[@class="stdimage small_update_thumb thumbs"]/@src postProcess: - replace: - regex: ^ @@ -30,4 +57,4 @@ xPathScrapers: Studio: Name: fixed: Plushies -# Last Updated May 07, 2021 +# Last Updated March 30, 2023 From 8ead80a6db3600af1727015a12c1fb1c6fc5f085 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 31 May 2023 16:35:11 +0100 Subject: [PATCH 123/624] add Movies and Code to sceneByURL in MindGeek.yml --- scrapers/MindGeek.yml | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 21166cea7..2865e8c1d 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -118,6 +118,8 @@ xPathScrapers: scriptScraper: common: $script: //script[@type="application/ld+json"] + $canonicalUrl: //link[@rel="canonical"]/@href + $movieUriPath: //a[text()="Movie Info"]/@href scene: Title: selector: $script @@ -175,7 +177,29 @@ xPathScrapers: with: '"' Performers: Name: //div/*[self::h1 or self::h2]/..//a[contains(@href,"/model")] - + Movies: + URL: + selector: $canonicalUrl|$movieUriPath + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: '^(https://[^/]+).+__SEPARATOR__' + with: $1 + Name: + selector: $canonicalUrl|$movieUriPath + 
concat: __SEPARATOR__ + postProcess: + - replace: + - regex: '^(https://[^/]+).+__SEPARATOR__' + with: $1 + - subScraper: + selector: //h2/text() + Code: + selector: $canonicalUrl + postProcess: + - replace: + - regex: '.*/scene/(\d+).*' + with: $1 movieScraper: common: $section: //div[text()="Release Date:"]/ancestor::section @@ -326,4 +350,4 @@ xPathScrapers: Image: selector: //img[contains(@src, "model")]/@src URL: //link[@rel="canonical"]/@href -# Last Updated January 15, 2023 +# Last Updated May 31, 2023 From 92829706a6af90c5b12a6e1922b409f1b7a04937 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 31 May 2023 16:52:27 +0100 Subject: [PATCH 124/624] add Movies and Code to other sceneByURL scraper --- scrapers/MindGeek.yml | 54 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 2865e8c1d..6174a87ca 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -73,6 +73,8 @@ xPathScrapers: sceneScraper: common: $section: //div[contains(@class,"tg5e7m")]/ancestor::section + $canonicalUrl: &canonicalUrl //link[@rel="canonical"]/@href + $movieUriPath: &movieUriPath //a[text()="Movie Info"]/@href scene: Title: $section//h1/text()|$section//h2/text() Date: @@ -115,11 +117,34 @@ xPathScrapers: sweetsinner: Sweet Sinner teenslovehugecocks: Teens Love Huge Cocks Image: $section//img[contains(@src,"poster")]/@src + Movies: &sceneMovies + URL: + selector: $canonicalUrl|$movieUriPath + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: '^(https://[^/]+).+__SEPARATOR__' + with: $1 + Name: + selector: $canonicalUrl|$movieUriPath + concat: __SEPARATOR__ + postProcess: + - replace: + - regex: '^(https://[^/]+).+__SEPARATOR__' + with: $1 + - subScraper: + selector: //h1/text()|//h2/text() + Code: &sceneCode + selector: $canonicalUrl + postProcess: + - replace: + - regex: '.*/scene/(\d+).*' + with: $1 scriptScraper: common: $script: 
//script[@type="application/ld+json"] - $canonicalUrl: //link[@rel="canonical"]/@href - $movieUriPath: //a[text()="Movie Info"]/@href + $canonicalUrl: *canonicalUrl + $movieUriPath: *movieUriPath scene: Title: selector: $script @@ -177,29 +202,8 @@ xPathScrapers: with: '"' Performers: Name: //div/*[self::h1 or self::h2]/..//a[contains(@href,"/model")] - Movies: - URL: - selector: $canonicalUrl|$movieUriPath - concat: __SEPARATOR__ - postProcess: - - replace: - - regex: '^(https://[^/]+).+__SEPARATOR__' - with: $1 - Name: - selector: $canonicalUrl|$movieUriPath - concat: __SEPARATOR__ - postProcess: - - replace: - - regex: '^(https://[^/]+).+__SEPARATOR__' - with: $1 - - subScraper: - selector: //h2/text() - Code: - selector: $canonicalUrl - postProcess: - - replace: - - regex: '.*/scene/(\d+).*' - with: $1 + Movies: *sceneMovies + Code: *sceneCode movieScraper: common: $section: //div[text()="Release Date:"]/ancestor::section From b665bb5caabdd45bda436ac61b289d1ca3932b10 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 31 May 2023 17:16:37 +0100 Subject: [PATCH 125/624] add more sites to movieByURL --- SCRAPERS-LIST.md | 14 +++++++------- scrapers/MindGeek.yml | 11 +++++++++-- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..4b27934b7 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -377,7 +377,7 @@ disruptivefilms.com|Algolia_disruptivefilms.yml|:heavy_check_mark:|:heavy_check_ dlsite.com|DLsite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- doegirls.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dogfartnetwork.com|DogFart.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- dollrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
domai.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- @@ -625,7 +625,7 @@ hustlerslesbians.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian hustlerstaboo.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- hypnotube.com|Hypnotube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- iafd.com|IAFD.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Database -iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay idols69.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV ifeelmyself.com|IFeelMyself.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- ihuntmycunt.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -794,7 +794,7 @@ metadataapi.net (JSON API)|ThePornDB.yml|:heavy_check_mark:|:x:|:heavy_check_mar metart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- metartnetwork.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- metartx.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- -milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -892,7 +892,7 @@ nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nubilefilms.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -1052,7 +1052,7 @@ ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawattack.com|RawAttack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF -realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR @@ -1202,8 +1202,8 @@ swallowed.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swallowsalon.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- sweetcarla.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sweetfemdom.com|SweetFemdom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -sweetheartvideo.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian -sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +sweetheartvideo.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Lesbian +sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- sweetyx.com|SweetyX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swinger-blog.xxx|SwingerBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- swnude.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 6174a87ca..454d665d8 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -42,8 +42,15 @@ sceneByURL: movieByURL: - action: scrapeXPath url: - - digitalplayground.com/movie - - transsensual.com/movie + - digitalplayground.com/movie/ + - 
doghousedigital.com/movie/ + - iconmale.com/movie/ + - milehighmedia.com/movie/ + - noirmale.com/movie/ + - realityjunkies.com/movie/ + - sweetheartvideo.com/movie/ + - sweetsinner.com/movie/ + - transsensual.com/movie/ scraper: movieScraper performerByURL: From 5c86b9cd32ebd4ded4d063b01d7e392004f3160f Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 7 Jun 2023 12:05:31 +0100 Subject: [PATCH 126/624] add tags scraping --- scrapers/GroobyNetwork-Partial.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 333df7287..adc583740 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -46,6 +46,8 @@ xPathScrapers: - replace: - regex: ^// # bobstgirls with: "https://" + Tags: &tags + Name: //div[@class="set_tags"]/ul/li//a/text() galleryScraper: gallery: Title: *title @@ -53,6 +55,7 @@ xPathScrapers: Details: *details Performers: *performers Studio: *studio + Tags: *tags sceneScraperGroobyVR: scene: Title: *title @@ -73,4 +76,5 @@ xPathScrapers: - replace: - regex: ^/ with: https://www.groobyvr.com/ -# Last Updated December 19, 2022 + Tags: *tags +# Last Updated June 07, 2023 From 7a1ee48032b5d724b3e1136f98a9b01a977545f4 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 7 Jun 2023 12:42:29 +0100 Subject: [PATCH 127/624] add tags to other grooby scraper --- scrapers/GroobyClub.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapers/GroobyClub.yml b/scrapers/GroobyClub.yml index 74439b5fe..be5f20791 100644 --- a/scrapers/GroobyClub.yml +++ b/scrapers/GroobyClub.yml @@ -44,4 +44,6 @@ xPathScrapers: - replace: - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" with: $1$2 -# Last Updated June 26, 2022 + Tags: + Name: //div[@class="set_tags"]/ul/li//a/text() +# Last Updated June 07, 2023 From 8d0d2877968dd67ea76f0771d71ecf875c0db368 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 7 Jun 2023 12:45:50 +0100 Subject: [PATCH 
128/624] add more grooby network sites --- SCRAPERS-LIST.md | 5 +++++ scrapers/GroobyNetwork-Partial.yml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..fe937c395 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -447,6 +447,7 @@ femdomempire.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:| feminized.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans femjoy.com|FemJoy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- femlatex.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +femout.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans ferame.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored fetishnetwork.com|FetishNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish @@ -1057,6 +1058,7 @@ realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR realsensual.com|RealSensual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1262,6 +1264,8 @@ tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:| tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans tgirlsfuck.com|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans 
+tgirlshookup.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirltops.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- theartporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -1313,6 +1317,7 @@ transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans transgressivexxx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans transmodeldatabase.com|TransModelDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans +transnificent.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans transsensual.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Trans transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index adc583740..ea459333c 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -7,17 +7,22 @@ sceneByURL: - blacktgirlshardcore.com - black-tgirls.com - bobstgirls.com + - femout.xxx - femoutsex.xxx #Scenes on 'femout.xxx' can some times be found on this one as well - franks-tgirlworld.com - grooby-archives.com - groobygirls.com - ladyboy-ladyboy.com - ladyboy.xxx + - realtgirls.com - tgirlsex.xxx - tgirls.porn - tgirls.xxx + - tgirlshookup.com + - tgirltops.com - transexpov.com - transgasm.com + - transnificent.com scraper: sceneScraper - action: scrapeXPath url: From 38380819cf52124b0336f54ced21ea42e7ce5ddc Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Sat, 10 Jun 2023 18:58:27 +0100 
Subject: [PATCH 129/624] Added xPath sceneScraper for Xrares --- SCRAPERS-LIST.md | 1 + scrapers/Xrares.yml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 scrapers/Xrares.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..8c18067c7 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1434,6 +1434,7 @@ xevunleashed.com|Xevunleashed.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xhamster.com|Xhamster.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xlgirls.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- xnxx.com|Xnxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xrares.com|Xrares.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- xsinsvr.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- xslist.org|Xslist.yml|:x:|:x:|:x:|:heavy_check_mark:|-|JAV Database xtube.com|Xtube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Xrares.yml b/scrapers/Xrares.yml new file mode 100644 index 000000000..1bb441a92 --- /dev/null +++ b/scrapers/Xrares.yml @@ -0,0 +1,30 @@ +name: "Xrares" +sceneByURL: + - action: scrapeXPath + url: + - xrares.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + URL: //meta[@property="og:url"]/@content + Image: //meta[@property="og:image"]/@content + Tags: + Name: //meta[@property="video:tag"]/@content + Details: //meta[@property="og:description"]/@content + Date: + selector: //div[@class="pull-right big-views hidden-xs"]//span[@class="text-white"]/text() + postProcess: + - replace: + - regex: (\d+)\sdays\sago + with: $1 + - subtractDays: true + Code: + selector: //meta[@property="og:url"]/@content + postProcess: + - replace: + - regex: .+/video/(\d+)/.+?$ + with: $1 + +# Last Updated June 10, 2023 From 1a747e9fa7b16f4b52bc1f882a26cacd7b8d3bae Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 11 Jun 2023 01:04:15 +0200 Subject: [PATCH 130/624] Add scene scraper for TeenyTaboo.com --- SCRAPERS-LIST.md | 1 + 
scrapers/TeenyTaboo.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 scrapers/TeenyTaboo.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..1270b7c43 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1248,6 +1248,7 @@ teensnaturalway.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teensneaks.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- teenstryblacks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Thai Uncensored +teenytaboo.com|TeenyTaboo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Jav terapatrick.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tessafowler.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/TeenyTaboo.yml b/scrapers/TeenyTaboo.yml new file mode 100644 index 000000000..24734f6da --- /dev/null +++ b/scrapers/TeenyTaboo.yml @@ -0,0 +1,40 @@ +name: Teeny Taboo +sceneByURL: + - action: scrapeXPath + url: + - teenytaboo.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class="customhcolor"] + Details: //h2[@class="customhcolor2"] + Date: + selector: //span[@class="date"] + postProcess: + - replace: + - regex: (\w+) (0?[1-9]|[12][0-9]|3[01])(?:st|nd|th) (\d{4}) + with: $1 $2, $3 + - parseDate: January 2, 2006 + Image: + selector: //center/img/@src + postProcess: + - replace: + - regex: ^ + with: "https://teenytaboo.com/" + Studio: + Name: + fixed: Teeny Taboo + Tags: + Name: + selector: //h4[@class="customhcolor" and not(./span)] + split: "," + Performers: + Name: + selector: //h3[@class="customhcolor"] + split: "," + postProcess: + - replace: + - regex: \x{00a0} + with: "" +# Last Updated June 11, 2023 From 5e9620291118bdfb69bb38cb9128a439217d150d Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Tue, 13 Jun 
2023 00:23:19 +0200 Subject: [PATCH 131/624] Bugfix & Multi-line cleanup --- scrapers/torrent.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 35c99b5e1..7eb043e44 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -6,17 +6,6 @@ from datetime import datetime import difflib -try: - from bencoder import bdecode -except ModuleNotFoundError: - print("You need to install the 'bencoder.pyx' module. (https://pypi.org/project/bencoder.pyx/)", file=sys.stderr) - sys.exit() -from os.path import basename -from pathlib import Path -import re -from datetime import datetime -import difflib - try: from bencoder import bdecode except ModuleNotFoundError: @@ -61,6 +50,7 @@ def process_description_bbcode(description): res = re.sub(r'\[(?:b|i|u|s|url|quote)?\](.*)?\[\/(?:b|i|u|s|url|quote)\]',r"\1", description ) res = re.sub(r'\[.*?\].*?\[\/.*?\]',r'',res) res = re.sub(r'\[.*?\]',r'',res) + res = re.sub(r'[\r\n]{3,}}', '\r\n\r\n', res) return res.strip() def get_torrent_metadata(torrent_data): @@ -140,11 +130,8 @@ def cleanup_name(name): ratios[round(10000*(1-similarity_file_name(search, clean_t)))] = {'url': str(t.absolute()), 'title': clean_t} # Order ratios - ratios_sorted = dict(sorted(ratios.items())) - # Only return the top 5 results - if len(ratios) > 5: - ratios = ratios_sorted[5:] + ratios_sorted = dict(sorted(ratios.items())[:5]) print(json.dumps(list(ratios_sorted.values()))) -# Last Updated December 16, 2022 +# Last Updated June 12, 2023 From 859dc804b46274e8fe8d5ba5e33592b2b250e1fc Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:23:04 +0200 Subject: [PATCH 132/624] Improved bbcode cleanup --- scrapers/torrent.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 7eb043e44..8c0f67a2b 100644 --- a/scrapers/torrent.py +++ 
b/scrapers/torrent.py @@ -6,6 +6,8 @@ from datetime import datetime import difflib +from py_common import log + try: from bencoder import bdecode except ModuleNotFoundError: @@ -47,10 +49,17 @@ def process_tags_performers(tagList): return map(lambda tag: decode_bytes(tag).replace('.', ' '), tagList) def process_description_bbcode(description): - res = re.sub(r'\[(?:b|i|u|s|url|quote)?\](.*)?\[\/(?:b|i|u|s|url|quote)\]',r"\1", description ) - res = re.sub(r'\[.*?\].*?\[\/.*?\]',r'',res) + #Remove image tags + res = re.sub(r'\[img\]([^\[]*)\[\/img\]',r"", description ) + + #Remove bbcode & replace with the contained text + res = re.sub(r'\[.*?\]([^\[]*)\[\/(?:b|i|u|s|url|quote)\]',r"\1", res ) + + #Cleanup any bbcode tags that may have been left behind res = re.sub(r'\[.*?\]',r'',res) - res = re.sub(r'[\r\n]{3,}}', '\r\n\r\n', res) + + #Remove excessive newlines + res = re.sub(r'[\r|\n]{3,}', '\r\n\r\n', res) return res.strip() def get_torrent_metadata(torrent_data): @@ -112,6 +121,7 @@ def cleanup_name(name): ret = ret.removeprefix("torrents\\").removesuffix(".torrent") return ret +log.debug(sys.argv[1]) if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) From 06d0793976368791045193dd795005642fb2c079 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Tue, 13 Jun 2023 11:24:20 +0200 Subject: [PATCH 133/624] Remove debug logs --- scrapers/torrent.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scrapers/torrent.py b/scrapers/torrent.py index 8c0f67a2b..a88db119b 100644 --- a/scrapers/torrent.py +++ b/scrapers/torrent.py @@ -6,8 +6,6 @@ from datetime import datetime import difflib -from py_common import log - try: from bencoder import bdecode except ModuleNotFoundError: @@ -121,8 +119,6 @@ def cleanup_name(name): ret = ret.removeprefix("torrents\\").removesuffix(".torrent") return ret -log.debug(sys.argv[1]) - if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) 
print(json.dumps(process_torrents(get_scene_data(fragment)))) From 9699d0a23e13179971befe215f94ac17890caf0b Mon Sep 17 00:00:00 2001 From: nrg101 Date: Tue, 13 Jun 2023 11:04:57 +0100 Subject: [PATCH 134/624] add tags/categories scraping to LetsDoeIt.yml --- scrapers/LetsDoeIt.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/LetsDoeIt.yml b/scrapers/LetsDoeIt.yml index 94cdf1d39..4143957ce 100644 --- a/scrapers/LetsDoeIt.yml +++ b/scrapers/LetsDoeIt.yml @@ -41,10 +41,10 @@ xPathScrapers: - regex: (.*\.?\!?)(?:\s-\s\w.*-.*)$ # remove Studio name at the end of a description with: $1 Tags: - Name: $details//div[@class="col"][4]//a/text()|$details//div[@class="col"][6]//a/text() + Name: $details//div[@class="col"][4]//a/text()|$details//div[@class="col"][6]//a/text()|//span[@class="-mvd-list-item"]//a/text() Performers: Name: $actors//span/a[contains(@href,"/models/")] Studio: Name: $actors//a//text() Image: //source[@type="image/webp"]/@srcset -# Last Updated August 19, 2022 +# Last Updated June 13, 2023 From 9d24f71e726416ee3404d290486c5e6041850237 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Wed, 14 Jun 2023 10:06:57 +0200 Subject: [PATCH 135/624] Aliases are now split correctly --- scrapers/Babepedia.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapers/Babepedia.yml b/scrapers/Babepedia.yml index aa88e3933..b09386bdb 100644 --- a/scrapers/Babepedia.yml +++ b/scrapers/Babepedia.yml @@ -121,6 +121,8 @@ xPathScrapers: - replace: - regex: aka with: "" + - regex: '\/' + with: "," Tattoos: $label[text()='Tattoos:']]/text() Piercings: $label[text()='Piercings:']]/text() Image: From 04a792e7e08395dcf1b561c665b831a54015ef01 Mon Sep 17 00:00:00 2001 From: devnsfw <> Date: Sat, 17 Jun 2023 02:23:09 +0200 Subject: [PATCH 136/624] Add Milfy to Vixen Media Group scraper --- scrapers/vixenNetwork.py | 1 + scrapers/vixenNetwork.yml | 3 ++- 2 files changed, 3 insertions(+), 1 
deletion(-) diff --git a/scrapers/vixenNetwork.py b/scrapers/vixenNetwork.py index d1a92c3af..aac186843 100644 --- a/scrapers/vixenNetwork.py +++ b/scrapers/vixenNetwork.py @@ -238,6 +238,7 @@ def sortByLength(sites): Site('Blacked Raw'), Site('Blacked'), Site('Deeper'), + Site('Milfy'), Site('Tushy'), Site('Tushy Raw'), Site('Slayed'), diff --git a/scrapers/vixenNetwork.yml b/scrapers/vixenNetwork.yml index 9e6559504..44d4f5b0d 100644 --- a/scrapers/vixenNetwork.yml +++ b/scrapers/vixenNetwork.yml @@ -4,6 +4,7 @@ sceneByURL: - blacked.com/videos - blackedraw.com/videos - deeper.com/videos + - milfy.com/videos - slayed.com/videos - tushy.com/videos - tushyraw.com/videos @@ -29,4 +30,4 @@ sceneByQueryFragment: - python - vixenNetwork.py -# Last Updated January 07, 2022 +# Last Updated June 17, 2023 From 6f05356355e2262ad49cf9a5a6b26a2db576ac16 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 17 Jun 2023 04:09:57 +0200 Subject: [PATCH 137/624] fix styled text in description missing spaces --- scrapers/IFeelMyself.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/IFeelMyself.py b/scrapers/IFeelMyself.py index 25ac538ac..fe9d13059 100644 --- a/scrapers/IFeelMyself.py +++ b/scrapers/IFeelMyself.py @@ -33,7 +33,7 @@ def readJSONInput(): def extract_SceneInfo(table,cover_url=None): description = None if table.find(class_= ["blog_wide_new_text","entryBlurb"]): - description=table.find(class_= ["blog_wide_new_text","entryBlurb"]).get_text(strip=True) + description=table.find(class_= ["blog_wide_new_text","entryBlurb"]).get_text(" ", strip=True) description=unicodedata.normalize('NFKC', description).encode('ascii','ignore').decode('ascii') date = table.find(class_="blog-title-right").get_text(strip=True) #This is a BeautifulSoup element performer = table.find(class_= ["entryHeadingFlash","entryHeading"]).find_all("a")[1].get_text().replace("_"," ") From 5eb50937d2f44b1ede90ab39c9d0c43c3194820c Mon Sep 17 00:00:00 2001 From: ZzazzCDTS Date: 
Sun, 18 Jun 2023 10:18:20 +0100 Subject: [PATCH 138/624] Added new scraper for Loyalfans links --- scrapers/Loyalfans/Loyalfans.py | 165 ++++++++++++++++++++++++++++ scrapers/Loyalfans/Loyalfans.yml | 8 ++ scrapers/Loyalfans/requirements.txt | 3 + 3 files changed, 176 insertions(+) create mode 100644 scrapers/Loyalfans/Loyalfans.py create mode 100644 scrapers/Loyalfans/Loyalfans.yml create mode 100644 scrapers/Loyalfans/requirements.txt diff --git a/scrapers/Loyalfans/Loyalfans.py b/scrapers/Loyalfans/Loyalfans.py new file mode 100644 index 000000000..55596e5b8 --- /dev/null +++ b/scrapers/Loyalfans/Loyalfans.py @@ -0,0 +1,165 @@ +import os +import sys +import json + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + # Import Stash logging system from py_common + from py_common import log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo. (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr) + sys.exit() + +try: + # Import necessary modules. + from lxml import html + import requests + from requests import utils + from requests import cookies + import re + from urllib.parse import urlparse + from bs4 import BeautifulSoup + + # Set headers with user agent to avoid Cloudflare throwing a hissy fit. 
+ # Establish session and implement headers + +# If one of these modules is not installed: +except ModuleNotFoundError: + log.error( + "You need to install the python modules mentioned in requirements.txt" + ) + log.error( + "If you have pip (normally installed with python), run this command in a terminal from the directory the scraper is located: pip install -r requirements.txt" + ) + sys.exit() + +def output_json_url(title, tags, url, image, studio, performers, description, date): + # Split the tags into a list (comma-separated), stripping away any trailing full stops or tags which are just "N/A" + tag_list = [tag.strip().rstrip('.') for tag in tags.split(",") if tag.strip() != "N/A"] + # Create a tag dictionary from the tag list. + tag_dicts = [{"name": tag} for tag in tag_list] + # We're only using the value of 'performers' for our performer list. Kept for future-proofing, and also because I couldn't get it to work any other way. + performer_list = [performers] + performer_dicts = [{"name": performer} for performer in performer_list] + # Dump all of this as JSON data. 
+ return json.dumps({ + "title": title, + "tags": tag_dicts, + "url": url, + "image": image, + "studio": {"name": studio}, + "performers": performer_dicts, + "details": description, + "date": date + }, indent=4) + +def get_cookies(scene_url: str): + session = requests.Session() + headers = { + 'Accept': 'application/json, text/plain, */*', + 'Accept-Language': 'en-US,en;q=0.9', + 'Connection': 'keep-alive', + 'Content-Type': 'application/json', + 'Origin': 'https://www.loyalfans.com', + 'Referer': scene_url, + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-origin', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + 'X-Api-Version': '3.4.4', + 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Linux"', + } + url = 'https://www.loyalfans.com/api/v2/system-status' + data = {} + response = session.post(url, headers=headers, json=data) + return response.cookies + + +def get_api_url(scene_url: str): + end_segment = scene_url.split('/')[-1] + return f"https://www.loyalfans.com/api/v1/social/post/{end_segment}" + +def get_json(scene_url: str): + headers = { + 'Accept': 'application/json, text/plain, */*', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US,en;q=0.9', + 'Connection': 'keep-alive', + 'Host': 'www.loyalfans.com', + 'Referer': scene_url, + 'Sec-Fetch-Dest': 'empty', + 'Sec-Fetch-Mode': 'cors', + 'Sec-Fetch-Site': 'same-origin', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', + 'X-Api-Version': '3.4.4', + 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Linux"' + } + cookie_set = get_cookies(scene_url) + response = requests.get(get_api_url(scene_url), headers=headers, cookies=cookie_set) + json_data = 
response.json() + return json_data + +def scrape_scene(scene_url: str) -> dict: + json = get_json(scene_url) + title = json['post']['title'].strip() + image = json['post']['video_object']['poster'] + description = json['post']['content'].replace('\u2019', "'").replace('
', '') + description = re.sub(r'#\w+\b', '', description).strip().replace(' ', ' ').replace('. . .', '...') + studio = json['post']['owner']['display_name'] + date = json['post']['created_at']['date'].split(' ')[0] + tags_list = json['post']['hashtags'] + replacements = { + 'Fin Dom': 'Findom', + 'Fem Dom': 'Femdom', + 'bigtits': 'Big Tits', + 'titworship': 'Tit Worship', + 'financialdomination': 'Financial Domination', + 'R I P O F F': 'ripoff', + 'pussydenial': 'pussy denial' + } + fixed_tags = [] + for tag in tags_list: + tag = tag[1:] + modified_tag = tag + modified_tag = re.sub(r'(? Date: Sun, 18 Jun 2023 10:33:40 +0100 Subject: [PATCH 139/624] Added new scraper for Loyalfans links - added additional commenting --- scrapers/Loyalfans/Loyalfans.py | 34 +++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/scrapers/Loyalfans/Loyalfans.py b/scrapers/Loyalfans/Loyalfans.py index 55596e5b8..aeef5e4a4 100644 --- a/scrapers/Loyalfans/Loyalfans.py +++ b/scrapers/Loyalfans/Loyalfans.py @@ -29,9 +29,6 @@ from urllib.parse import urlparse from bs4 import BeautifulSoup - # Set headers with user agent to avoid Cloudflare throwing a hissy fit. - # Establish session and implement headers - # If one of these modules is not installed: except ModuleNotFoundError: log.error( @@ -63,7 +60,9 @@ def output_json_url(title, tags, url, image, studio, performers, description, da }, indent=4) def get_cookies(scene_url: str): + # Establish a session. session = requests.Session() + # Set headers required for a successful POST query. headers = { 'Accept': 'application/json, text/plain, */*', 'Accept-Language': 'en-US,en;q=0.9', @@ -80,17 +79,23 @@ def get_cookies(scene_url: str): 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Linux"', } + # URL of the system status API. This is called when a Loyalfans page is first loaded from what I can tell. 
url = 'https://www.loyalfans.com/api/v2/system-status' data = {} + # Perform a POST query to capture initial cookies. response = session.post(url, headers=headers, json=data) + # Return these cookies. return response.cookies def get_api_url(scene_url: str): + # Extract the last component of the scene URL. end_segment = scene_url.split('/')[-1] + # Append this to the API link. As far as I can tell, post names in scene URLs are unique. I have yet to encounter any data mismatches. return f"https://www.loyalfans.com/api/v1/social/post/{end_segment}" def get_json(scene_url: str): + # Set headers required for a successful request. headers = { 'Accept': 'application/json, text/plain, */*', 'Accept-Encoding': 'gzip, deflate, br', @@ -107,20 +112,33 @@ def get_json(scene_url: str): 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"Linux"' } + # Set cookies using get_cookies function. cookie_set = get_cookies(scene_url) + # Perform request using the API URL of the scene in question, adding headers and cookies. response = requests.get(get_api_url(scene_url), headers=headers, cookies=cookie_set) + # Capture the response as JSON. json_data = response.json() + # Return the JSON data. return json_data def scrape_scene(scene_url: str) -> dict: + # Capture JSON relating to this scene from the Loyalfans API. json = get_json(scene_url) + # Extract title from the JSON and strip out any whitespace. title = json['post']['title'].strip() + # Use the video thumbnail/preview poster as the image. image = json['post']['video_object']['poster'] + # Extract description, fix apostrophes and remove HTML newline tags. description = json['post']['content'].replace('\u2019', "'").replace('
', '') + # Sometimes hashtags are included at the bottom of the description. This line strips all that junk out, as we're utilising the hashtags for the tags. Also tidies up double-spacing and ellipses. description = re.sub(r'#\w+\b', '', description).strip().replace(' ', ' ').replace('. . .', '...') + # Extract studio name. studio = json['post']['owner']['display_name'] + # Extract date. The JSON returns the date in the format '2023-06-18 12:00:00', but we only need the date, so the time is stripped out. date = json['post']['created_at']['date'].split(' ')[0] + # Extract tags. tags_list = json['post']['hashtags'] + # Lookup table for tag replacements. The tags are in the form of hashtags, and often have multiple words mashed together. This is a quick and dirty way of turning these into meaningful data, and can be expanded on to taste. replacements = { 'Fin Dom': 'Findom', 'Fem Dom': 'Femdom', @@ -131,14 +149,18 @@ def scrape_scene(scene_url: str) -> dict: 'pussydenial': 'pussy denial' } fixed_tags = [] + # For every tag we find: for tag in tags_list: + # Remove the hash from the start. tag = tag[1:] modified_tag = tag + # Split CamelCase tags into separate words. modified_tag = re.sub(r'(? 
Date: Sun, 18 Jun 2023 10:39:45 +0100 Subject: [PATCH 140/624] Added new scraper for Loyalfans links - added 'last updated' to YML --- scrapers/Loyalfans/Loyalfans.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapers/Loyalfans/Loyalfans.yml b/scrapers/Loyalfans/Loyalfans.yml index 9bdca4c84..74f0c006d 100644 --- a/scrapers/Loyalfans/Loyalfans.yml +++ b/scrapers/Loyalfans/Loyalfans.yml @@ -6,3 +6,5 @@ sceneByURL: script: - python3 - Loyalfans.py + +# Last Updated June 18, 2023 From ec69eea5551090b0d33a32b47e6f2f30bb7d2ad6 Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Mon, 19 Jun 2023 21:58:14 +1000 Subject: [PATCH 141/624] Add scene scraping for MetalBondage.com --- SCRAPERS-LIST.md | 1 + scrapers/MetalBondage.yml | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 scrapers/MetalBondage.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..5138efbab 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -791,6 +791,7 @@ menover30.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x menpov.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay messyxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- metadataapi.net (JSON API)|ThePornDB.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +metalbondage.com|MetalBondage.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- metart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- metartnetwork.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- metartx.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- diff --git a/scrapers/MetalBondage.yml b/scrapers/MetalBondage.yml new file mode 100644 index 000000000..dd7832ab1 --- /dev/null +++ b/scrapers/MetalBondage.yml @@ -0,0 +1,39 @@ +name: Metal Bondage +sceneByURL: + - action: scrapeXPath + url: + - 
metalbondage.com + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //title/text() + Date: + selector: //div[@class="metabar"]/text() + postProcess: + - parseDate: January 2, 2006 + Details: + selector: //div[@class="textcontent"]/p + concat: "\n\n" + Code: + selector: //div[@class="post-title fix"]//a/text() + postProcess: + - replace: + - regex: '(MB\d+)\s([\w-].*)' + with: $1 + Performers: + Name: + selector: //div[@class="tags"]//a[@rel="tag"]/text() + Tags: + Name: + selector: //div[@class="tags"]//a[@rel="category tag"]/text() + Image: //div[@class="textcontent"]//img/@src + Studio: + Name: + fixed: "Metal Bondage" + URL: //link[@rel="canonical"]/@href + +driver: + useCDP: true +# Last Updated December 16, 2022 From e7d72d6f9b0643ab9d596f689f1e04073fb43f89 Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Mon, 19 Jun 2023 22:19:14 +1000 Subject: [PATCH 142/624] Add Blackfoxbound UK Scene support --- SCRAPERS-LIST.md | 1 + scrapers/BlackFoxBound.yml | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 scrapers/BlackFoxBound.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..fd8eb5f99 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -182,6 +182,7 @@ black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +Blackfoxbound UK|BlackFoxBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/BlackFoxBound.yml b/scrapers/BlackFoxBound.yml new file mode 100644 index 000000000..40c5de22d --- 
/dev/null +++ b/scrapers/BlackFoxBound.yml @@ -0,0 +1,31 @@ +name: Blackfoxbound UK +sceneByURL: + - action: scrapeXPath + url: + - blackfoxbound.net + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + Date: + selector: //div[@class="page-header"]//span[@title="Published at"]//text() + postProcess: + - parseDate: 2006-01-02 + Details: + selector: //meta[@property="og:description"]/@content + Performers: + Name: + selector: //span[@title="Models"]//a/text() + Tags: + Name: + selector: //span[@title="Categories"]//a/text() + #Image: //div[@class="video-container"]//video/@poster + Image: //meta[@property="og:image"]/@content + Studio: + Name: + fixed: "Blackfoxbound UK" + URL: //link[@rel="canonical"]/@href + +# Last Updated June 19, 2023 From 2dcfba41d367ad7c87861ca3a4dad365ab904d49 Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Tue, 20 Jun 2023 00:45:30 +1000 Subject: [PATCH 143/624] Add support for BeltBound.com --- SCRAPERS-LIST.md | 1 + scrapers/BeltBound.yml | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 scrapers/BeltBound.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..bb0d9aeef 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -167,6 +167,7 @@ bellanextdoor.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bellapass.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bellesafilms.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bellesahouse.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +beltbound.com|BeltBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bestoftealconrad.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bffs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bigbootytgirls.com|BigBootyTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans diff --git a/scrapers/BeltBound.yml b/scrapers/BeltBound.yml new file mode 100644 index 
000000000..f2cc90851 --- /dev/null +++ b/scrapers/BeltBound.yml @@ -0,0 +1,35 @@ +name: BeltBound +sceneByURL: + - action: scrapeXPath + url: + - beltbound.com + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //a[@rel="bookmark"]/text() + Code: + selector: '//div[@class="post"]/a/img/@src' + postProcess: + - replace: + - regex: '.*\/updates\/bb(\d+)\/.*' + with: BB$1 + Date: + selector: //div[@class="post"]/*[starts-with(text(), "Posted on ")] + postProcess: + - replace: + - regex: 'Posted on ([A-Z]\w+) (\d{1,2})([a-z]{2})?, (\d{4})' + with: "$1 $2, $4" + - parseDate: January 2, 2006 + Details: + selector: '//div[@class="post"]/p[2]' + Performers: + Name: '//a[@rel="tag"]' + Image: '//div[@class="post"]/a/img/@src' + Studio: + Name: + fixed: "BeltBound" + +# Last Updated June 19, 2023 \ No newline at end of file From 3b3766cc67a43b90c0f0951a18262dcce297dec2 Mon Sep 17 00:00:00 2001 From: DogmaDragon <103123951+DogmaDragon@users.noreply.github.com> Date: Wed, 21 Jun 2023 22:51:25 +0300 Subject: [PATCH 144/624] Update outdated link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 487bd6f8c..c6878bf8c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # CommunityScrapers This is a public repository containing scrapers created by the Stash Community. -**:exclamation: Make sure to read ALL of the instructions here before requesting any help in the discord channel. For a more user friendly / step by step guide you can checkout [stashdb's guide to scraping](https://guidelines.stashdb.org/docs/guide-to-scraping/):exclamation:** +**:exclamation: Make sure to read ALL of the instructions here before requesting any help in the discord channel. 
For a more user friendly / step by step guide you can checkout [stashdb's guide to scraping](https://docs.stashapp.cc/beginner-guides/guide-to-scraping/):exclamation:** To download the scrapers you can clone the git repo or download directly any of the scrapers. From 817a49a1faad710baa9ae22c6f0b7725d93a38ac Mon Sep 17 00:00:00 2001 From: CJ <72030708+Teda1@users.noreply.github.com> Date: Fri, 23 Jun 2023 16:18:31 -0500 Subject: [PATCH 145/624] Update CopyToGallery.py Updated the use of the INCLUDES_ALL modifier --- scrapers/CopyToGallery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/CopyToGallery.py b/scrapers/CopyToGallery.py index cf4c11866..b5f935e15 100644 --- a/scrapers/CopyToGallery.py +++ b/scrapers/CopyToGallery.py @@ -26,7 +26,7 @@ def get_gallery_id_by_path(gallery_path): } } """ - variables = {"galleries_filter": {"path": {'value': gallery_path, "modifier": "INCLUDES_ALL"}}} + variables = {"galleries_filter": {"path": {'value': gallery_path, "modifier": "EQUALS"}}} result = call_graphql(query, variables) log.debug("get_gallery_by_path callGraphQL result " + str(result)) return result['findGalleries']['galleries'][0]['id'] From 07139831a59edbf8ac524df5abfa2b54a93149a2 Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:48:55 +0000 Subject: [PATCH 146/624] Update SCRAPERS-LIST.md --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..f50e88f87 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1343,6 +1343,7 @@ twistysnetwork.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- twotgirls.com|TwoTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans uk-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans ultrafilms.com|UltraFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +underhentai.com|UnderHentai.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai 
unlimitedmilfs.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- unrealporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- upherasshole.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From f3f14b09b32f39a9ea351f7698da435678031f1a Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:49:46 +0000 Subject: [PATCH 147/624] Add files via upload --- scrapers/UnderHentai.yml | 63 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 scrapers/UnderHentai.yml diff --git a/scrapers/UnderHentai.yml b/scrapers/UnderHentai.yml new file mode 100644 index 000000000..0fb3340f6 --- /dev/null +++ b/scrapers/UnderHentai.yml @@ -0,0 +1,63 @@ +name: UnderHentai +################################################################################################################ +# HOW TO SET UP # +# Store this file in the ~/stash/scrapers/AniDB.yml # +# (If the scrapers directory is not there it needs to be created) # +################################################################################################################ +# HOW TO USE # +# SCENES: # +# The scene Scraper by Fragment is the best option in case the file name is the name of the anime # +# Scenes that were not found can easily be found by the name scraper # +# Don't put the episode number otherwise it won't find it # +# It is also possible to scrape individually with the anime URL # +# The scraper doesn't recognize the episode number, I recommend changing it manually at the end # +# THAT'S IT, ENJOY! 
# +# Made by @escargotbuffed # +################################################################################################################ +sceneByFragment: + action: scrapeXPath + queryURL: https://www.underhentai.net/{filename} + queryURLReplace: + filename: + - regex: \..+$|\d+ + with: "" + - regex: \s+ + with: "-" + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - underhentai.net/ + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.underhentai.net/?s={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + scene: + Title: //article[@class="data-block"]//h2/a + URL: + selector: //article[@class="data-block"]//h2/a/@href + postProcess: + - replace: + - regex: ^ + with: https://www.underhentai.net/ + Image: //article[@class="data-block"]//img/@src + sceneScraper: + scene: + Title: //h1[@class="content-box content-head sidebar-light"] + Details: //p[contains(text(),"Official Title")]/following-sibling::span + URL: //link[@rel="canonical"]/@href + Tags: + Name: //p[contains(text(),"Genres")]/following-sibling::a + Studio: + Name: + selector: //p[contains(text(),"Brand")]/following-sibling::a + Image: //div[@class="loading"]/img/@src +# Last Updated June 23, 2023 \ No newline at end of file From ffb6b99a532ff32f7fbe056d8a732cb1da22e6e8 Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:52:38 +0000 Subject: [PATCH 148/624] Update SCRAPERS-LIST.md --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..a127ca027 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -91,6 +91,7 @@ analyzedgirls.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- angelasommers.com|angelasommers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
angelawhite.com|AngelaWhite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- angelinacastrolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +anidb.net|AniDB.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Hentai Database anilos.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- animecharactersdatabase.com|AnimeCharactersDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|Database archangelvideo.com|ArchAngelVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 076c64931dd3f7a6fc9b4464cf2fefc9334a1cbe Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Fri, 23 Jun 2023 21:57:12 +0000 Subject: [PATCH 149/624] Add files via upload --- scrapers/AniDB.yml | 137 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 scrapers/AniDB.yml diff --git a/scrapers/AniDB.yml b/scrapers/AniDB.yml new file mode 100644 index 000000000..2a64a36da --- /dev/null +++ b/scrapers/AniDB.yml @@ -0,0 +1,137 @@ +name: AniDB +################################################################################################################ +# HOW TO SET UP # +# Store this file in the ~/stash/scrapers/AniDB.yml # +# (If the scrapers directory is not there it needs to be created) # +# SET COOKIES: # +# Access the anidb.net website > login > right button > inspect > find cookies storage # +# Copy the "Value" of "adbsess" and "adbuin" and replace in the cookies category of this document # +# If your account is new, you need to access any NSFW anime and confirm that you want to see 18+ content # +# Do not change the order of the columns, as it can make it stop working # +# I recommend creating a new account just for this # +# SET USER AGENT: # +# Go to your Stash > Settings > Metadata Providers > Scraping > Scraper User Agent # +# Use the User Agent of your choice # +# I'm currently using: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0 # 
+################################################################################################################ +# HOW TO USE # +# SCENES: # +# The scene Scraper by Fragment is the best option in case the file name is the name of the anime # +# Scenes that were not found can easily be found by the name scraper # +# It is also possible to scrape individually with the anime URL # +# The scraper doesn't recognize the episode number, I recommend changing it manually at the end # +# PERFORMERS: # +# Performers need to be individually scraped by name or URL # +# I recommend creating them by scraping scenes and then searching individually by name # +# THAT'S IT, ENJOY! # +# Made by @escargotbuffed # +################################################################################################################ +performerByURL: + - action: scrapeXPath + url: + - https://anidb.net + scraper: performerScraper +performerByName: + action: scrapeXPath + queryURL: https://anidb.net/search/anime/?adb.search={}&entity.chartb=1 + scraper: performerSearch + +sceneByFragment: + action: scrapeXPath + queryURL: https://anidb.net/anime/?adb.search={filename} + queryURLReplace: + filename: + - regex: \..+$|\d+ + with: "" + - regex: \s+ + with: "%20" + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - https://anidb.net/ + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://anidb.net/search/anime/?adb.search={}&entity.animetb=1 + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + performerSearch: + performer: + Name: //td[@class="relid"]/a + URL: + selector: //td[@class="relid"]/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + performerScraper: + common: + $info: //div[@class="g_section info"] + $tab_1_pane: //div[@class="g_section info"]//div[@id="tab_1_pane"] + $looks: //div[@class="g_section 
info"]//div[@id="tab_1_pane"]//tr[contains(@class, 'looks')] + performer: + Name: $tab_1_pane//tr[contains(@class, 'mainname')]//span[@itemprop="name"] + Aliases: $tab_1_pane//tr[contains(@class, 'official')]//label[@itemprop="alternateName"] + Gender: $tab_1_pane//tr[contains(@class, 'gender')]//span[@itemprop="gender"] + Ethnicity: $tab_1_pane//tr[contains(@class, 'entity')]//span[@class="tagname"] + HairColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'hair')] + EyeColor: $looks//span[contains(@class, 'tagname') and contains(text(), 'eyes')] + Height: $tab_1_pane//tr[contains(@class, 'height')]//span[@itemprop="height"] + Weight: $tab_1_pane//tr[contains(@class, 'weight')]//span[@itemprop="weight"] + #Measurements: Todo + URL: //link[@rel="canonical"]/@href + Details: + selector: //div[@itemprop="description"]//text() + concat: "\n" + Tags: + Name: $tab_1_pane//span[@class="g_tag"]//span[@class="tagname"] + Image: $info//div[@class="image"]//img/@src + + sceneSearch: + scene: + Title: //td[@class="relid"]/a + URL: + selector: //td[@class="relid"]/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + Image: //td[@class="thumb anime"]//img/@src + sceneScraper: + common: + $info: //div[@class="g_section info"] + scene: + Title: $info//div[@id="tab_1_pane"]//span[@itemprop="name"] + Details: + selector: //div[@itemprop="description"]//text() + concat: "\n" + Tags: + Name: $info//div[@id="tab_1_pane"]//span[@class="tagname"] + Performers: + Name: //div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"]/a/span + Studio: + Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a + Image: $info//div[@class="image"]//img/@src + URL: //link[@rel="canonical"]/@href + +driver: + cookies: + - CookieURL: "https://anidb.net/" + Cookies: + # Access adult content requires a anidb account + # Replace value field + - Name: "adbsess" + 
Domain: "anidb.net" + Value: "" # Enter the value of the 'adbuin' here + Path: "/" + - Name: "adbuin" + Domain: "anidb.net" + Value: "" # Enter the value of the 'adbuin' here + Path: "/" +# Last Updated June 23, 2023 \ No newline at end of file From 058b4b95040950eadf564e5b4efb616be48eb235 Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Sun, 25 Jun 2023 02:11:48 -0700 Subject: [PATCH 150/624] Update BangBros.yml site got a new layout, updating scraper --- scrapers/BangBros.yml | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/scrapers/BangBros.yml b/scrapers/BangBros.yml index 0913fb371..36d0f175b 100644 --- a/scrapers/BangBros.yml +++ b/scrapers/BangBros.yml @@ -7,34 +7,30 @@ sceneByURL: xPathScrapers: sceneScraper: scene: - Title: //div[@class="ps-vdoHdd"]/h1/text() - Details: //div[@class="vdoDesc"]/text() + Title: //h2[contains(@class,"jAsNxx")]/text() + Details: //p[contains(@class,"iANaVe")]/text() + Code: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: .+?video\/([0-9]+?)\/.+ + with: $1 Tags: - Name: - selector: //div[@class="vdoTags"]/a/text() + Name: //div[contains(@class,"lgrCSo")]//a[starts-with(@href,"/videos/tags/")]/text() Performers: - Name: //div[@class="vdoCast"]/a[position()>1]/text() + Name: //h2[contains(@class,"llbToU")]//a/text() Image: - selector: //video/@poster - #selector: //img[@id="player-overlay-image"]/@src # Better image but can fail on older scenes + selector: //script[@type="application/ld+json"]/text() postProcess: - replace: - - regex: ^ - with: "https:" + - regex: ".+\"thumbnailUrl\": \"(.+?)\".+" + with: $1 Studio: - Name: - selector: //div[@class="vdoCast"]/a[1]/text() - postProcess: - # Fix special cases - - map: - MomIsHorny: Mom is Horny + Name: //div[contains(@class,"gQQXgf")]/a/text() Date: - selector: //div[@class="vdoCast" and contains(text(), "Release:")] + selector: //script[@type="application/ld+json"]/text() postProcess: - 
replace: - - regex: "^Release: " - with: "https://bangbros.com/search/" - - subScraper: - selector: //span[@class="thmb_mr_cmn thmb_mr_2 clearfix"]/span[@class="faTxt"] - - parseDate: Jan 2, 2006 -# Last Updated June 08, 2022 + - regex: ".+\"uploadDate\": \"(.+?)\".+" + with: $1 +# Last Updated June 25, 2023 From 424d25abb2cca18851a8ae6d7c9d67335a4b6878 Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Sun, 25 Jun 2023 02:14:34 -0700 Subject: [PATCH 151/624] Update BangBros.yml add URL so during rescrapes of old scenes the URL gets updated to the new format --- scrapers/BangBros.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/scrapers/BangBros.yml b/scrapers/BangBros.yml index 36d0f175b..6103bf84c 100644 --- a/scrapers/BangBros.yml +++ b/scrapers/BangBros.yml @@ -8,29 +8,30 @@ xPathScrapers: sceneScraper: scene: Title: //h2[contains(@class,"jAsNxx")]/text() - Details: //p[contains(@class,"iANaVe")]/text() Code: selector: //link[@rel="canonical"]/@href postProcess: - replace: - regex: .+?video\/([0-9]+?)\/.+ with: $1 - Tags: - Name: //div[contains(@class,"lgrCSo")]//a[starts-with(@href,"/videos/tags/")]/text() - Performers: - Name: //h2[contains(@class,"llbToU")]//a/text() - Image: + URL: //link[@rel="canonical"]/@href + Date: selector: //script[@type="application/ld+json"]/text() postProcess: - replace: - - regex: ".+\"thumbnailUrl\": \"(.+?)\".+" + - regex: ".+\"uploadDate\": \"(.+?)\".+" with: $1 Studio: Name: //div[contains(@class,"gQQXgf")]/a/text() - Date: + Performers: + Name: //h2[contains(@class,"llbToU")]//a/text() + Tags: + Name: //div[contains(@class,"lgrCSo")]//a[starts-with(@href,"/videos/tags/")]/text() + Details: //p[contains(@class,"iANaVe")]/text() + Image: selector: //script[@type="application/ld+json"]/text() postProcess: - replace: - - regex: ".+\"uploadDate\": \"(.+?)\".+" + - regex: ".+\"thumbnailUrl\": \"(.+?)\".+" with: $1 # Last Updated June 25, 2023 From bbb788ce78e83115743bb6ce5abb3e33000de0ad 
Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Sun, 25 Jun 2023 20:56:56 +1000 Subject: [PATCH 152/624] Strip links from details; Should clean up some of the bios with embedded links a little better. --- scrapers/Kink.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scrapers/Kink.yml b/scrapers/Kink.yml index 2717e13e3..caf7ae6a7 100644 --- a/scrapers/Kink.yml +++ b/scrapers/Kink.yml @@ -166,9 +166,13 @@ xPathScrapers: Tags: Name: '//div/span[text()=" tags: "]/following-sibling::a/span/text()' Details: - selector: '//div/span/p[@class="bio"]/following-sibling::p//text()' + selector: '//div/span/p[@class="bio"]/following-sibling::p' concat: "\n" + postProcess: + - replace: + - regex: '(?i)]*>' + with: "" URL: //link[@rel="canonical"]/@href driver: useCDP: true -# Last Updated May 25, 2023 +# Last Updated June 25, 2023 From 61e4aa321b8c3b1dc30eacf7f03f01ced5d0c140 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 10:58:43 +0100 Subject: [PATCH 153/624] add handling of multiple paragraph scene descriptions (Details) --- scrapers/GroobyNetwork-Partial.yml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index ea459333c..e2e7d713d 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -40,7 +40,9 @@ xPathScrapers: selector: //div[@class="setdesc"]//b[contains(.,"Added")]/following-sibling::text()[1] postProcess: - parseDate: January 2, 2006 - Details: &details //div[@class="trailerpage_info"]/p[not(@class)]/text() + Details: &details + selector: //div[@class="trailerpage_info"]/p[not(@class)]/text()|//div[@class="trailerpage_info"]/p[not(@class)]/span/text() + concat: "\n\n" Performers: &performers Name: //div[@class="setdesc"]//a/text() Studio: &studio @@ -68,7 +70,9 @@ xPathScrapers: selector: 
//div[@class="set_meta"]//b[contains(.,"Added")]/following-sibling::text()[1] postProcess: - parseDate: January 2, 2006 - Details: //div[@class="trailerblock"]/p[not(@class)]/text() + Details: + selector: //div[@class="trailerblock"]/p[not(@class)]/text() + concat: "\n\n" Performers: Name: //div[@class="trailer_toptitle_left"]//a/text() Studio: *studio @@ -82,4 +86,4 @@ xPathScrapers: - regex: ^/ with: https://www.groobyvr.com/ Tags: *tags -# Last Updated June 07, 2023 +# Last Updated June 28, 2023 From a1e35b8c6cd7c8a2b8b26e2cc8d4ee855d934ef7 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 14:23:41 +0100 Subject: [PATCH 154/624] update POVR for current layout/style --- scrapers/POVR.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 19e352386..9bb7267ff 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -8,7 +8,7 @@ xPathScrapers: sceneScraper: scene: Title: - selector: //span[@class="video__quality video__quality--vr mr-3"]/following-sibling::text() + selector: //h1[contains(@class, "heading-title")]/text() Date: selector: //p[contains(@class, 'player__date')] postProcess: @@ -17,14 +17,20 @@ xPathScrapers: with: $1 - parseDate: 2 January, 2006 Performers: - Name: //a[@class="btn btn--eptenary btn--xsm"] + Name: //li[span[contains(text(), "Pornstars:")]]/following-sibling::li/a/text() Tags: - Name: //ul[@class="category-link mb-2"]//a + Name: //li[span[contains(text(), "Tags:")]]/following-sibling::li/a/text() Details: - selector: //p[@style] + selector: //div[contains(@class, "player__description")]/p/text() Image: - selector: //meta[@property="og:image"]/@content + selector: //script[@type="application/ld+json"] + postProcess: + - replace: + - regex: ^.*thumbnailUrl":"([^"]+).*$ + with: $1 + - regex: \d+.jpg + with: original.jpg Studio: Name: - selector: //a[@class="btn btn--secondary btn--xsm"]/text() -# Last Updated June 13, 2021 + selector: 
//span[contains(text(), "Studio:")]/following-sibling::a/text() +# Last Updated June 28, 2023 From 73ea73c4b46e08478a3938f3bfce46a71e7bae0f Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 14:35:17 +0100 Subject: [PATCH 155/624] use meta image for cover --- scrapers/POVR.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 9bb7267ff..90028161c 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -23,13 +23,7 @@ xPathScrapers: Details: selector: //div[contains(@class, "player__description")]/p/text() Image: - selector: //script[@type="application/ld+json"] - postProcess: - - replace: - - regex: ^.*thumbnailUrl":"([^"]+).*$ - with: $1 - - regex: \d+.jpg - with: original.jpg + selector: //meta[@property="og:image"]/@content Studio: Name: selector: //span[contains(text(), "Studio:")]/following-sibling::a/text() From b6b9cd3f2dc394103462e0d3912b33acfce514f6 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 15:11:12 +0100 Subject: [PATCH 156/624] move sub-studios of POVR Premium --- SCRAPERS-LIST.md | 6 +++-- scrapers/MilfVR.yml | 28 ----------------------- scrapers/{WankzVR.yml => POVRPremium.yml} | 28 ++++++++++++++++------- 3 files changed, 24 insertions(+), 38 deletions(-) delete mode 100644 scrapers/MilfVR.yml rename scrapers/{WankzVR.yml => POVRPremium.yml} (68%) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 38c6bb75e..5af9fec0e 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -213,6 +213,7 @@ bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandibelle.com|brandibelle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandnewamateurs.com|BrandNewAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandnewfaces.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brasilvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR brattyfamily.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
brattymilf.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -799,7 +800,7 @@ milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- -milfvr.com|MilfVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +milfvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- minimuff.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1317,6 +1318,7 @@ transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans transsensual.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Trans transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transsexualroadtrip.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tranzvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR trickymasseur.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trickyoldteacher.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trickyspa.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -1388,7 +1390,7 @@ vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wankitnow.com|Wankitnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR wankz.com|wankz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -wankzvr.com|WankzVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +wankzvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV 
watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/MilfVR.yml b/scrapers/MilfVR.yml deleted file mode 100644 index 1fd148967..000000000 --- a/scrapers/MilfVR.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: "MilfVR" -sceneByURL: - - action: scrapeXPath - url: - - milfvr.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - common: - $info: //div[@class="detail"] - scene: - Title: //div[@class="detail__header detail__header-lg"]/h1 - Studio: - Name: - fixed: MilfVR - Date: - selector: $info//span[@class="detail__date"]/text() - postProcess: - - parseDate: 2 January, 2006 - Details: - selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text() - concat: " " - Tags: - Name: $info//div[@class="tag-list__body"]//a/text() - Performers: - Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() - Image: (//div[@class="photo-strip__body"]/div[@class="photo-strip__slide"])[1]/@data-src -# Last Updated November 04, 2021 diff --git a/scrapers/WankzVR.yml b/scrapers/POVRPremium.yml similarity index 68% rename from scrapers/WankzVR.yml rename to scrapers/POVRPremium.yml index 19f05aa20..aecc60dee 100644 --- a/scrapers/WankzVR.yml +++ b/scrapers/POVRPremium.yml @@ -1,7 +1,10 @@ -name: "WankzVR" +name: "POVR Premium" sceneByURL: - action: scrapeXPath url: + - brasilvr.com + - milfvr.com + - tranzvr.com - wankzvr.com scraper: sceneScraper movieByURL: @@ -26,8 +29,19 @@ xPathScrapers: Name: $info//div[@class="tag-list__body"]//a/text() Performers: Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() -# Blocked by website's anti-scraping. 
-# Image: &imageSel //meta[@property="og:image"]/@content + Image: &imageSel //meta[@property="og:image"]/@content + Studio: &studioAttr + Name: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: ^.*//(?:www.)?([^/]*).*$ + with: $1 + - map: + brasilvr.com: BrasilVR + milfvr.com: MilfVR + tranzvr.com: TranzVR + wankzvr.com: WankzVR movieScraper: common: $info: *infoSel @@ -45,9 +59,7 @@ xPathScrapers: - regex: \smin with: ":00" Date: *dateAttr - Studio: - Name: - fixed: WankzVR + Studio: *studioAttr Synopsis: *detailsAttr -# FrontImage: *imageSel -# Last Updated September 28, 2021 + FrontImage: *imageSel +# Last Updated June 28, 2023 From 02584d018f61126402a7c9930c77521da97ddea5 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 15:32:07 +0100 Subject: [PATCH 157/624] add Code --- scrapers/POVR.yml | 6 ++++++ scrapers/POVRPremium.yml | 9 ++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 90028161c..8f7e23d8a 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -27,4 +27,10 @@ xPathScrapers: Studio: Name: selector: //span[contains(text(), "Studio:")]/following-sibling::a/text() + Code: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: ^.*-(\d+)$ + with: $1 # Last Updated June 28, 2023 diff --git a/scrapers/POVRPremium.yml b/scrapers/POVRPremium.yml index aecc60dee..ab0c3a7c8 100644 --- a/scrapers/POVRPremium.yml +++ b/scrapers/POVRPremium.yml @@ -16,6 +16,7 @@ xPathScrapers: sceneScraper: common: $info: &infoSel //div[@class="detail"] + $url: &urlSel //link[@rel="canonical"]/@href scene: Title: &titleSel //div[@class="detail__header detail__header-lg"]/h1 Date: &dateAttr @@ -32,7 +33,7 @@ xPathScrapers: Image: &imageSel //meta[@property="og:image"]/@content Studio: &studioAttr Name: - selector: //link[@rel="canonical"]/@href + selector: *urlSel postProcess: - replace: - regex: ^.*//(?:www.)?([^/]*).*$ @@ -42,6 +43,12 @@ 
xPathScrapers: milfvr.com: MilfVR tranzvr.com: TranzVR wankzvr.com: WankzVR + Code: + selector: *urlSel + postProcess: + - replace: + - regex: ^.*-(\d+)$ + with: $1 movieScraper: common: $info: *infoSel From 1eceaead0bd789be4b5c08d36e06c67a683ad43e Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 15:41:22 +0100 Subject: [PATCH 158/624] combine into POVR.yml --- SCRAPERS-LIST.md | 8 ++--- scrapers/POVR.yml | 74 ++++++++++++++++++++++++++++++++++++---- scrapers/POVRPremium.yml | 72 -------------------------------------- 3 files changed, 72 insertions(+), 82 deletions(-) delete mode 100644 scrapers/POVRPremium.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 5af9fec0e..6a8a75afc 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -213,7 +213,7 @@ bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandibelle.com|brandibelle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandnewamateurs.com|BrandNewAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brandnewfaces.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -brasilvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +brasilvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR brattyfamily.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brattymilf.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -800,7 +800,7 @@ milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- -milfvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +milfvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
minimuff.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1318,7 +1318,7 @@ transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans transsensual.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Trans transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transsexualroadtrip.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -tranzvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +tranzvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR trickymasseur.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trickyoldteacher.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trickyspa.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -1390,7 +1390,7 @@ vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wankitnow.com|Wankitnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR wankz.com|wankz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -wankzvr.com|POVRPremium.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +wankzvr.com|POVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 8f7e23d8a..8142cd446 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -4,7 +4,55 @@ sceneByURL: url: - povr.com scraper: sceneScraper + - action: scrapeXPath + url: + - brasilvr.com + - milfvr.com + - tranzvr.com + - wankzvr.com + scraper: sceneScraperPremium +movieByURL: + - action: scrapeXPath + url: + - wankzvr.com + scraper: movieScraper xPathScrapers: + sceneScraperPremium: + common: + $info: &infoSel //div[@class="detail"] + $url: &urlSel //link[@rel="canonical"]/@href + scene: + Title: &titleSel 
//div[@class="detail__header detail__header-lg"]/h1 + Date: &dateAttr + selector: $info//span[@class="detail__date"]/text() + postProcess: + - parseDate: 2 January, 2006 + Details: &detailsAttr + selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text() + concat: " " + Tags: + Name: $info//div[@class="tag-list__body"]//a/text() + Performers: + Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() + Image: &imageSel //meta[@property="og:image"]/@content + Studio: &studioAttr + Name: + selector: *urlSel + postProcess: + - replace: + - regex: ^.*//(?:www.)?([^/]*).*$ + with: $1 + - map: + brasilvr.com: BrasilVR + milfvr.com: MilfVR + tranzvr.com: TranzVR + wankzvr.com: WankzVR + Code: &codeAttr + selector: *urlSel + postProcess: + - replace: + - regex: ^.*-(\d+)$ + with: $1 sceneScraper: scene: Title: @@ -22,15 +70,29 @@ xPathScrapers: Name: //li[span[contains(text(), "Tags:")]]/following-sibling::li/a/text() Details: selector: //div[contains(@class, "player__description")]/p/text() - Image: - selector: //meta[@property="og:image"]/@content + Image: *imageSel Studio: Name: selector: //span[contains(text(), "Studio:")]/following-sibling::a/text() - Code: - selector: //link[@rel="canonical"]/@href + Code: *codeAttr + movieScraper: + common: + $info: *infoSel + movie: + Name: + selector: *titleSel postProcess: - replace: - - regex: ^.*-(\d+)$ - with: $1 + - regex: ^ + with: "WankzVR - " + Duration: + selector: $info//span[@class="time"]/text() + postProcess: + - replace: + - regex: \smin + with: ":00" + Date: *dateAttr + Studio: *studioAttr + Synopsis: *detailsAttr + FrontImage: *imageSel # Last Updated June 28, 2023 diff --git a/scrapers/POVRPremium.yml b/scrapers/POVRPremium.yml deleted file mode 100644 index ab0c3a7c8..000000000 --- a/scrapers/POVRPremium.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: "POVR Premium" -sceneByURL: - - action: scrapeXPath - url: - - brasilvr.com - - milfvr.com 
- - tranzvr.com - - wankzvr.com - scraper: sceneScraper -movieByURL: - - action: scrapeXPath - url: - - wankzvr.com - scraper: movieScraper -xPathScrapers: - sceneScraper: - common: - $info: &infoSel //div[@class="detail"] - $url: &urlSel //link[@rel="canonical"]/@href - scene: - Title: &titleSel //div[@class="detail__header detail__header-lg"]/h1 - Date: &dateAttr - selector: $info//span[@class="detail__date"]/text() - postProcess: - - parseDate: 2 January, 2006 - Details: &detailsAttr - selector: //div[@class="detail__txt detail__txt-show_lg"]/text()|//span[@class="more__body"]/text() - concat: " " - Tags: - Name: $info//div[@class="tag-list__body"]//a/text() - Performers: - Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() - Image: &imageSel //meta[@property="og:image"]/@content - Studio: &studioAttr - Name: - selector: *urlSel - postProcess: - - replace: - - regex: ^.*//(?:www.)?([^/]*).*$ - with: $1 - - map: - brasilvr.com: BrasilVR - milfvr.com: MilfVR - tranzvr.com: TranzVR - wankzvr.com: WankzVR - Code: - selector: *urlSel - postProcess: - - replace: - - regex: ^.*-(\d+)$ - with: $1 - movieScraper: - common: - $info: *infoSel - movie: - Name: - selector: *titleSel - postProcess: - - replace: - - regex: ^ - with: "WankzVR - " - Duration: - selector: $info//span[@class="time"]/text() - postProcess: - - replace: - - regex: \smin - with: ":00" - Date: *dateAttr - Studio: *studioAttr - Synopsis: *detailsAttr - FrontImage: *imageSel -# Last Updated June 28, 2023 From 7d435e6ec1fe9589e025bdbd02f745e0ee16b33e Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 16:04:38 +0100 Subject: [PATCH 159/624] make image scraping highest quality for each site --- scrapers/POVR.yml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 8142cd446..888d50336 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -34,7 +34,12 @@ xPathScrapers: Name: 
$info//div[@class="tag-list__body"]//a/text() Performers: Name: //div[@class="detail__inf detail__inf-align_right"]/div[@class="detail__models"]/a/text() - Image: &imageSel //meta[@property="og:image"]/@content + Image: &imageAttr + selector: //meta[@property="og:image"]/@content|//div[@class="photo-strip__body"]/div[2]/@data-src + postProcess: + - replace: + - regex: medium.jpg + with: large.jpg Studio: &studioAttr Name: selector: *urlSel @@ -70,7 +75,7 @@ xPathScrapers: Name: //li[span[contains(text(), "Tags:")]]/following-sibling::li/a/text() Details: selector: //div[contains(@class, "player__description")]/p/text() - Image: *imageSel + Image: *imageAttr Studio: Name: selector: //span[contains(text(), "Studio:")]/following-sibling::a/text() @@ -94,5 +99,5 @@ xPathScrapers: Date: *dateAttr Studio: *studioAttr Synopsis: *detailsAttr - FrontImage: *imageSel + FrontImage: *imageAttr # Last Updated June 28, 2023 From e163c46d85babd64a9127ce34587aa0293e0fa37 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Wed, 28 Jun 2023 16:58:36 +0100 Subject: [PATCH 160/624] get highest res cover image for tranzvr.com --- scrapers/POVR.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 888d50336..974e614d0 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -40,6 +40,8 @@ xPathScrapers: - replace: - regex: medium.jpg with: large.jpg + - regex: 472/cover.jpg + with: 680/cover.jpg Studio: &studioAttr Name: selector: *urlSel From 5705869ac2a9d6d145f4f075467339b3ca55308c Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 30 Jun 2023 11:45:53 +0100 Subject: [PATCH 161/624] use canonical link to determine full image URL --- scrapers/GroobyNetwork-Partial.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index e2e7d713d..34d7c1b60 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -47,12 
+47,25 @@ xPathScrapers: Name: //div[@class="setdesc"]//a/text() Studio: &studio Name: //meta[@name="author"]/@content + URL: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: (https://[^/]*)/.* + with: $1 Image: - selector: //meta[@property="og:image"]/@content + selector: //link[@rel="canonical"]/@href|//img[contains(@class, "update_thumb thumbs stdimage")]/@src|//img[contains(@class, "update_thumb thumbs stdimage")]/@src0_1x + concat: "__SEPARATOR__" postProcess: - replace: - - regex: ^// # bobstgirls + - regex: ^.*__SEPARATOR__// # bobstgirls with: "https://" + - regex: ^(https://[^/]*)/.*(__SEPARATOR__.*)$ + with: $1$2 + - regex: content// + with: content/ + - regex: __SEPARATOR__ + with: '' Tags: &tags Name: //div[@class="set_tags"]/ul/li//a/text() galleryScraper: From 0bda58d3c9ada91406aa19e0180a135da801bf24 Mon Sep 17 00:00:00 2001 From: MrX292 <42774880+MrX292@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:24:20 +0200 Subject: [PATCH 162/624] Update PaperStreetMedia.yml change # Last Updated May 10, 2023 --- scrapers/PaperStreetMedia.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/PaperStreetMedia.yml b/scrapers/PaperStreetMedia.yml index 79b785117..8e977eb82 100755 --- a/scrapers/PaperStreetMedia.yml +++ b/scrapers/PaperStreetMedia.yml @@ -91,4 +91,4 @@ xPathScrapers: teensloveblackcocks: Teens Love Black Cocks thickumz: Thickumz tinysis: Tiny Sis -# Last Updated June 27, 2022 +# Last Updated May 10, 2023 From f2b1cd25e3b65302a0ff66cd80f876909762c3e8 Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Sat, 1 Jul 2023 02:18:00 +0000 Subject: [PATCH 163/624] Update UnderHentai.yml --- scrapers/UnderHentai.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/UnderHentai.yml b/scrapers/UnderHentai.yml index 0fb3340f6..70ed8c9c1 100644 --- a/scrapers/UnderHentai.yml +++ b/scrapers/UnderHentai.yml @@ -1,7 +1,7 @@ 
name: UnderHentai ################################################################################################################ # HOW TO SET UP # -# Store this file in the ~/stash/scrapers/AniDB.yml # +# Store this file in the ~/stash/scrapers/UnderHentai.yml # # (If the scrapers directory is not there it needs to be created) # ################################################################################################################ # HOW TO USE # @@ -60,4 +60,4 @@ xPathScrapers: Name: selector: //p[contains(text(),"Brand")]/following-sibling::a Image: //div[@class="loading"]/img/@src -# Last Updated June 23, 2023 \ No newline at end of file +# Last Updated June 23, 2023 From 0b1ac1570db090063dde80c462347e13fd8f0bfa Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Sat, 1 Jul 2023 02:19:00 +0000 Subject: [PATCH 164/624] Update UnderHentai.yml --- scrapers/UnderHentai.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/UnderHentai.yml b/scrapers/UnderHentai.yml index 70ed8c9c1..f9fd2f26b 100644 --- a/scrapers/UnderHentai.yml +++ b/scrapers/UnderHentai.yml @@ -1,7 +1,7 @@ name: UnderHentai ################################################################################################################ # HOW TO SET UP # -# Store this file in the ~/stash/scrapers/UnderHentai.yml # +# Store this file in the ~/stash/scrapers/UnderHentai.yml # # (If the scrapers directory is not there it needs to be created) # ################################################################################################################ # HOW TO USE # From 85b412ae24c57d0d01dd2eeae557c4e130d8fc78 Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Sat, 1 Jul 2023 21:46:18 +1000 Subject: [PATCH 165/624] Update FetishPros scraping for current site layout --- scrapers/FetishPro.yml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) 
diff --git a/scrapers/FetishPro.yml b/scrapers/FetishPro.yml index 4180c54e2..075865a07 100644 --- a/scrapers/FetishPro.yml +++ b/scrapers/FetishPro.yml @@ -8,23 +8,19 @@ xPathScrapers: sceneScraper: scene: Title: - selector: //h2[@class="title"] + selector: //h1 Date: - selector: //span[@class="update_date"] + selector: //ul[@class="contentInfo"]/li[3] postProcess: - - parseDate: 01/02/2006 + - parseDate: Jan 2, 2006 Performers: - Name: //span[@class="tour_update_models"]/a - Details: //span[@class="latest_update_description"] + Name: //div[@class="models"]//a/text() + Details: //div[contains(@class, "videoDescription")]/p Tags: - Name: //span[@class="tour_update_tags"]/a + Name: //div[@class="tags"]//a Image: - selector: //img[@class="stdimage thumbs"]/@src - postProcess: - - replace: - - regex: ^ - with: "https://www.fetishpros.com/updates/" + selector: //div[@class="videoPreview"]//img/@src Studio: Name: fixed: FetishPros -# Last Updated April 09, 2021 +# Last Updated July 01, 2023 From 81431bc5c9e427d02e217054b5ed8f0bee1c10ae Mon Sep 17 00:00:00 2001 From: escargotbuffed <102115247+escargotbuffed@users.noreply.github.com> Date: Sat, 1 Jul 2023 23:10:01 +0000 Subject: [PATCH 166/624] Update AniDB.yml --- scrapers/AniDB.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/AniDB.yml b/scrapers/AniDB.yml index 2a64a36da..d99409795 100644 --- a/scrapers/AniDB.yml +++ b/scrapers/AniDB.yml @@ -128,10 +128,10 @@ driver: # Replace value field - Name: "adbsess" Domain: "anidb.net" - Value: "" # Enter the value of the 'adbuin' here + Value: "" # Enter the value of the 'adbsess' here Path: "/" - Name: "adbuin" Domain: "anidb.net" Value: "" # Enter the value of the 'adbuin' here Path: "/" -# Last Updated June 23, 2023 \ No newline at end of file +# Last Updated June 23, 2023 From bbd7a30077effb3b71e09db4b1ee0f326e1ac69d Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Sun, 2 Jul 2023 02:20:27 +0100 Subject: [PATCH 167/624] removed 
empty line --- scrapers/EroProfile.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/scrapers/EroProfile.yml b/scrapers/EroProfile.yml index bf0cb1f31..d2c584922 100644 --- a/scrapers/EroProfile.yml +++ b/scrapers/EroProfile.yml @@ -18,5 +18,4 @@ xPathScrapers: with: $1 - parseDate: 2 January 2006 Details: //h1[@class="capMultiLine"]/following::p/text() | //table[@class="data marT"]//tbody//tr//th[contains(text(),"Description:")]/following::td/text() - # Last Updated May 24, 2023 From 6dae5601911959d8969d03804203bb27af767796 Mon Sep 17 00:00:00 2001 From: imagineimaginingthings Date: Sun, 2 Jul 2023 02:22:32 +0100 Subject: [PATCH 168/624] removed empty line --- scrapers/Xrares.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/scrapers/Xrares.yml b/scrapers/Xrares.yml index 1bb441a92..a834d3b4f 100644 --- a/scrapers/Xrares.yml +++ b/scrapers/Xrares.yml @@ -26,5 +26,4 @@ xPathScrapers: - replace: - regex: .+/video/(\d+)/.+?$ with: $1 - # Last Updated June 10, 2023 From c653497d2c917974a7c3a619037cad2b574766ac Mon Sep 17 00:00:00 2001 From: Evan Deaubl Date: Sun, 2 Jul 2023 11:45:01 -0700 Subject: [PATCH 169/624] Fix line breaks in details --- scrapers/MenAtPlay.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/MenAtPlay.yml b/scrapers/MenAtPlay.yml index 5ea97cd78..44adafe95 100644 --- a/scrapers/MenAtPlay.yml +++ b/scrapers/MenAtPlay.yml @@ -10,8 +10,8 @@ xPathScrapers: Title: selector: //div[@class="gallery_info spacer"]/h1/text() Details: - selector: //div[@class="containerText"]/p - concat: "\n" + selector: //div[@class="containerText"]/p//text() + concat: "\n\n" Performers: Name: selector: //div[@class="gallery_info spacer"]/p/span[@class="tour_update_models"]/a/text() @@ -26,4 +26,4 @@ xPathScrapers: Studio: Name: fixed: MenAtPlay -# Last Updated May 29, 2022 +# Last Updated July 2, 2023 From d468db8ca5043c749c549e323e7308a2e01c2b4a Mon Sep 17 00:00:00 2001 From: nrg101 Date: Mon, 3 Jul 2023 11:42:35 +0100 Subject: 
[PATCH 170/624] fix description scraping to work regardless of syntax --- scrapers/GroobyNetwork-Partial.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 34d7c1b60..2b5d0a740 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -41,8 +41,7 @@ xPathScrapers: postProcess: - parseDate: January 2, 2006 Details: &details - selector: //div[@class="trailerpage_info"]/p[not(@class)]/text()|//div[@class="trailerpage_info"]/p[not(@class)]/span/text() - concat: "\n\n" + selector: string(//div[@class="trailerpage_info"]/p[not(@class)]) Performers: &performers Name: //div[@class="setdesc"]//a/text() Studio: &studio From 8074d89c8dd1acd18ace86854381206a8ef24ebe Mon Sep 17 00:00:00 2001 From: nrg101 Date: Mon, 3 Jul 2023 13:28:33 +0100 Subject: [PATCH 171/624] fix Details scraping more --- scrapers/GroobyNetwork-Partial.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 2b5d0a740..396e2748c 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -41,7 +41,8 @@ xPathScrapers: postProcess: - parseDate: January 2, 2006 Details: &details - selector: string(//div[@class="trailerpage_info"]/p[not(@class)]) + selector: //div[@class="trailerpage_info"]/p[not(@class)]/descendant-or-self::*/text() + concat: "\n\n" Performers: &performers Name: //div[@class="setdesc"]//a/text() Studio: &studio @@ -98,4 +99,4 @@ xPathScrapers: - regex: ^/ with: https://www.groobyvr.com/ Tags: *tags -# Last Updated June 28, 2023 +# Last Updated July 03, 2023 From d7823e9379a3b0de3b0586971fa1a80c9f0f8ff5 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 25 May 2023 18:01:15 +0100 Subject: [PATCH 172/624] move isthisreal to algolia --- scrapers/IsThisReal.yml | 77 ----------------------------------------- 1 file changed, 77 deletions(-) 
delete mode 100644 scrapers/IsThisReal.yml diff --git a/scrapers/IsThisReal.yml b/scrapers/IsThisReal.yml deleted file mode 100644 index 6041926f3..000000000 --- a/scrapers/IsThisReal.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: "IsThisReal" -sceneByURL: - - action: scrapeXPath - url: - - isthisreal.com/en/video/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - common: - $videoscript: //script[contains(text(), 'ScenePlayerId = "player"')]/text() - $datascript: //script[contains(text(), 'sceneDetails')]/text() - $imagescript: //script[contains(text(), 'picPreview')]/text() - scene: - Title: - selector: $videoscript - postProcess: - - replace: - - regex: .+(?:"sceneTitle":")([^"]+).+ - with: $1 - - regex: .+(?:"sceneTitle":"").+ - with: - Date: - selector: $videoscript - postProcess: - - replace: - - regex: .+(?:"sceneReleaseDate":")([^"]+).+ - with: $1 - - parseDate: 2006-01-02 - Details: - selector: $datascript - postProcess: - - replace: - - regex: .+(?:sceneDescription":")(.+)(?:","sceneActors).+ - with: $1 - - regex: .+(?:"sceneDescription":"").+ - with: - - regex: <\\\/br>||
- with: "\n" - Tags: - Name: - selector: $datascript - postProcess: - - replace: - - regex: .+(?:sceneCategories":\[)(.+)(?:\],"sceneViews").+ - with: $1 - - regex: \" - with: - split: "," - Performers: - Name: - selector: $datascript - postProcess: - - replace: - - regex: .+(?:"sceneActors":)(.+)(?:,"sceneCategories") - with: $1 - - regex: \{"actorId":"\d+","actorName":|}|\[|\]|" - with: - split: "," - Image: - selector: $imagescript - postProcess: - - replace: - - regex: .+(?:picPreview":")([\w:]+)(?:[\\\/]+)([\w-\.]+)(?:[\\\/]+)(\w+)(?:[\\\/]+)(\d+)(?:[\\\/]+)([\d_]+)(?:[\\\/]+)(\w+)(?:[\\\/]+)(\d+)(?:[\\\/]+)(\d+)(?:[\\\/]+)([\w]+)(?:[\\\/]+)([\w.]+).+ - with: $1//$2/$3/$4/$5/$6/$7/$8/$9/$10 - # if using the transport subdomain, parameters need to be passed - # otherwise a cropped square image is returned by default - - regex: (https:\/\/transform.+) - with: $1?width=960&height=543&enlarge=true - Studio: - Name: - selector: //link[@rel="canonical"]/@href - postProcess: - - replace: - - regex: .+(?:\/video\/)([^\/]+).+ - with: $1 - -# Last Updated November 08, 2020 From 6e3e14ab63818259b6dd1805ee1e57eaa4a14cb8 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 25 May 2023 18:02:39 +0100 Subject: [PATCH 173/624] improve studio detection for existing and added domains --- SCRAPERS-LIST.md | 8 ++- scrapers/Algolia.py | 128 ++++++++++++++++++++++++++++------ scrapers/Algolia_Adultime.yml | 9 ++- 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 465afd76b..9c0717589 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -189,6 +189,7 @@ blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_chec blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blakemason.com|BlakeMason.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +blowmepov.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
blownbyrone.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blowpass.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bobbiedenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -418,6 +419,7 @@ exotic4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- explicite-art.com|ExpliciteArt.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- exploitedcollegegirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- extrapackage.com|ExtraPackage.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +extremepickups.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- exxxtrasmall.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fabsluts.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- facials4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -537,6 +539,7 @@ girlsunderarrest.com|GirlsUnderArrest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- girlsway.com|Algolia_GirlsWay.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian girlswhofuckgirls.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- gloryholesecrets.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -645,7 +648,7 @@ interracialpovs.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial intimatelesbians.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian intimatepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- ipinkvisualpass.com|PinkVisual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -isthisreal.com|IsThisReal.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +isthisreal.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- italianshotclub.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
itscleolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- itspov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -1079,6 +1082,7 @@ rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1278,6 +1282,7 @@ thelifeerotic.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_ thenude.com|TheNude.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- thestripperexperience.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- thetabutales.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theyeslist.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- thicc18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- thickandbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- thickumz.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1420,6 +1425,7 @@ wicked.com|Algolia_Wicked.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check wildoncam.com|trafficpimps.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- williamhiggins.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay willtilexxx.com|WillTileXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wolfwagner.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- woodmancastingx.com|WoodmancastingX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowgirls.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wowgirlsblog.com|WOWGirlsBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index 296d8ed15..17f94bf17 100644 --- 
a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -37,6 +37,15 @@ # Include non female performers NON_FEMALE = True +# a list of main channels (`mainChannelName` from the API) to use as the studio +# name for a scene +MAIN_CHANNELS_AS_STUDIO_FOR_SCENE = [ + "Buttman", + "Cock Choking Sluts", + "Devil's Film Parodies", + "Euro Angels", +] + # a dict with sites having movie sections # used when populating movie urls from the scene scraper MOVIE_SITES = { @@ -54,13 +63,36 @@ "zerotolerancefilms": "https://www.zerotolerancefilms.com/en/movie" } +# a dict of serie (`serie_name` from the API) which should set the value +# for the studio name for a scene +SERIE_USING_OVERRIDE_AS_STUDIO_FOR_SCENE = { + "Jonni Darkko's Stand Alone Scenes": "Jonni Darkko XXX", + "Big Boob Angels": "BAM Visions", + "Mick's ANAL PantyHOES": "BAM Visions", + "Real Anal Lovers": "BAM Visions", + "XXXmailed": "Blackmailed" +} + +# a list of serie (`serie_name` from the API) which should use the sitename +# for the studio name for a scene +SERIE_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ + "Evil", # sitename_pretty: Evil Angel + "Trans-Active" # sitename_pretty: Evil Angel +] + # a dict of sites (`sitename_pretty` from the API) which should set the value # for the studio name for a scene # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE = { + "Adamandevepictures": "Adam & Eve Pictures", + "AgentRedGirl": "Agent Red Girl", + "Devils Gangbangs": "Devil's Gangbangs", "Devilstgirls": "Devil's Tgirls", - "AgentRedGirl": "Agent Red Girl" + "Dpfanatics": "DP Fanatics", + "Janedoe": "Jane Doe Pictures", + "ModernDaySins": "Modern-Day Sins", + "Transgressivexxx": "TransgressiveXXX" } # a list of sites (`sitename_pretty` from the API) which should pick out the @@ -70,7 +102,14 @@ SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ "ChaosMen", "Devil's Film", - "GenderXFilms" + "GenderXFilms", + "Give Me Teens", + "Hairy Undies", + 
"Lesbian Factor", + "Oopsie", + "Out of the Family", + "Rocco Siffredi", + "Squirtalicious" ] # a list of sites (`sitename_pretty` from the API) which should pick out the @@ -78,9 +117,25 @@ # this is because the `serie_name` is the Movie (series) title on these sites, # not the studio SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE = [ - "Muses", + "Extremepickups", # network_name: Adult Time Originals + "Isthisreal", # network_name: Is This Real + "Muses", # network_name: Transfixed + "Officemsconduct", # network_name: Transfixed + "Sabiendemonia", # network_name: Sabien DeMonia + "Upclosex" # network_name: UpCloseX ] +# a list of networks (`network_name` from the API) which should pick out the +# `sitename_pretty` for the studio name for a scene +NETWORKS_USING_SITENAME_AS_STUDIO_FOR_SCENE = [ + "Fame Digital" # this should support all sub-studios listed at https://stashdb.org/studios/cd5591a5-eb26-42fc-a406-b6969a8ef3dd +] + +# a dict of directors to use as the studio for a scene +DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE = { + "Le Wood": "LeWood" +} + def clean_text(details: str) -> str: """ @@ -517,6 +572,46 @@ def parse_movie_json(movie_json: dict) -> dict: scrape["director"] = ", ".join(directors) return scrape +def determine_studio_name_from_json(some_json): + ''' + Reusable function to determine studio name based on what was scraped. 
+ This can be used for at least scene and gallery scraping + ''' + studio_name = None + if some_json.get('sitename_pretty'): + if some_json.get('sitename_pretty') in SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: + studio_name = \ + SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(some_json.get('sitename_pretty')) + elif some_json.get('sitename_pretty') in SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE \ + or some_json.get('serie_name') in SERIE_USING_SITENAME_AS_STUDIO_FOR_SCENE \ + or some_json.get('network_name') \ + and some_json.get('network_name') in NETWORKS_USING_SITENAME_AS_STUDIO_FOR_SCENE: + studio_name = some_json.get('sitename_pretty') + elif some_json.get('sitename_pretty') in SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE \ + and some_json.get('network_name'): + studio_name = some_json.get('network_name') + if not studio_name and some_json.get('network_name') and \ + some_json.get('network_name') in NETWORKS_USING_SITENAME_AS_STUDIO_FOR_SCENE: + studio_name = some_json.get('sitename_pretty') + if not studio_name and some_json.get('mainChannelName') and \ + some_json.get('mainChannelName') in MAIN_CHANNELS_AS_STUDIO_FOR_SCENE: + studio_name = some_json.get('mainChannelName') + if not studio_name and some_json.get('directors'): + directors = [] + for director in some_json.get('directors'): + log.debug(f"director: {director}") + directors.append(director.get('name').strip()) + for director in directors: + if DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE.get(director): + studio_name = \ + DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE.get(director) + if not studio_name and some_json.get('serie_name'): + if some_json.get('serie_name') in SERIE_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: + studio_name = \ + SERIE_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(some_json.get('serie_name')) + else: + studio_name = some_json.get('serie_name') + return studio_name def parse_scene_json(scene_json, url=None): """ @@ -544,15 +639,9 @@ def parse_scene_json(scene_json, url=None): # Studio scrape['studio'] = {} - if 
scene_json.get('sitename_pretty') and scene_json.get('sitename_pretty') in SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(scene_json.get('sitename_pretty')) - elif scene_json.get('sitename_pretty') and scene_json.get('sitename_pretty') in SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = scene_json.get('sitename_pretty') - elif scene_json.get('sitename_pretty') and scene_json.get('sitename_pretty') in SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE \ - and scene_json.get('network_name'): - scrape['studio']['name'] = scene_json.get('network_name') - elif scene_json.get('serie_name'): - scrape['studio']['name'] = scene_json.get('serie_name') + studio_name = determine_studio_name_from_json(scene_json) + if studio_name: + scrape['studio']['name'] = studio_name log.debug( f"[STUDIO] {scene_json.get('serie_name')} - {scene_json.get('network_name')} - {scene_json.get('mainChannelName')} - {scene_json.get('sitename_pretty')}" @@ -615,6 +704,9 @@ def parse_scene_json(scene_json, url=None): hostname = "21sextury" elif net_name.lower() == "21 naturals": hostname = "21naturals" + elif net_name.lower() == 'transfixed': + hostname = 'transfixed' + scrape[ 'url'] = f"https://{hostname.lower()}.com/en/video/{hostname.lower()}/{scene_json['url_title']}/{scene_json['clip_id']}" except Exception as exc: @@ -652,15 +744,9 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: # Studio scrape['studio'] = {} - if gallery_json.get('sitename_pretty') and gallery_json.get('sitename_pretty') in SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = SITES_USING_OVERRIDE_AS_STUDIO_FOR_SCENE.get(gallery_json.get('sitename_pretty')) - elif gallery_json.get('sitename_pretty') and gallery_json.get('sitename_pretty') in SITES_USING_SITENAME_AS_STUDIO_FOR_SCENE: - scrape['studio']['name'] = gallery_json.get('sitename_pretty') - elif gallery_json.get('sitename_pretty') and 
gallery_json.get('sitename_pretty') in SITES_USING_NETWORK_AS_STUDIO_FOR_SCENE \ - and gallery_json.get('network_name'): - scrape['studio']['name'] = gallery_json.get('network_name') - elif gallery_json.get('serie_name'): - scrape['studio']['name'] = gallery_json.get('serie_name') + studio_name = determine_studio_name_from_json(gallery_json) + if studio_name: + scrape['studio']['name'] = studio_name log.debug( f"[STUDIO] {gallery_json.get('serie_name')} - {gallery_json.get('network_name')} - {gallery_json.get('mainChannelName')} - {gallery_json.get('sitename_pretty')}" diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Algolia_Adultime.yml index ec90ade10..5e9867cea 100644 --- a/scrapers/Algolia_Adultime.yml +++ b/scrapers/Algolia_Adultime.yml @@ -10,15 +10,19 @@ sceneByURL: - analteenangels.com/en/video/ - assholefever.com/en/video/ - beingtrans247.com/en/video/ + - blowmepov.com/en/video/ - caughtfapping.com/en/video/ - devilsfilm.com/en/video/ - devilstgirls.com/en/video/ - dpfanatics.com/en/video/ + - extremepickups.com/en/video/ - famedigital.com/en/video/ - footsiebabes.com/en/video/ - forbiddenseductions.com/en/video/ - girlstryanal.com/en/video/ + - givemeteens.com/en/video/ - hairyundies.com/en/video/ + - isthisreal.com/en/video/ - joymii.com/en/video/ - kissmefuckme.com/en/video/ - lezcuties.com/en/video/ @@ -37,14 +41,17 @@ sceneByURL: - peternorth.com/en/video/ - prettydirty.com/en/video/ - puretaboo.com/en/video/ + - sabiendemonia.com/en/video/ - soapymassage.com/en/video/ - teensneaks.com/en/video/ + - theyeslist.com/en/video/ - transfixed.com/en/video/ - transgressivefilms.com/en/video/ - trickyspa.com/en/video/ - truelesbian.com/en/video/ - webyoung.com/en/video/ - welikegirls.com/en/video/ + - wolfwagner.com/en/video/ script: - python - Algolia.py @@ -96,4 +103,4 @@ movieByURL: - Algolia.py - puretaboo - movie -# Last Updated February 06, 2023 +# Last Updated May 25, 2023 From 7bfa81efeb916da5790613766e60ffba5682608a Mon Sep 17 00:00:00 2001 
From: nrg101 Date: Fri, 26 May 2023 11:15:37 +0100 Subject: [PATCH 174/624] use list comprehension instead of another list variable --- scrapers/Algolia.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index 17f94bf17..e0ab5707a 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -597,11 +597,7 @@ def determine_studio_name_from_json(some_json): some_json.get('mainChannelName') in MAIN_CHANNELS_AS_STUDIO_FOR_SCENE: studio_name = some_json.get('mainChannelName') if not studio_name and some_json.get('directors'): - directors = [] - for director in some_json.get('directors'): - log.debug(f"director: {director}") - directors.append(director.get('name').strip()) - for director in directors: + for director in [ d.get('name').strip() for d in some_json.get('directors') ]: if DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE.get(director): studio_name = \ DIRECTOR_AS_STUDIO_OVERRIDE_FOR_SCENE.get(director) From f847ab6d90024cde819f157e48706622c6c2629e Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 15 Jun 2023 16:47:56 +0100 Subject: [PATCH 175/624] add movie scraping for outofthefamily.com --- scrapers/Algolia_Adultime.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Algolia_Adultime.yml index 5e9867cea..2b13e6123 100644 --- a/scrapers/Algolia_Adultime.yml +++ b/scrapers/Algolia_Adultime.yml @@ -98,6 +98,7 @@ movieByURL: - adulttime.com/en/dvd/ - devilsfilm.com/en/dvd/ - devilstgirls.com/en/dvd/ + - outofthefamily.com/en/dvd/ script: - python - Algolia.py From 6e5178ccd6275d770032f05731ff5da32b481c3a Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 30 Jun 2023 12:06:39 +0100 Subject: [PATCH 176/624] use same studio determining function for movies --- scrapers/Algolia.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index e0ab5707a..328ab049f 100644 --- a/scrapers/Algolia.py +++ 
b/scrapers/Algolia.py @@ -534,7 +534,7 @@ def parse_movie_json(movie_json: dict) -> dict: """ scrape = {} try: - studio_name = movie_json[0].get("sitename_pretty") + studio_name = determine_studio_name_from_json(movie_json[0]) except IndexError: log.debug("No movie found") return scrape @@ -575,7 +575,10 @@ def parse_movie_json(movie_json: dict) -> dict: def determine_studio_name_from_json(some_json): ''' Reusable function to determine studio name based on what was scraped. - This can be used for at least scene and gallery scraping + This can be used for scraping: + - scene + - gallery + - movie ''' studio_name = None if some_json.get('sitename_pretty'): From c52ae85d1e8f998593b0412cb8198bc473bd3bb5 Mon Sep 17 00:00:00 2001 From: MrX292 <42774880+MrX292@users.noreply.github.com> Date: Tue, 4 Jul 2023 00:38:25 +0200 Subject: [PATCH 177/624] Update MenAtPlay.yml From b5f06029e923e4bd1facfe6216fcf56160e00df3 Mon Sep 17 00:00:00 2001 From: Evan Deaubl Date: Mon, 3 Jul 2023 15:44:07 -0700 Subject: [PATCH 178/624] Fix MenAtPlay validation failure --- scrapers/MenAtPlay.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/MenAtPlay.yml b/scrapers/MenAtPlay.yml index 44adafe95..05e53b7fd 100644 --- a/scrapers/MenAtPlay.yml +++ b/scrapers/MenAtPlay.yml @@ -26,4 +26,4 @@ xPathScrapers: Studio: Name: fixed: MenAtPlay -# Last Updated July 2, 2023 +# Last Updated July 02, 2023 From f48297f121dce3fd968dbf09b7880501bfa8cad5 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 23 Jun 2023 14:56:17 +0100 Subject: [PATCH 179/624] add another xpath for image --- scrapers/Hustler.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scrapers/Hustler.yml b/scrapers/Hustler.yml index 5e4caf352..b8aef8e2c 100644 --- a/scrapers/Hustler.yml +++ b/scrapers/Hustler.yml @@ -50,7 +50,12 @@ xPathScrapers: postProcess: - parseDate: Jan 02, 2006 Details: //meta[@property="og:description"]/@content|//div[@class="description"]/p - Image: 
//div[@class="img-container"]/img/@src + Image: + selector: //div[@class="img-container"]/img/@src|//div[contains(@class, "jw-preview")]/@style + postProcess: + - replace: + - regex: (?:background-image:\s*url\(")(.+)(?:"\).*);? + with: $1 Tags: Name: //div[@class="tag-list"]/a/text() Studio: @@ -61,4 +66,4 @@ driver: clicks: - xpath: //a[@class="clickable"] sleep: 2 -# Last Updated February 01, 2020 +# Last Updated June 23, 2023 From e51d853f293d3bff3b9ec9751ae833a00079d857 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 30 Jun 2023 12:50:48 +0100 Subject: [PATCH 180/624] tweak Date, Details, Performers, Title xpaths --- scrapers/Hustler.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scrapers/Hustler.yml b/scrapers/Hustler.yml index b8aef8e2c..041305adc 100644 --- a/scrapers/Hustler.yml +++ b/scrapers/Hustler.yml @@ -39,19 +39,21 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $content: //div[@class="panel-content"] scene: - Title: //h3/a/text() + Title: //h3/a/text()|$content//h3[contains(@class, "headline")]/a/text() URL: //link[@rel='canonical']/@href Performers: Name: - selector: //span[@class="attr-key" and contains(text(),"Cast")]/following-sibling::span[@class="attr-value"]/a[not(text()="Hustler Models") and not(text()="Barely Legal Models")]/text() + selector: //span[@class="attr-key" and contains(text(),"Cast")]/following-sibling::span[@class="attr-value"]/a[not(text()="Hustler Models") and not(text()="Barely Legal Models")]/text()|$content//span[contains(text(), "Cast")]//following-sibling::span/a/text() Date: - selector: //span[@class="attr-key" and contains(text(),"Released")]/following-sibling::span[@class="attr-value"]/text() + selector: //span[@class="attr-key" and contains(text(),"Released")]/following-sibling::span[@class="attr-value"]/text()|$content//span[contains(text(), "Released")]//following-sibling::span/text() postProcess: - parseDate: Jan 02, 2006 - Details: 
//meta[@property="og:description"]/@content|//div[@class="description"]/p + Details: //p[following-sibling::a[@class="clickable"]]|//meta[@property="og:description"]/@content|//div[@class="description"]/p Image: - selector: //div[@class="img-container"]/img/@src|//div[contains(@class, "jw-preview")]/@style + selector: //div[@class="img-container"]/img/@src|//div[contains(@class, "jw-preview")]/@style|$content/img/@src postProcess: - replace: - regex: (?:background-image:\s*url\(")(.+)(?:"\).*);? @@ -66,4 +68,4 @@ driver: clicks: - xpath: //a[@class="clickable"] sleep: 2 -# Last Updated June 23, 2023 +# Last Updated June 30, 2023 From 37427cde895f88dc3bb7682a6ac69ca312d60337 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Thu, 6 Jul 2023 01:52:10 -0400 Subject: [PATCH 181/624] Added karups sub-studio detection --- scrapers/Karups.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scrapers/Karups.yml b/scrapers/Karups.yml index 8c9a2f6ac..99fbda611 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -7,6 +7,14 @@ sceneByURL: xPathScrapers: sceneScraper: scene: + Studio: + Name: + selector: //span[@class="sup-title"]/span + map: + Karups: Karups + KarupsOW: Karups Older Women + KarupsHA: Karups Hometown Amateurs + KarupsPC: Karups Private Collection Title: //h1[@class="page-heading"]/span[@class="title"]/text() Date: selector: //span[@class="date"]/span[@class="content"]/text() @@ -22,4 +30,4 @@ xPathScrapers: Image: selector: //video[@id="player"]/@poster|//div[@class="video-poster"]/img/@src -# Last Updated November 08, 2020 +# Last Updated July 6, 2023 \ No newline at end of file From d45824e5cf1c0be24ca695c66b14bcfe2f7a480f Mon Sep 17 00:00:00 2001 From: benny502 Date: Thu, 6 Jul 2023 06:32:17 +0000 Subject: [PATCH 182/624] Add JavBus.yml --- scrapers/JavBus.yml | 104 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 scrapers/JavBus.yml 
diff --git a/scrapers/JavBus.yml b/scrapers/JavBus.yml new file mode 100644 index 000000000..08fcce500 --- /dev/null +++ b/scrapers/JavBus.yml @@ -0,0 +1,104 @@ +name: Javbus +sceneByFragment: + action: scrapeXPath + queryURL: https://www.javbus.com/{filename} + queryURLReplace: + filename: + - regex: -JG\d + with: "" + - regex: (.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+) + with: $2 + scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - https://www.javbus.com + - https://www.seejav.bid + - https://www.cdnbus.lol + - https://www.dmmbus.lol + - https://www.seedmm.cfd + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.javbus.com/search/{}&type=&parent=ce + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +performerByURL: + - action: scrapeXPath + url: + - https://www.javbus.com + - https://www.seejav.bid + - https://www.cdnbus.lol + - https://www.dmmbus.lol + - https://www.seedmm.cfd + scraper: performerScraper +performerByName: + action: scrapeXPath + queryURL: https://www.javbus.com/searchstar/{}&type=&parent=ce + scraper: performerSearch + +xPathScrapers: + performerSearch: + performer: + Name: //span[@class="mleft"] + URL: //*[@id="waterfall"]/div/a/@href + performerScraper: + performer: + Name: //*[@id="waterfall"]/div[1]/div/div[2]/span + Birthdate: + selector: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '生日')] + postProcess: + - replace: + - regex: ^(.*? ){1} + with: + Height: + selector: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '身高')] + postProcess: + - replace: + - regex: ^(.*? 
){1} + with: + # Measurements: //*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '胸圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '腰圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '臀圍')]//*[@id="waterfall"]/div[1]/div/div[2]/p[contains(text(), '罩杯')] + Image: + selector: //*[@id="waterfall"]/div[1]/div/div[1]/img/@src + postProcess: + - replace: + - regex: ^ + with: https://www.javbus.com + + sceneSearch: + scene: + Title: //div[@class="photo-info"]/span + URL: //*[@id="waterfall"]/div/a/@href + sceneScraper: + scene: + Title: + selector: //div[@class="col-md-3 info"]//span[contains(text(), '識別碼')]/../span[2]/text() + URL: + selector: /html/head/link[@hreflang="zh"]/@href + Date: + selector: //div[@class="col-md-3 info"]//span[contains(text(), '發行日期')]/../text() + Details: + selector: //div[@class="container"]/h3/text() + postProcess: + - replace: + - regex: ^(.*? ){1} + with: + Tags: + Name: //div[@class="col-md-3 info"]//span[@class="genre"]/label/a/text() + Performers: + Name: //div[@id="video_cast"]/table/tbody/tr/td[@class="text"]/span/span/a + Director: //div[@id='video_director']/table/tbody/tr/td[@class="text"]/span/a/text() + Image: + selector: //div[@class="row movie"]/div[@class="col-md-9 screencap"]/a[@class="bigImage"]/img/@src + postProcess: + - replace: + - regex: ^ + with: https://www.javbus.com + Studio: + Name: //div[@class="col-md-3 info"]//span[contains(text(), '發行商')]/../a/text() + +# Last Updated Jul 6, 2023 \ No newline at end of file From cf2168a2bdeb16a5c1f93d04953d2254475c4deb Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Thu, 6 Jul 2023 18:33:33 -0400 Subject: [PATCH 183/624] Mapped Karups "sub-studios" --- scrapers/Karups.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scrapers/Karups.yml b/scrapers/Karups.yml index 99fbda611..3006b7896 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -10,11 +10,12 @@ 
xPathScrapers: Studio: Name: selector: //span[@class="sup-title"]/span - map: - Karups: Karups - KarupsOW: Karups Older Women - KarupsHA: Karups Hometown Amateurs - KarupsPC: Karups Private Collection + postProcess: + - map: + Karups: Karups + KarupsOW: Karups Older Women + KarupsHA: Karups Hometown Amateurs + KarupsPC: Karups Private Collection Title: //h1[@class="page-heading"]/span[@class="title"]/text() Date: selector: //span[@class="date"]/span[@class="content"]/text() From 71bdcb7280fbec56c802ccb82bf22d0c07b585a3 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Thu, 6 Jul 2023 18:48:36 -0400 Subject: [PATCH 184/624] Added last update comment --- .vscode/settings.json | 5 +++++ scrapers/Karups.yml | 1 - 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..c237e57e1 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "yaml.schemas": { + "https://json.schemastore.org/yamllint.json": "vscode-vfs://github%2B7b2276223a312c22726566223a7b2274797065223a352c226964223a226d6173746572227d7d/echo6ix/CommunityScrapers/scrapers/Karups.yml" + } +} \ No newline at end of file diff --git a/scrapers/Karups.yml b/scrapers/Karups.yml index 3006b7896..03aaf91cb 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -30,5 +30,4 @@ xPathScrapers: Name: //span[@class="models"]/span[@class="content"]//a/text() Image: selector: //video[@id="player"]/@poster|//div[@class="video-poster"]/img/@src - # Last Updated July 6, 2023 \ No newline at end of file From 0c2d35308593825431ce60fd6dbc03006455308f Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Thu, 6 Jul 2023 18:50:33 -0400 Subject: [PATCH 185/624] Fixed last updated comment formatting --- scrapers/Karups.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapers/Karups.yml 
b/scrapers/Karups.yml index 03aaf91cb..7da83743b 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -30,4 +30,5 @@ xPathScrapers: Name: //span[@class="models"]/span[@class="content"]//a/text() Image: selector: //video[@id="player"]/@poster|//div[@class="video-poster"]/img/@src -# Last Updated July 6, 2023 \ No newline at end of file + +# Last Updated July 06, 2023 \ No newline at end of file From bd1ad11acee57a30a9a014aa40d77b67c7983d15 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 7 Jul 2023 11:21:33 +0100 Subject: [PATCH 186/624] fixes the date selector and matches Studio Name to stashdb --- scrapers/Transerotica.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scrapers/Transerotica.yml b/scrapers/Transerotica.yml index 4690bce12..4a1545c1a 100644 --- a/scrapers/Transerotica.yml +++ b/scrapers/Transerotica.yml @@ -12,10 +12,10 @@ xPathScrapers: Title: //h1[@class='title_bar'] Image: //div[@id="player"]/video/@poster Date: - selector: $update//comment() + selector: $update/p/span/preceding-sibling::comment() postProcess: - replace: - - regex: .*(?:class='upddate').*(\d{2}/\d{2}/\d{4}).* + - regex: .*(\d{2}/\d{2}/\d{4}).* with: $1 - parseDate: "01/02/2006" - map: @@ -35,5 +35,5 @@ xPathScrapers: split: "," Studio: Name: - fixed: Trans Erotica -# Last Updated February 06, 2023 + fixed: TransErotica +# Last Updated July 07, 2023 From 8d0d046d716459ef61e969d631f09e19fc849a85 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Fri, 7 Jul 2023 17:56:18 -0400 Subject: [PATCH 187/624] Added gallery scraping --- scrapers/Karups.yml | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/scrapers/Karups.yml b/scrapers/Karups.yml index 7da83743b..7546a4e60 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -1,34 +1,48 @@ -name: "Karups" +name: Karups sceneByURL: - action: scrapeXPath url: - karups.com/video/ scraper: sceneScraper 
+galleryByURL: + - action: scrapeXPath + url: + - karups.com/gallery/ + scraper: galleryScraper + xPathScrapers: sceneScraper: scene: - Studio: + Studio: &studio Name: selector: //span[@class="sup-title"]/span postProcess: - map: - Karups: Karups KarupsOW: Karups Older Women KarupsHA: Karups Hometown Amateurs KarupsPC: Karups Private Collection - Title: //h1[@class="page-heading"]/span[@class="title"]/text() - Date: + Title: &title //h1[@class="page-heading"]/span[@class="title"]/text() + Date: &date selector: //span[@class="date"]/span[@class="content"]/text() postProcess: - replace: - regex: (st|nd|rd|th)\, with: "," - parseDate: Jan 02, 2006 - Details: - selector: //div[@class="content-information-description"]/p/text() - Performers: + Performers: &performers Name: //span[@class="models"]/span[@class="content"]//a/text() - Image: + Details: &details + selector: //div[@class="content-information-description"]/p/text() + Image: &image selector: //video[@id="player"]/@poster|//div[@class="video-poster"]/img/@src -# Last Updated July 06, 2023 \ No newline at end of file + galleryScraper: + gallery: + Studio: *studio + Title: *title + Date: *date + Performers: *performers + Details: *details + Image: *image + +# Last Updated July 07, 2023 \ No newline at end of file From 771ad23ee18345aeead5f6c36bd3d327f237632b Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 8 Jul 2023 21:35:24 +0200 Subject: [PATCH 188/624] Update RandyBlue for new site layout --- scrapers/RandyBlue.yml | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/scrapers/RandyBlue.yml b/scrapers/RandyBlue.yml index 1bc7c1296..92a257af9 100755 --- a/scrapers/RandyBlue.yml +++ b/scrapers/RandyBlue.yml @@ -2,29 +2,32 @@ name: "RandyBlue" sceneByURL: - action: scrapeXPath url: - - randyblue.com/video/ + - randyblue.com/scenes/ scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $titleArea: //div[@class="title-zone"] scene: - Title: 
//h2[@class="content-item-name"] + Title: $titleArea/h1 Date: - selector: //div[@id="info"]//li[@class="added"]/span + selector: $titleArea/div[@class="calendar"] postProcess: - parseDate: 01/02/2006 - Details: //div[@class="short-description"] - Tags: - Name: //div[@class="tagcloud"]/a/text() - Performers: - Name: //a[@itemprop="actor"] - Image: - selector: "//script[contains(text(), 'image: ')]" + Details: + selector: //div[@id="collapseTwo"] postProcess: - replace: - - regex: '^.*image: "([^"]+)",.*$' - with: $1 + - regex: \x{0020}|\x{00A0} # unicode SP, NBSP + with: " " + Tags: + Name: $titleArea/ul[@class="scene-tags"]/li/a + Performers: + Name: $titleArea/ul[@class="scene-models-list"]/li/a + Image: //meta[@itemprop="thumbnailUrl"]/@content + URL: //link[@rel="canonical"]/@href Studio: Name: fixed: Randy Blue -# Last Updated December 17, 2021 +# Last Updated July 08, 2023 From 9258a2a4d928b0d32e521fafa0707db2a271cef4 Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Tue, 11 Jul 2023 15:43:54 +0000 Subject: [PATCH 189/624] move dogfart to the Algolia series of scrapers --- scrapers/Algolia_DogfartNetwork.yml | 39 +++++++++++++++++++++++++ scrapers/DogFart.yml | 44 ----------------------------- 2 files changed, 39 insertions(+), 44 deletions(-) create mode 100644 scrapers/Algolia_DogfartNetwork.yml delete mode 100644 scrapers/DogFart.yml diff --git a/scrapers/Algolia_DogfartNetwork.yml b/scrapers/Algolia_DogfartNetwork.yml new file mode 100644 index 000000000..f796ee43a --- /dev/null +++ b/scrapers/Algolia_DogfartNetwork.yml @@ -0,0 +1,39 @@ +name: "DogfartNetwork" +sceneByURL: + - action: script + url: + - dogfartnetwork.com/en/video + script: + - python + - Algolia.py + - dogfartnetwork +sceneByFragment: + action: script + script: + - python + - Algolia.py + - dogfartnetwork +sceneByName: + action: script + script: + - python + - Algolia.py + - dogfartnetwork + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - 
dogfartnetwork + - validName +galleryByURL: + - action: script + url: + - dogfartnetwork.com/en/photo/ + script: + - python + - Algolia.py + - dogfartnetwork + - gallery +# Last Updated December 22, 2022 diff --git a/scrapers/DogFart.yml b/scrapers/DogFart.yml deleted file mode 100644 index e2a758f89..000000000 --- a/scrapers/DogFart.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: "DogFart" -sceneByURL: - - action: scrapeXPath - url: - - dogfartnetwork.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //h1[@class="description-title"]/text() - Date: - selector: //meta[@itemprop="uploadDate"]/@content - postProcess: - - replace: - - regex: ([\d-]*).+ - with: $1 - - parseDate: 2006-01-02 - Details: - selector: //div[@class="description shorten"]/text()|//span[@class="more-desc hide"]/text() - concat: " " - Tags: - Name: //div[@class="categories"]/p/a/text() - Performers: - Name: //h4[@class="more-scenes"]/a/text() - Image: - selector: //meta[@itemprop="thumbnailUrl"]/@content - postProcess: - - replace: - - regex: ^ - with: "https:" - Studio: - Name: - selector: //h3[@class="site-name"] - postProcess: - - replace: - - regex: (.+?)(\.com)$ - with: $1 - # 'BlacksOnBlondes' => 'Blacks On Blondes' - - regex: ([a-z])-?([A-Z]) - with: $1 $2 - # Fix special cases - - map: - Glory Hole: Gloryhole -# Last Updated June 08, 2022 From 3e33977be2761ff50c5888d6565b6d714591aa3b Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Tue, 11 Jul 2023 19:37:05 -0400 Subject: [PATCH 190/624] Updated xpath, added studio code, and url sanitizing --- scrapers/MatureNL.yml | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/scrapers/MatureNL.yml b/scrapers/MatureNL.yml index 87ea8ed23..08452a127 100644 --- a/scrapers/MatureNL.yml +++ b/scrapers/MatureNL.yml @@ -5,25 +5,52 @@ sceneByURL: &byUrl - mature.nl/ scraper: sceneScraper galleryByURL: *byUrl +sceneByFragment: + action: 
scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $url: //link[@rel="canonical"]/@href scene: + URL: &url + selector: $url + postProcess: + - replace: + - regex: (^https?:\/\/(?:www\.)?mature\.nl\/..\/update\/\d+)\/.+ + with: $1 + Code: + selector: $url + postProcess: + - replace: + - regex: .+\/update\/(\d+)\/.+ + with: $1 Title: &title //div[@class="box"]/h1/text() Details: &details //div/span[text()="Synopsis:"]/following-sibling::text() | //meta[@name="description"]/@content Tags: &tags - Name: //div[@class="box-cnt"]/div[@class="mar-t"]//a[contains(@href,"/niche/")]/text() + Name: //div[@id="divPageUpdateNiches"]/a[contains(@class, "tag")]/text() Performers: &performers Name: - selector: //div[@class="name"]/span[@class="col-accent"]/text() + #This method is easier, but the names here are in uppercase + #selector: //div[contains(@class,"card-label")]/a/text() + selector: //div[@class="box-cnt"]//span[@class="col-accent"]/following-sibling::text() postProcess: - replace: - - regex: ( \(EU\))$ + - regex: \s\(\w+\) + with: + - regex: \s\(EU\) with: + - regex: " & " + with: ", " + - regex: (\w)\., + with: $1, + split: ", " Image: - selector: //span[@id="spnPageUpdateTrailer"]/a/img/@data-src + selector: //span[@id="spnPageUpdateTrailer"]/a/img/@data-src|//img[@class="img-responsive lazy"][1]/@data-src Date: &date - selector: //div[@class="box-cnt"]/div[@class="mar-t"][not (contains(a, "the full"))]/text()[1] + selector: //span[@class="val-m"][1] postProcess: - replace: - regex: ^(\d{1,2}-\d{1,2}-\d{4}).*$ @@ -33,10 +60,12 @@ xPathScrapers: Name: fixed: "Mature.nl" gallery: + URL: *url Title: *title Details: *details Tags: *tags Performers: *performers Date: *date Studio: *studio -# Last Updated January 25, 2023 + +# Last Updated July 11, 2023 From 07568ad1b799c402dc9096f08c4ce0efea6fce68 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Tue, 11 Jul 2023 19:43:27 -0400 Subject: 
[PATCH 191/624] Remove boiler plate comment at end of scene descriptions --- scrapers/MatureNL.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapers/MatureNL.yml b/scrapers/MatureNL.yml index 08452a127..8b5772765 100644 --- a/scrapers/MatureNL.yml +++ b/scrapers/MatureNL.yml @@ -28,7 +28,12 @@ xPathScrapers: - regex: .+\/update\/(\d+)\/.+ with: $1 Title: &title //div[@class="box"]/h1/text() - Details: &details //div/span[text()="Synopsis:"]/following-sibling::text() | //meta[@name="description"]/@content + Details: &details + selector: //div/span[text()="Synopsis:"]/following-sibling::text() | //meta[@name="description"]/@content + postProcess: + - replace: + - regex: " Watch this amazing porn video on mature.nl as a member today!" + with: Tags: &tags Name: //div[@id="divPageUpdateNiches"]/a[contains(@class, "tag")]/text() Performers: &performers From 6b2e657e9c0c7ac8c2b0bf6050f40c6793bf5eed Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Tue, 11 Jul 2023 19:52:46 -0400 Subject: [PATCH 192/624] Fixed galleryScraper --- .vscode/settings.json | 5 ++++- scrapers/Karups.yml | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index c237e57e1..d910184a0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,8 @@ { "yaml.schemas": { - "https://json.schemastore.org/yamllint.json": "vscode-vfs://github%2B7b2276223a312c22726566223a7b2274797065223a352c226964223a226d6173746572227d7d/echo6ix/CommunityScrapers/scrapers/Karups.yml" + "https://json.schemastore.org/yamllint.json": [ + "vscode-vfs://github%2B7b2276223a312c22726566223a7b2274797065223a352c226964223a226d6173746572227d7d/echo6ix/CommunityScrapers/scrapers/Karups.yml", + "vscode-vfs://github%2B7b2276223a312c22726566223a7b2274797065223a352c226964223a226d6173746572227d7d/echo6ix/CommunityScrapers/scrapers/MatureNL.yml" + ] } } \ No newline at end of file diff --git 
a/scrapers/Karups.yml b/scrapers/Karups.yml index 7546a4e60..93520d52d 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -43,6 +43,5 @@ xPathScrapers: Date: *date Performers: *performers Details: *details - Image: *image # Last Updated July 07, 2023 \ No newline at end of file From be80f73dc7b4cd6f63ae6e37b9f0b99d5589a07a Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Tue, 11 Jul 2023 19:53:17 -0400 Subject: [PATCH 193/624] Updated last updated --- scrapers/Karups.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Karups.yml b/scrapers/Karups.yml index 93520d52d..31a033a53 100644 --- a/scrapers/Karups.yml +++ b/scrapers/Karups.yml @@ -44,4 +44,4 @@ xPathScrapers: Performers: *performers Details: *details -# Last Updated July 07, 2023 \ No newline at end of file +# Last Updated July 11, 2023 \ No newline at end of file From 4c5916924784883847a7b83dd9cb3b6f4d16ff39 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Tue, 11 Jul 2023 20:00:12 -0400 Subject: [PATCH 194/624] Update settings.json --- .vscode/settings.json | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index d910184a0..016a1f613 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,8 +1,5 @@ { "yaml.schemas": { - "https://json.schemastore.org/yamllint.json": [ - "vscode-vfs://github%2B7b2276223a312c22726566223a7b2274797065223a352c226964223a226d6173746572227d7d/echo6ix/CommunityScrapers/scrapers/Karups.yml", - "vscode-vfs://github%2B7b2276223a312c22726566223a7b2274797065223a352c226964223a226d6173746572227d7d/echo6ix/CommunityScrapers/scrapers/MatureNL.yml" - ] + "validator/scraper.schema.json": "*.yml" } -} \ No newline at end of file +} From dba123e7f0e67403574d59cf61fc2860541df587 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Wed, 12 Jul 2023 22:23:39 -0400 
Subject: [PATCH 195/624] Create 1passforallsites.yml --- scrapers/1passforallsites.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 scrapers/1passforallsites.yml diff --git a/scrapers/1passforallsites.yml b/scrapers/1passforallsites.yml new file mode 100644 index 000000000..dee379079 --- /dev/null +++ b/scrapers/1passforallsites.yml @@ -0,0 +1,33 @@ +name: 1 Pass For All Sites +sceneByURL: + - action: scrapeXPath + url: + - 1passforallsites.com/episode/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + selector: //a[contains(@href,'?site=')] + Title: + selector: //title + replace: + - regex: (^.+) - 1 .+$ + with: $1 + Details: //div[@class="sp-info-txt"]/p/text() + Performers: + Name: + selector: //p[@class="sp-info-name"]/a/text() + Tags: + Name: + selector: //p[@class="niches-list"]/a/text() + Date: + selector: //li[contains(text(),"Added:")] + replace: + - regex: "Added\\: (.+)" + with: $1 + parseDate: 2 Jan 2006 + Image: //video/@poster + +# Last Updated July 12, 2023 From edcb0ec0a3633724f1d52bd50c46abeffd85e65a Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Wed, 12 Jul 2023 22:34:24 -0400 Subject: [PATCH 196/624] Updated date module --- scrapers/1passforallsites.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scrapers/1passforallsites.yml b/scrapers/1passforallsites.yml index dee379079..bc7b24e00 100644 --- a/scrapers/1passforallsites.yml +++ b/scrapers/1passforallsites.yml @@ -1,4 +1,4 @@ -name: 1 Pass For All Sites +name: "1 Pass For All Sites" sceneByURL: - action: scrapeXPath url: @@ -24,10 +24,11 @@ xPathScrapers: selector: //p[@class="niches-list"]/a/text() Date: selector: //li[contains(text(),"Added:")] - replace: - - regex: "Added\\: (.+)" - with: $1 - parseDate: 2 Jan 2006 + postProcess: + - replace: + - regex: "Added\\: (.+)" + with: $1 + - parseDate: 2 Jan 2006 Image: //video/@poster # Last 
Updated July 12, 2023 From 81ddf2c22468e7042815feea1a198d75da72f720 Mon Sep 17 00:00:00 2001 From: echo6ix <37937507+echo6ix@users.noreply.github.com> Date: Wed, 12 Jul 2023 22:50:30 -0400 Subject: [PATCH 197/624] Fixed title postProcessing --- scrapers/1passforallsites.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scrapers/1passforallsites.yml b/scrapers/1passforallsites.yml index bc7b24e00..a5cb88aa5 100644 --- a/scrapers/1passforallsites.yml +++ b/scrapers/1passforallsites.yml @@ -12,9 +12,10 @@ xPathScrapers: selector: //a[contains(@href,'?site=')] Title: selector: //title - replace: - - regex: (^.+) - 1 .+$ - with: $1 + postProcess: + - replace: + - regex: (^.+) - 1 .+$ + with: $1 Details: //div[@class="sp-info-txt"]/p/text() Performers: Name: From c699a274f71c68405f03cf5c271aa271e65bebc2 Mon Sep 17 00:00:00 2001 From: Fabio Tea Date: Mon, 17 Jul 2023 22:22:27 +0200 Subject: [PATCH 198/624] Updated RealJamVR.com --- scrapers/RealJamVR.yml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index d3002c9ef..6db47678b 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -9,22 +9,23 @@ xPathScrapers: scene: Title: selector: //h1 - Date: - selector: //div[contains(@class, "date")] postProcess: - replace: - - regex: ',' - with: "" - - parseDate: January 2 2006 + - regex: ^\s+(.+)\s+$ + with: $1 + Date: + selector: //div[contains(@class, "specs-icon")]/following-sibling::strong + postProcess: + - parseDate: January 2, 2006 Performers: - Name: //span[text()="Featuring:"]/following-sibling::a + Name: (//a[starts-with(@href, "/actor")]/text())[1] Tags: - Name: //span[text()="TAGS:"]/following-sibling::a + Name: //a[starts-with(@href, "/scenes") and @class="tag"]/text() Details: - selector: //div[contains(@class, "c-video-item-desc")] + selector: //div[@class="opacity-75 my-2"] Image: - selector: //meta[@property="og:image"]/@content + 
selector: //*[@id="video-player"]//@poster Studio: Name: fixed: RealJamVR -# Last Updated June 25, 2021 +# Last Updated July 17, 2023 \ No newline at end of file From 8f76274a09790cbdd5ac65acda881ed5f9292e03 Mon Sep 17 00:00:00 2001 From: Fabio Tea Date: Mon, 17 Jul 2023 22:47:31 +0200 Subject: [PATCH 199/624] Update RealJamVR.yml --- scrapers/RealJamVR.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index 6db47678b..eae552999 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -14,7 +14,7 @@ xPathScrapers: - regex: ^\s+(.+)\s+$ with: $1 Date: - selector: //div[contains(@class, "specs-icon")]/following-sibling::strong + selector: //div[@class="specs-icon"]/following-sibling::strong postProcess: - parseDate: January 2, 2006 Performers: @@ -28,4 +28,4 @@ xPathScrapers: Studio: Name: fixed: RealJamVR -# Last Updated July 17, 2023 \ No newline at end of file +# Last Updated July 17, 2023 From 3112f88ec832c3ca36869e838726e36623d75e6c Mon Sep 17 00:00:00 2001 From: Fabio Tea Date: Mon, 17 Jul 2023 23:52:26 +0200 Subject: [PATCH 200/624] Updated AdultEmpire.com --- scrapers/AdultEmpire.yml | 48 +++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/scrapers/AdultEmpire.yml b/scrapers/AdultEmpire.yml index a891ca3d4..bd234480c 100644 --- a/scrapers/AdultEmpire.yml +++ b/scrapers/AdultEmpire.yml @@ -22,7 +22,6 @@ sceneByQueryFragment: queryURL: "{url}" scraper: sceneScraper - xPathScrapers: sceneSearch: scene: @@ -42,11 +41,20 @@ xPathScrapers: with: "https://www.adultdvdempire.com" Image: selector: //a[@class="boxcover"]/img/@data-src - movieScraper: movie: - Name: //h1/text() - Director: //a[@label="Director"]/text() + Name: + selector: //h1/text() + postProcess: + - replace: + - regex: ^\s+(.+)\s+$ + with: $1 + Director: + selector: //a[@label="Director"]/text() + postProcess: + - replace: + - regex: ^\s+(.+)\s+$ + with: $1 Duration: 
selector: //small[contains(text(), "Length")]/following-sibling::text() postProcess: @@ -76,29 +84,29 @@ xPathScrapers: URL: //meta[@name='og:url']/@content sceneScraper: scene: - Title: //h1/text() - Details: - selector: //h4[contains(@class,"synopsis")]//text() - concat: " " + Title: //div[@class="clip-page__detail__title__primary"]/text() Date: - selector: //small[contains(text(), "Released")]/following-sibling::text() + selector: //strong[starts-with(text(), "Released:")]//ancestor::li/text() postProcess: - parseDate: Jan 02 2006 - Image: //a[@id="front-cover"]/@data-href Studio: - Name: //a[@label="Studio"]/text() + Name: //a[@label="Studio" and contains(@href, "studio")] Movies: - Name: //h1/text() - URL: //link[@rel="canonical"]/@href + Name: //div[contains(text(), "from")]//following-sibling::a/text() + URL: + selector: //div[contains(text(), "from")]//following-sibling::a//@href + postProcess: + - replace: + - regex: ^ + with: "https://www.adultdvdempire.com" Tags: - Name: //div[h2[contains(.,'Categories')]]//a[@label="Category"]/text() + Name: //strong[starts-with(text(), "Attributes:")]//following-sibling::a/text() Performers: - Name: //a[@label="Performer"]//text() + Name: //strong[starts-with(text(), "Starring:")]//following-sibling::a/text() URL: - selector: //a[@label="Performer"]/@href + selector: //strong[starts-with(text(), "Starring:")]//following-sibling::a/@href postProcess: - replace: - - regex: ^ - with: "https://www.adultdvdempire.com" - URL: //meta[@name='og:url']/@content -# Last Updated December 16, 2021 + - regex: ^ + with: "https://www.adultdvdempire.com" +# Last Updated July 17, 2023 From f489dd82853d5ef2c5327a54aeadc7207870eeff Mon Sep 17 00:00:00 2001 From: Dystaxia Date: Tue, 18 Jul 2023 07:45:38 -0500 Subject: [PATCH 201/624] Update data18.yml Fixed front cover grab for movie --- scrapers/data18.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/data18.yml b/scrapers/data18.yml index 403f3004a..4be9893d0 
100644 --- a/scrapers/data18.yml +++ b/scrapers/data18.yml @@ -81,6 +81,6 @@ xPathScrapers: - replace: - regex: '^Description\s*-\s*' with: - FrontImage: //a[@id='enlargecover']/@href + FrontImage: //a[@id='enlargecover']/@data-featherlight BackImage: //a[text()='+Back']/@href # Last Updated January 31, 2023 From f8c786335520885ada632c3dffdd7c0e53cf69b7 Mon Sep 17 00:00:00 2001 From: Dystaxia Date: Tue, 18 Jul 2023 07:47:01 -0500 Subject: [PATCH 202/624] Update data18.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Updated “Last Updated” --- scrapers/data18.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/data18.yml b/scrapers/data18.yml index 4be9893d0..4a1b1f73c 100644 --- a/scrapers/data18.yml +++ b/scrapers/data18.yml @@ -83,4 +83,4 @@ xPathScrapers: with: FrontImage: //a[@id='enlargecover']/@data-featherlight BackImage: //a[text()='+Back']/@href -# Last Updated January 31, 2023 +# Last Updated July 18, 2023 From 2d12e832470e4a4b2b2c0dd1490e8fcf936f4788 Mon Sep 17 00:00:00 2001 From: Dystaxia Date: Tue, 18 Jul 2023 08:38:19 -0500 Subject: [PATCH 203/624] Age Verification Captcha - update data18.yml Age verification captcha --- scrapers/data18.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scrapers/data18.yml b/scrapers/data18.yml index 4a1b1f73c..0b533c24a 100644 --- a/scrapers/data18.yml +++ b/scrapers/data18.yml @@ -1,4 +1,15 @@ name: data18 +driver: + cookies: + - CookieURL: "https://data18.com" + Cookies: + - Name: "data_user_captcha" + Domain: ".data18.com" + Value: "1" + Path: "/" + headers: + - Key: User-Agent + Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) movieByURL: - action: scrapeXPath url: From 2316ff2debff2b3ffa1832c13ba5d12ffbb533a6 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Tue, 18 Jul 2023 14:57:24 +0100 Subject: [PATCH 204/624] add galleryByFragment --- scrapers/Algolia.py | 59 
++++++++++++++++++++++++----------- scrapers/Algolia_Adultime.yml | 9 +++++- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/scrapers/Algolia.py b/scrapers/Algolia.py index 328ab049f..3d0aa2ef6 100644 --- a/scrapers/Algolia.py +++ b/scrapers/Algolia.py @@ -254,8 +254,10 @@ def write_config(date, app_id, api_key): # API Search Data def api_search_req(type_search, query, url): api_request = None - if type_search == "query": - api_request = api_search_query(query, url) + if type_search == "query_all_scenes": + api_request = api_search_query("all_scenes", query, url) + if type_search == "query_all_photosets": + api_request = api_search_query("all_photosets", query, url) if type_search == "id": api_request = api_search_id(query, url) if api_request: @@ -304,15 +306,15 @@ def api_search_gallery_id(p_id, url): return req -def api_search_query(query, url): +def api_search_query(index_name, query, url): request_api = { "requests": [{ - "indexName": "all_scenes", + "indexName": index_name, "params": "query=" + query + "&hitsPerPage=40&page=0" }] } - req = send_request(url, HEADERS, request_api) - return req + res = send_request(url, HEADERS, request_api) + return res # Searching Result @@ -814,6 +816,8 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: SCENE_TITLE = FRAGMENT.get("title") SCENE_URL = FRAGMENT.get("url") +# log.trace(f"fragment: {FRAGMENT}") + # ACCESS API # Check existing API keys CURRENT_TIME = datetime.datetime.now() @@ -910,7 +914,7 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: if SEARCH_TITLE: SEARCH_TITLE = SEARCH_TITLE.replace(".", " ") log.debug(f"[API] Searching for: {SEARCH_TITLE}") - api_search = api_search_req("query", SEARCH_TITLE, api_url) + api_search = api_search_req("query_all_scenes", SEARCH_TITLE, api_url) final_json = None if api_search: result_search = [] @@ -936,13 +940,13 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: log.warning("[API] No result") if 
url_title and api_json is None: log.debug("[API] Searching using URL_TITLE") - api_search = api_search_req("query", url_title, api_url) + api_search = api_search_req("query_all_scenes", url_title, api_url) if api_search: log.info(f"[API] Search gives {len(api_search)} result(s)") api_json = json_parser(api_search) if SCENE_TITLE and api_json is None: log.debug("[API] Searching using STASH_TITLE") - api_search = api_search_req("query", SCENE_TITLE, api_url) + api_search = api_search_req("query_all_scenes", SCENE_TITLE, api_url) if api_search: log.info(f"[API] Search gives {len(api_search)} result(s)") api_json = json_parser(api_search) @@ -966,13 +970,30 @@ def parse_gallery_json(gallery_json: dict, url: str = None) -> dict: #log.debug(scraped_movie) print(json.dumps(scraped_movie)) elif "gallery" in sys.argv: - log.debug("Scraping gallery") - gallery_id = get_id_from_url(SCENE_URL) - if gallery_id: - gallery_results = api_search_gallery_id(gallery_id, api_url) - gallery = gallery_results.json()["results"][0].get("hits") - if gallery: - #log.debug(gallery[0]) - scraped_gallery = parse_gallery_json(gallery[0]) - #log.debug(scraped_gallery) - print(json.dumps(scraped_gallery)) + scraped_gallery = None + if SCENE_URL: + log.debug("Scraping gallery by URL") + gallery_id = get_id_from_url(SCENE_URL) + if gallery_id: + gallery_results = api_search_gallery_id(gallery_id, api_url) + gallery = gallery_results.json()["results"][0].get("hits") + if gallery: + #log.debug(gallery[0]) + scraped_gallery = parse_gallery_json(gallery[0]) + #log.debug(scraped_gallery) + elif SCENE_TITLE: + log.debug("Scraping gallery by fragment") + # log.debug(f"[API] Searching using SCENE_TITLE: {SCENE_TITLE}") + api_search = api_search_req("query_all_photosets", SCENE_TITLE, api_url) + if api_search: + log.info(f"[API] Search gives {len(api_search)} result(s)") + # log.trace(f"api_search: {api_search}") + log.debug(f"Galleries found: {'; '.join([g['title'] for g in api_search])}") + 
scraped_gallery = parse_gallery_json(api_search[0]) + # Scraping the JSON + if scraped_gallery: + print(json.dumps(scraped_gallery)) + else: + log.error("Can't find the gallery") + print(json.dumps({})) + sys.exit() diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Algolia_Adultime.yml index 2b13e6123..2a1294b57 100644 --- a/scrapers/Algolia_Adultime.yml +++ b/scrapers/Algolia_Adultime.yml @@ -76,6 +76,13 @@ sceneByQueryFragment: - Algolia.py - girlsway - validName +galleryByFragment: + action: script + script: + - python + - Algolia.py + - girlsway + - gallery galleryByURL: - action: script url: @@ -104,4 +111,4 @@ movieByURL: - Algolia.py - puretaboo - movie -# Last Updated May 25, 2023 +# Last Updated July 18, 2023 From 2f4554ab5a47997425de203dff956a2ecf31f4a8 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Tue, 18 Jul 2023 15:30:51 +0100 Subject: [PATCH 205/624] use `puretaboo` as appears to give better results from API --- scrapers/Algolia_Adultime.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Algolia_Adultime.yml index 2a1294b57..f8a8d3811 100644 --- a/scrapers/Algolia_Adultime.yml +++ b/scrapers/Algolia_Adultime.yml @@ -81,7 +81,7 @@ galleryByFragment: script: - python - Algolia.py - - girlsway + - puretaboo - gallery galleryByURL: - action: script From ef449ba6c19474bc6b9ad2da0fee5bb5b68c8574 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Tue, 18 Jul 2023 15:36:47 +0100 Subject: [PATCH 206/624] update studio lists --- SCRAPERS-LIST.md | 5 +++-- scrapers/Algolia_Adultime.yml | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a7ea432a5..e126c9c0d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -104,6 +104,7 @@ asianamericantgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans asianfever.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- asiansexdiary.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +asmrfantasy.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- assholefever.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- assmeat.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -938,7 +939,7 @@ onlyblowjob.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlygolddigger.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyprince.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oreno3d.com|Oreno3d.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1323,7 +1324,7 @@ transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans transfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans +transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Trans transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans transgressivexxx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Algolia_Adultime.yml index f8a8d3811..bdeaa9130 100644 --- a/scrapers/Algolia_Adultime.yml +++ 
b/scrapers/Algolia_Adultime.yml @@ -8,6 +8,7 @@ sceneByURL: - adulttimepilots.com/en/video/ - agentredgirl.com/en/video/ - analteenangels.com/en/video/ + - asmrfantasy.com/en/video/ - assholefever.com/en/video/ - beingtrans247.com/en/video/ - blowmepov.com/en/video/ From c44a8acf6ced1ca2feb2303faee87ea67f05836e Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 18 Jul 2023 19:30:35 +0200 Subject: [PATCH 207/624] Update SCRAPERS-LIST Add scene scraping for 1passforallsites Add gallery scraping for Karups --- SCRAPERS-LIST.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a7ea432a5..486c49df5 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -14,6 +14,7 @@ Supported Site|Scraper| S | G | M | P |Needs|Contents 18tokyo.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV 18vr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR 1by-day.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +1passforallsites.com|1passforallsites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 1pondo.tv|1pondo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV 21naturals.com|Algolia_21Naturals.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 21roles.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -700,7 +701,7 @@ jschoolgirls.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV julesjordan.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|- juliaannlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- karissa-diamond.com|Karissa-Diamond.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- -karups.com|Karups.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +karups.com|Karups.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- katiebanks.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kellymadison.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- kendrajames.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 
f80656a076a59cf64375bdbf329a96a223e94f80 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:30:19 -0400 Subject: [PATCH 208/624] Created AuntJudysXXX scraper --- scrapers/AuntJudysXXX.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 scrapers/AuntJudysXXX.yml diff --git a/scrapers/AuntJudysXXX.yml b/scrapers/AuntJudysXXX.yml new file mode 100644 index 000000000..116c91055 --- /dev/null +++ b/scrapers/AuntJudysXXX.yml @@ -0,0 +1,24 @@ +name: AuntJudysXXX +sceneByURL: + - action: scrapeXPath + url: + - auntjudysxxx.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + fixed: Aunt Judy's XXX + Title: //span[@class="title_bar_hilite"] + Details: //span[@class="update_description"] + Performers: + Name: //p/span[@class="update_models"]/a + URL: //p/span[@class="update_models"]/a/@href + Tags: + Name: //span[@class="update_tags"]/a + Date: + selector: //div[@class="cell update_date"]/text()[1] + postProcess: + - parseDate: 1/2/2006 +# Last Updated July 18, 2023 From 488805df389a34a557bc625db265919f9ac338d9 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:44:21 -0400 Subject: [PATCH 209/624] Created DFB Network scraper --- scrapers/DFB.yml | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 scrapers/DFB.yml diff --git a/scrapers/DFB.yml b/scrapers/DFB.yml new file mode 100644 index 000000000..8fda2caa1 --- /dev/null +++ b/scrapers/DFB.yml @@ -0,0 +1,49 @@ +name: "DFB Network" +sceneByURL: + - action: scrapeXPath + scraper: sceneScraper + url: + - dfbnetwork.com/videos/ +xPathScrapers: + sceneScraper: + scene: + Date: + selector: //p[contains(text(),"Release date:")] + postProcess: + - replace: + - regex: .*(\d{2}\.\d{2}\.\d{4}).* + with: $1 + - parseDate: 02.01.2006 + Details: + selector: //p[@class="set-descr"]/text() + postProcess: + - replace: + - regex: (^\W*Cast\W*) + with: + Image: + selector: 
//img[@class="img-responsive"]/@src|//link[@rel="canonical"]/@href + concat: '|' + postProcess: + - replace: + - regex: (.*)\|(https?\:\/\/.+\.com).* + with: $2$1 + Performers: + Name: //p[@class="set-descr"]/a/text()|//a[contains(@href,"/models/preview/")]/@title + URL: + selector: //p[@class="set-descr"]/a/@href + postProcess: + - replace: + - regex: ^ + with: https://www.dfbnetwork.com + Studio: + Name: + fixed: DFB Network + Tags: + Name: + selector: //a[@class="tag"]/i/text() + postProcess: + - replace: + - regex: \- + with: " " + Title: //h1[@class="movie-name"]/text() +# Last Updated July 18, 2023 From 0d96c9afaaafc83539101e6b887d7b2c1e895257 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:46:19 -0400 Subject: [PATCH 210/624] Created SmutPuppet Network scraper --- scrapers/SmutPuppet.yml | 66 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 scrapers/SmutPuppet.yml diff --git a/scrapers/SmutPuppet.yml b/scrapers/SmutPuppet.yml new file mode 100644 index 000000000..9ca9fbb97 --- /dev/null +++ b/scrapers/SmutPuppet.yml @@ -0,0 +1,66 @@ +name: SmutPuppet +sceneByURL: + - action: scrapeXPath + url: + - blackandbig.com/update + - darksodomy.com/update + - dothewife.com/update + - dreamtranny.com/update + - genlez.com/update + - goldenslut.com/update + - grannyvsbbc.com/update + - jefssmodels.com/update + - milfsodomy.com/update + - smutmerchants.com/update + - suggabunny.com/update + - teenerotica.xxx/update + scraper: sceneScraper +sceneByFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Performers: + Name: //div[@class="model-rich"]/h4[@class="theme-color"]/a + Title: //div[@class="section-title"]/h4 + Details: + selector: //p[@class="read-more"]/text() + postProcess: + - replace: + - regex: '^\s*:\s*' + with: "" + Date: + selector: //small[@class="updated-at"]/text() + postProcess: + - parseDate: Jan 2, 2006 + Tags: + Name: + 
selector: //div[@class="model-categories"]/a/text() + Studio: + Name: + selector: //div[@class="block-logo"]/a/img/@alt + Image: + selector: //img[@class="video-banner"]/@src|//video/@poster + postProcess: + - replace: + - regex: (?:.+)(\/usermedia\/.+\.jpg)(?:.+) + with: $1 + - regex: "^/usermedia/" + with: "https://smutpuppet.com/usermedia/" + Code: + selector: //div[@class="model-player"]/a/img/@src + postProcess: + - replace: + - regex: .+\/updates/(\d+)\/.+ + with: $1 + # Return the sanitized URL + URL: + selector: //div[@class="block-logo"]/a/img/@src|//div[@class="model-player"]/a/img/@src + concat: "|" + postProcess: + - replace: + - regex: \/static\/(\w+\....)\/.+\/(\d+)\/.+ + with: https://$1/update/$2 +# Last Updated July 18, 2023 From 109c86ba570e5ae615c9446e5d753ecada6e035e Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:47:57 -0400 Subject: [PATCH 211/624] Created OldNanny Network scraper --- scrapers/OldNanny.yml | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 scrapers/OldNanny.yml diff --git a/scrapers/OldNanny.yml b/scrapers/OldNanny.yml new file mode 100644 index 000000000..507c3b632 --- /dev/null +++ b/scrapers/OldNanny.yml @@ -0,0 +1,35 @@ +# This scraper does not parse older scenes that use a different page layout +name: "OldNanny" +sceneByURL: + - action: scrapeXPath + url: + - oldnanny.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + selector: //div[@class="col-12 col-xl-4"]//dl[2]//a/text() + Title: + selector: //title/text() + postProcess: + - replace: + - regex: ^.+\s:\s(.+)$ + with: $1 + Tags: + Name: + selector: //div[@class="col-12 col-xl-4"]//dl[3]//a/text() + Date: + selector: //div[@class="col-12 text-center title-wrapp"]/h1/small/text() + postProcess: + - parseDate: January 2, 2006 + Performers: + Name: + selector: //div[@class="col-12 col-xl-4"]//dl[1]//a + postProcess: + - replace: + - regex: \, + with: + Image: 
//video[@id="video-player"]/@poster +# Last Updated July 18, 2023 From 3e54543ed187c2d036493c522cacf8189403945a Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:50:21 -0400 Subject: [PATCH 212/624] Created TugPass network scraper --- scrapers/TugPass.yml | 70 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 scrapers/TugPass.yml diff --git a/scrapers/TugPass.yml b/scrapers/TugPass.yml new file mode 100644 index 000000000..04b5ef451 --- /dev/null +++ b/scrapers/TugPass.yml @@ -0,0 +1,70 @@ +name: "Tug Pass" +sceneByURL: + - action: scrapeXPath + url: + - tugpass.com/videos + scraper: sceneScraper_network + - action: scrapeXPath + url: + - familylust.com/videos + - petite18.com/videos + - seemomsuck.com/videos + - clubtug.com/videos + - teentugs.com/videos + - teasepov.com/videos + - over40handjobs.com/videos + - meanmassage.com/videos + - finishhim.com/videos + - ebonytugs.com/videos + - cumblastcity.com/videos + scraper: sceneScraper_sites +xPathScrapers: + sceneScraper_network: + scene: + Details: &details //meta[@name="description"]/@content + Performers: &performers + Name: //div[@class="featuringWrapper"]/a + Title: &title + selector: //title/text() + postProcess: + - replace: + - regex: \s\|.+$ + with: + - regex: \s\-.+$ + with: + # Scrape sanitized URL + URL: &url //link[@rel="canonical"]/@href + Image: + selector: //div[@class="player"]/img/@src + postProcess: + - replace: + - regex: ^ + with: https://www.tugpass.com/ + Studio: + Name: + selector: //div[@class="player"]/img/@src + postProcess: + - replace: + - regex: ^\/[A-Za-z_-]+\/([A-Za-z0-9-]+)\/.+ + with: $1 + sceneScraper_sites: + scene: + Details: *details + Performers: *performers + Title: *title + URL: *url + Image: + selector: //img[@id="playerImagePreview"]/@src|//script/text()[contains(.,"posterImage")]|//link[@rel="canonical"]/@href + concat: "|" + postProcess: + - replace: + - regex: 
.*(\/.+\/.+\.(jpe?g|gif)).*(https?\:\/\/.+\.com).* + with: $3$1 + Studio: + Name: + selector: //meta[@name="copyright"]/@content + postProcess: + - replace: + - regex: Copyright\s(.+)\s\d{4} + with: $1 +# Last Updated July 18, 2023 From 9763fd1aed0b62d581bd6834fc36332cc33e280f Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:52:14 -0400 Subject: [PATCH 213/624] Added gallery; Updated scraper --- scrapers/wearehairy.yml | 67 +++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 12 deletions(-) diff --git a/scrapers/wearehairy.yml b/scrapers/wearehairy.yml index c0d769530..76a786691 100644 --- a/scrapers/wearehairy.yml +++ b/scrapers/wearehairy.yml @@ -1,25 +1,68 @@ -name: wearehairy +name: "We Are Hairy" sceneByURL: - action: scrapeXPath url: - wearehairy.com scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - wearehairy.com/models + scraper: galleryScraper +performerByURL: + - action: scrapeXPath + url: + - wearehairy.com/models + scraper: performerScraper xPathScrapers: sceneScraper: scene: - Title: - selector: //h1[@itemprop="name"]//h1 - Date: - selector: //span[@class="added"]//@datetime + Title: &title //title + Date: &date + selector: //time/@datetime postProcess: - parseDate: 2006-01-02 - Details: //div[@class="line"][contains(h3,"Description")]/p - Tags: + Details: &details //meta[@name="description"]/@content + Tags: &tags Name: //div[@class="tagline"]//a - Image: //video[@id="trailer"]/@poster - Performers: + Image: + selector: //meta[@itemprop="thumbnailUrl"]/@content + postProcess: + - replace: + - regex: \/\/(.*) + with: https://$1 + Performers: &performers Name: //div[@class="meet"]//a - Studio: + Director: //h3[contains(text(),"Shot by")]/following-sibling::p + Studio: &studio Name: - fixed: WeAreHairy -# Last Updated March 01, 2021 + fixed: We Are Hairy + galleryScraper: + gallery: + Title: *title + Date: *date + Studio: *studio + Performers: *performers + Tags: *tags + Details: *details + 
performerScraper: + performer: + Name: //span/span[@itemprop="name"]/text() + Gender: + fixed: Female +# Uncomment to scrape performer image +# Image: +# selector: //div[@class="lhs"]/img/@src + Height: + selector: //p[@id="height_both"]/text() + postProcess: + - replace: + - regex: .+(/d/d/d)cm + with: $1 + Weight: + selector: //p[@id="weight_both"]/text() + postProcess: + - replace: + - regex: .+(/d/d)kg + with: $1 +# Last Updated July 18, 2023 From dc65c90f09a66e29533b51e71a7b275707e57349 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Tue, 18 Jul 2023 22:59:19 -0400 Subject: [PATCH 214/624] Sanitize URL; added site; gets code, real image --- scrapers/TheScoreGroup.yml | 51 +++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup.yml index 138733878..059f583f6 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup.yml @@ -8,6 +8,7 @@ sceneByURL: - 60plusmilfs.com/ - legsex.com/ - pornmegaload.com/ + - naughtymag.com - scoreland.com/ - scoreland2.com/ - xlgirls.com/ @@ -18,8 +19,10 @@ galleryByURL: scraper: galleryScraper xPathScrapers: sceneScraper: + common: + $url: //link[@rel="canonical"]/@href scene: - Title: //section[contains(@id,"_page-page")]/div[@class="row"]/div/h1/text() + Title: &titleAttr //h1[1]/text() Studio: &studioAttr Name: selector: //meta[@property="og:site_name"]/@content @@ -30,20 +33,44 @@ xPathScrapers: - regex: .., with: - parseDate: January 2 2006 - Details: - selector: //div[@class="row"]/div/div[@class="p-desc"]/text() - concat: "\n" + Details: &details + selector: //div[@class="p-desc p-3"]//text() + postProcess: + - replace: + - regex: Read More » + with: + concat: "\n\n" Tags: - Name: //div[@class="row"]/div/div[@class="mb-3"]/a/text() + Name: //div[@class="mb-3"]/a/text() Performers: &performersAttr Name: //div[@class="stat mb-2"]/span[contains(.,"Featuring:")]/../span[@class="value"]/a/text() Image: - selector: 
//meta[@itemprop="image"]/@content - # Enable this post process if you want better image quality but sometimes it can fail - # postProcess: - # - replace: - # - regex: _lg - # with: _x_800 +# This selector scrapes the canonical scene page cover image + selector: //script[contains(text(), "poster")] + postProcess: + - replace: + - regex: ^.+poster.+'(.+jpg)'.+$ + with: $1 +# This Selector scrapes the image posted on social media sites +# selector: //meta[@itemprop="image"]/@content +# Enable this post process if you want better image quality but sometimes it can fail +# postProcess: +# - replace: +# - regex: _lg +# with: _x_800 + URL: &urlAttr + selector: $url + # Sanitize the URL + postProcess: + - replace: + - regex: (https?:\/\/(?:www\.)?[0-9A-za-z-]+\.com\/[0-9A-Za-z-_]+\/[0-9A-Za-z-_]+\/\d+)\/?.* + with: $1 + Code: &codeAttr + selector: $url + postProcess: + - replace: + - regex: https?:\/\/(?:www\.)?[0-9A-za-z-]+\.com\/[0-9A-Za-z-_]+\/[0-9A-Za-z-_]+\/(\d+)\/?.* + with: $1 galleryScraper: common: $photopage: //section[@id='photos_page-page'] @@ -57,4 +84,4 @@ xPathScrapers: Tags: Name: $photopage//div[@class='mb-3']/a/text() Performers: *performersAttr -# Last Updated January 31, 2023 +# Last Updated July 18, 2023 From 7c4a5cf977119f11b52f826983e6630a16cb3ab7 Mon Sep 17 00:00:00 2001 From: awalkabout <140019299+awalkabout@users.noreply.github.com> Date: Wed, 19 Jul 2023 16:09:59 -0500 Subject: [PATCH 215/624] an updated tokyohot scraper --- scrapers/tokyohotv2.py | 353 ++++++++++++++++++++++++++++++++++++++++ scrapers/tokyohotv2.yml | 12 ++ 2 files changed, 365 insertions(+) create mode 100644 scrapers/tokyohotv2.py create mode 100644 scrapers/tokyohotv2.yml diff --git a/scrapers/tokyohotv2.py b/scrapers/tokyohotv2.py new file mode 100644 index 000000000..eae6b8c9f --- /dev/null +++ b/scrapers/tokyohotv2.py @@ -0,0 +1,353 @@ +import base64 +import json +import sys +import re +from dataclasses import dataclass + +BASE_QUERY_MEDIA_SEARCH = 
"https://my.tokyo-hot.com/product/?q=" +BASE_DETAIL_URL = "https://my.tokyo-hot.com" + +JAP_TO_US_BUST = { + "A": "AA", + "B": "A", + "C": "B", + "D": "C", + "E": "D", + "F": "DD", + "G": "DDD", + "H": "F", + "I": "G", + "J": "H", + "K": "I", +} + +MEDIA_CONFIGURATIONS = [ + ## must contain either 1 or 2 capture groups + ## group 1 = the code + ## group 2 (optional) = the part number if it's a multi-part (split) scene + "(n\d{4})\D*_\D{2}(\d)\S*", # "mult-part N series" + "(n\d{4})\S*", # "single part N series" + "(k\d{4})\S*", # "single part K series" + "(kb\d{4})\S*", # "single part KB series" +] + +try: + from py_common import log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + import requests +except ModuleNotFoundError: + print( + "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", + file=sys.stderr, + ) + sys.exit() + +try: + from bs4 import BeautifulSoup +except ModuleNotFoundError: + print( + "You need to install the Beautiful Soup module. 
(https://pypi.org/project/beautifulsoup4/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", + file=sys.stderr, + ) + sys.exit() + + +class ScenePage: + def __init__(self, scene_id, multipart, partnum, url): + self.url = url + self.soup = _soup_maker(self.url) + self.scene_id = scene_id + self.multipart = multipart + self.partnum = partnum + self.title = self.get_title() + self.studio = self.get_studio() + self.image = self.get_image() + self.details = self.get_details() + self.performers = self.get_performers() + self.date = self.get_date() + + def get_title(self): + title = self.scene_id + if self.multipart: + title = title + f" - Part {self.partnum}" + scene_title = self.soup.find("div", {"class": "pagetitle"}) + if scene_title: + title = title + " - " + scene_title.text.strip() + return title + + def get_studio(self): + info = self.soup.find("div", {"class": "infowrapper"}) + info_links = info.find_all("a") + for link in info_links: + if "vendor" in link.get("href"): + return link.text + return None + + def get_image(self): + info = self.soup.find("video") + if info: + return get_image(info.get("poster")) + return None + + def get_performers(self): + performers = [] + info = self.soup.find("div", {"class": "infowrapper"}) + info_links = info.find_all("a") + for link in info_links: + if "cast" in link.get("href"): + perf = TokyoHotModel(model_url=BASE_DETAIL_URL + link.get("href")).get_json() + performers.append(perf) + return performers + + def get_details(self): + details = None + scene_details = self.soup.find("div", {"class": "sentence"}) + if scene_details: + details = scene_details.text.strip() + return details + + def get_date(self): + log.info("Invoking self date") + info_dd = self.soup.find("div", {"class": "infowrapper"}).find_all("dd") + for dd in info_dd: + search = re.search("(\d{4})/(\d{2})/(\d{2})", dd.text) + if search: + date = 
f"{search[1]}-{search[2]}-{search[3]}" + return date + return None + + def get_json(self): + return { + "Title": self.title, + "Details": self.details, + "URL": self.url, + "Date": self.date, + "Performers": self.performers, + "Studio": {"Name": self.studio}, + "Code": self.scene_id, + "Image": self.image, + } + + +class TokyoHotModel: + def __init__(self, model_url): + self.url = model_url + self.model_soup = _soup_maker(self.url) + self.model_name = self.get_name() + self.height = self.get_height() + self.weight = self.get_weight() + self.measurements = self.get_measurements() + self.images = self.get_images() + self.gender = "Female" + self.ethnicity = "Asian" + self.country = "JP" + + def get_name(self): + name = None + model_name = self.model_soup.find("div", {"class": "pagetitle mb0"}) + if model_name: + name = model_name.text.strip() + return name + + def get_height(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + + if info_dict.get("Height"): + parse_data = re.search("(\d{3})cm\s~\s(\d{3})cm", info_dict.get("Height")) + if parse_data: + data = (int(parse_data[1]) + int(parse_data[2])) / 2 + return str(data) + return None + + def get_weight(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + if info_dict.get("Weight"): + parse_data = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Weight") + ) + if parse_data: + data = (int(parse_data[1]) + int(parse_data[2])) / 2 + return str(data) + return None + + def get_measurements(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = 
dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + + cup = None + bust = None + waist = None + hip = None + + if info_dict.get("Cup Size"): + parse_cup = re.search("^(\w)", info_dict.get("Cup Size")) + if parse_cup: + cup = JAP_TO_US_BUST.get(parse_cup[1].strip()) + + if info_dict.get("Bust Size"): + parse_bust = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Bust Size") + ) + if parse_bust: + bust = round(((int(parse_bust[1]) + int(parse_bust[2])) / 2) * 0.393701) + + if info_dict.get("Waist Size"): + parse_waist = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Waist Size") + ) + if parse_waist: + waist = round( + ((int(parse_waist[1]) + int(parse_waist[2])) / 2) * 0.393701 + ) + + if info_dict.get("Hip"): + parse_hip = re.search("(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Hip")) + if parse_hip: + hip = round(((int(parse_hip[1]) + int(parse_hip[2])) / 2) * 0.393701) + + if cup and bust and waist and hip: + return f"{bust}{cup}-{waist}-{hip}" + + return None + + def get_images(self): + try: + model_url = ( + self.model_soup.find("div", {"id": "profile"}).find("img").get("src") + ) + return [get_image(model_url)] + except: + return None + + def get_json(self): + return { + "Name": self.model_name, + "Gender": self.gender, + "URL": self.url, + "Ethnicity": self.ethnicity, + "Country": self.country, + "Height": self.height, + "Weight": self.weight, + "Measurements": self.measurements, + "Images": self.images, + } + + +def query(fragment, query_type): + res = None + media_info = None + + if query_type in ("scene"): + name = re.sub(r"\s", "_", fragment["title"]).lower() + media_info = _extract_media_id(name) + + if media_info: + res = scrape_scene( + name=media_info["code"], + multipart=media_info["multipart"], + partnum=media_info["partnum"], + ) + + return res + + +def _soup_maker(url: str): + requests.packages.urllib3.disable_warnings() + try: + html = requests.get(url, verify=False) + soup = BeautifulSoup(html.text, "html.parser") 
+ except Exception as e: + log.error("Error retrieving specified URL") + raise e + return soup + + +def _parse_media_search(soup): + detail_page_url = None + detail_object = soup.find("a", {"class": "rm"}) + if detail_object: + detail_page_url = BASE_DETAIL_URL + detail_object.get("href") + log.info(f"Scene URL found: {detail_page_url}") + return detail_page_url + + +def _extract_media_id(media_title: str, configuration: dict = MEDIA_CONFIGURATIONS): + log.info(f"Extracting Media ID for {media_title}") + + def _extract_multi_part(search_results): + if len(search_results.groups()) > 1: + return (True, search_results[2]) + return (False, False) + + for config in configuration: + search = re.search(pattern=config, string=media_title) + if search: + scene_info = { + "code": search[1], + "multipart": _extract_multi_part(search)[0], + "partnum": _extract_multi_part(search)[1], + } + log.info(f"Regex matched. Details {scene_info}") + return scene_info + return None + + +def scrape_scene(name, multipart, partnum): + search_soup = _soup_maker(BASE_QUERY_MEDIA_SEARCH + name) + scene_url = _parse_media_search(soup=search_soup) + if scene_url is None: + log.info(f"Scene not found: {name}. Try another server region, e.g. 
Hong Kong") + return None + scene_page = ScenePage( + scene_id=name, multipart=multipart, partnum=partnum, url=scene_url + ) + response = scene_page.get_json() + return response + + +def get_image(image_url): + try: + response = requests.get(image_url, verify=False, timeout=(3, 6)) + except requests.exceptions.RequestException as req_ex: + log.error(f"Error fetching URL {image_url}") + + if response.status_code < 400: + mime = "image/jpeg" + encoded = base64.b64encode(response.content).decode("utf-8") + return f"data:{mime};base64,{encoded}" + + log.info(f"Fetching {image_url} resulted in error: {response.status_code}") + return None + + +def main(): + scraper_input = sys.stdin.read() + i = json.loads(scraper_input) + ret = {} + if sys.argv[1] == "query": + ret = query(i, sys.argv[2]) + output = json.dumps(ret) + print(output) + + +main() diff --git a/scrapers/tokyohotv2.yml b/scrapers/tokyohotv2.yml new file mode 100644 index 000000000..6c9796dc2 --- /dev/null +++ b/scrapers/tokyohotv2.yml @@ -0,0 +1,12 @@ +name: Tokyo Hot V2 + +sceneByFragment: + action: script + script: + - python3 + - tokyohotv2.py + - query + - scene + + +# Last Updated July 17, 2023 From b56c5504735803ffb59a308dc02c93abe476f904 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Wed, 19 Jul 2023 17:39:56 -0400 Subject: [PATCH 216/624] Renamed file to camel case --- scrapers/{wearehairy.yml => WeAreHairy.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scrapers/{wearehairy.yml => WeAreHairy.yml} (100%) diff --git a/scrapers/wearehairy.yml b/scrapers/WeAreHairy.yml similarity index 100% rename from scrapers/wearehairy.yml rename to scrapers/WeAreHairy.yml From 75773bffd16f7cec7770d78bdb0b38327a460727 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Thu, 20 Jul 2023 00:20:32 +0200 Subject: [PATCH 217/624] New scraper: Futanarica --- SCRAPERS-LIST.md | 1 + scrapers/Futanarica.yml | 26 ++++++++++++++++++++++++++ 2 files changed, 27 
insertions(+) create mode 100644 scrapers/Futanarica.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a7ea432a5..9c8af77a0 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -510,6 +510,7 @@ fuckingstreet.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fuckstudies.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fuckthegeek.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- funbags.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +futanarica.com|Futanarica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- futanari.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gag-n-gape.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gangav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored diff --git a/scrapers/Futanarica.yml b/scrapers/Futanarica.yml new file mode 100644 index 000000000..fff328664 --- /dev/null +++ b/scrapers/Futanarica.yml @@ -0,0 +1,26 @@ +name: "Futanarica" +sceneByURL: + - action: scrapeXPath + url: + - futanarica.com + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //article//h3 + Image: //article//a/img/@src + Studio: + Name: + fixed: Futanarica + URL: //meta[@property="og:url"]/@content + Details: //meta[@property="og:description"]/@content + Date: + selector: //meta[@property="article:published_time"]/@content + postProcess: + - replace: + - regex: "T(.*)$" + with: "" + - parseDate: 2006-01-02 + +# Last Updated July 20, 2023 \ No newline at end of file From d419d561dc9283c6861106aa11018851dc6bc338 Mon Sep 17 00:00:00 2001 From: awalkabout <140019299+awalkabout@users.noreply.github.com> Date: Thu, 20 Jul 2023 17:04:09 -0500 Subject: [PATCH 218/624] Renamed as Tokyohot.py Removed separate tokyohotv2.yml and merged into Tokyohot.yml Added english scene tag extraction per Maista6969 --- scrapers/Tokyohot.py | 358 ++++++++++++++++++++++++++++++++++++++++++ scrapers/Tokyohot.yml | 9 +- 2 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 scrapers/Tokyohot.py diff --git 
a/scrapers/Tokyohot.py b/scrapers/Tokyohot.py new file mode 100644 index 000000000..255c9ff75 --- /dev/null +++ b/scrapers/Tokyohot.py @@ -0,0 +1,358 @@ +import base64 +import json +import sys +import re + +BASE_QUERY_MEDIA_SEARCH = "https://my.tokyo-hot.com/product/?q=" +BASE_DETAIL_URL = "https://my.tokyo-hot.com" + +JAP_TO_US_BUST = { + "A": "AA", + "B": "A", + "C": "B", + "D": "C", + "E": "D", + "F": "DD", + "G": "DDD", + "H": "F", + "I": "G", + "J": "H", + "K": "I", +} + +MEDIA_CONFIGURATIONS = [ + ## must contain either 1 or 2 capture groups + ## group 1 = the code + ## group 2 (optional) = the part number if it's a multi-part (split) scene + "(n\d{4})\D*_\D{2}(\d)\S*", # "mult-part N series" + "(n\d{4})\S*", # "single part N series" + "(k\d{4})\S*", # "single part K series" + "(kb\d{4})\S*", # "single part KB series" +] + +try: + from py_common import log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + import requests +except ModuleNotFoundError: + print( + "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", + file=sys.stderr, + ) + sys.exit() + +try: + from bs4 import BeautifulSoup +except ModuleNotFoundError: + print( + "You need to install the Beautiful Soup module. 
(https://pypi.org/project/beautifulsoup4/)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", + file=sys.stderr, + ) + sys.exit() + + +class ScenePage: + def __init__(self, scene_id, multipart, partnum, url): + self.url = url + self.soup = _soup_maker(self.url) + self.scene_id = scene_id + self.multipart = multipart + self.partnum = partnum + self.title = self.get_title() + self.studio = self.get_studio() + self.image = self.get_image() + self.details = self.get_details() + self.performers = self.get_performers() + self.date = self.get_date() + self.tags = self.get_tags() + + def get_title(self): + title = self.scene_id + if self.multipart: + title = title + f" - Part {self.partnum}" + scene_title = self.soup.find("div", {"class": "pagetitle"}) + if scene_title: + title = title + " - " + scene_title.text.strip() + return title + + def get_studio(self): + info = self.soup.find("div", {"class": "infowrapper"}) + info_links = info.find_all("a") + for link in info_links: + if "vendor" in link.get("href"): + return link.text + return None + + def get_image(self): + info = self.soup.find("video") + if info: + return get_image(info.get("poster")) + return None + + def get_performers(self): + performers = [] + info = self.soup.find("div", {"class": "infowrapper"}) + info_links = info.find_all("a") + for link in info_links: + if "cast" in link.get("href"): + perf = TokyoHotModel(model_url=BASE_DETAIL_URL + link.get("href")).get_json() + performers.append(perf) + return performers + + def get_details(self): + details = None + scene_details = self.soup.find("div", {"class": "sentence"}) + if scene_details: + details = scene_details.text.strip() + return details + + def get_date(self): + log.info("Invoking self date") + info_dd = self.soup.find("div", {"class": "infowrapper"}).find_all("dd") + for dd in info_dd: + search = re.search("(\d{4})/(\d{2})/(\d{2})", dd.text) + if 
search: + date = f"{search[1]}-{search[2]}-{search[3]}" + return date + return None + + def get_tags(self): + potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a", href=lambda h: "type" in h) + return [{"Name":a.text} for a in potential_tags if a.text.isascii()] + + def get_json(self): + return { + "Title": self.title, + "Details": self.details, + "URL": self.url, + "Date": self.date, + "Performers": self.performers, + "Studio": {"Name": self.studio}, + "Code": self.scene_id, + "Image": self.image, + "Tags": self.tags + } + + +class TokyoHotModel: + def __init__(self, model_url): + self.url = model_url + self.model_soup = _soup_maker(self.url) + self.model_name = self.get_name() + self.height = self.get_height() + self.weight = self.get_weight() + self.measurements = self.get_measurements() + self.images = self.get_images() + self.gender = "Female" + self.ethnicity = "Asian" + self.country = "JP" + + def get_name(self): + name = None + model_name = self.model_soup.find("div", {"class": "pagetitle mb0"}) + if model_name: + name = model_name.text.strip() + return name + + def get_height(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + + if info_dict.get("Height"): + parse_data = re.search("(\d{3})cm\s~\s(\d{3})cm", info_dict.get("Height")) + if parse_data: + data = (int(parse_data[1]) + int(parse_data[2])) / 2 + return str(data) + return None + + def get_weight(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + if info_dict.get("Weight"): + parse_data = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Weight") + ) + if parse_data: + data = (int(parse_data[1]) + int(parse_data[2])) / 
2 + return str(data) + return None + + def get_measurements(self): + info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") + info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") + info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) + + cup = None + bust = None + waist = None + hip = None + + if info_dict.get("Cup Size"): + parse_cup = re.search("^(\w)", info_dict.get("Cup Size")) + if parse_cup: + cup = JAP_TO_US_BUST.get(parse_cup[1].strip()) + + if info_dict.get("Bust Size"): + parse_bust = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Bust Size") + ) + if parse_bust: + bust = round(((int(parse_bust[1]) + int(parse_bust[2])) / 2) * 0.393701) + + if info_dict.get("Waist Size"): + parse_waist = re.search( + "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Waist Size") + ) + if parse_waist: + waist = round( + ((int(parse_waist[1]) + int(parse_waist[2])) / 2) * 0.393701 + ) + + if info_dict.get("Hip"): + parse_hip = re.search("(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Hip")) + if parse_hip: + hip = round(((int(parse_hip[1]) + int(parse_hip[2])) / 2) * 0.393701) + + if cup and bust and waist and hip: + return f"{bust}{cup}-{waist}-{hip}" + + return None + + def get_images(self): + try: + model_url = ( + self.model_soup.find("div", {"id": "profile"}).find("img").get("src") + ) + return [get_image(model_url)] + except: + return None + + def get_json(self): + return { + "Name": self.model_name, + "Gender": self.gender, + "URL": self.url, + "Ethnicity": self.ethnicity, + "Country": self.country, + "Height": self.height, + "Weight": self.weight, + "Measurements": self.measurements, + "Images": self.images, + } + + +def query(fragment, query_type): + res = None + media_info = None + + if query_type in ("scene"): + name = re.sub(r"\s", "_", fragment["title"]).lower() + media_info = _extract_media_id(name) + + if media_info: + res = scrape_scene( + name=media_info["code"], + multipart=media_info["multipart"], 
+ partnum=media_info["partnum"], + ) + + return res + + +def _soup_maker(url: str): + requests.packages.urllib3.disable_warnings() + try: + html = requests.get(url, verify=False) + soup = BeautifulSoup(html.text, "html.parser") + except Exception as e: + log.error("Error retrieving specified URL") + raise e + return soup + + +def _parse_media_search(soup): + detail_page_url = None + detail_object = soup.find("a", {"class": "rm"}) + if detail_object: + detail_page_url = BASE_DETAIL_URL + detail_object.get("href") + log.info(f"Scene URL found: {detail_page_url}") + return detail_page_url + + +def _extract_media_id(media_title: str, configuration: dict = MEDIA_CONFIGURATIONS): + log.info(f"Extracting Media ID for {media_title}") + + def _extract_multi_part(search_results): + if len(search_results.groups()) > 1: + return (True, search_results[2]) + return (False, False) + + for config in configuration: + search = re.search(pattern=config, string=media_title) + if search: + scene_info = { + "code": search[1], + "multipart": _extract_multi_part(search)[0], + "partnum": _extract_multi_part(search)[1], + } + log.info(f"Regex matched. Details {scene_info}") + return scene_info + return None + + +def scrape_scene(name, multipart, partnum): + search_soup = _soup_maker(BASE_QUERY_MEDIA_SEARCH + name) + scene_url = _parse_media_search(soup=search_soup) + if scene_url is None: + log.info(f"Scene not found: {name}. Try another server region, e.g. 
Hong Kong") + return None + scene_page = ScenePage( + scene_id=name, multipart=multipart, partnum=partnum, url=scene_url + ) + response = scene_page.get_json() + return response + + +def get_image(image_url): + try: + response = requests.get(image_url, verify=False, timeout=(3, 6)) + except requests.exceptions.RequestException as req_ex: + log.error(f"Error fetching URL {image_url}") + + if response.status_code < 400: + mime = "image/jpeg" + encoded = base64.b64encode(response.content).decode("utf-8") + return f"data:{mime};base64,{encoded}" + + log.info(f"Fetching {image_url} resulted in error: {response.status_code}") + return None + + +def main(): + scraper_input = sys.stdin.read() + i = json.loads(scraper_input) + ret = {} + if sys.argv[1] == "query": + ret = query(i, sys.argv[2]) + output = json.dumps(ret) + print(output) + + +main() diff --git a/scrapers/Tokyohot.yml b/scrapers/Tokyohot.yml index 6e1935766..32cbb8378 100644 --- a/scrapers/Tokyohot.yml +++ b/scrapers/Tokyohot.yml @@ -19,5 +19,12 @@ xPathScrapers: Performers: Name: $movieinfo/dd[1]/a/text() Image: //li[@class="package"]/a[1]/@href +sceneByFragment: + action: script + script: + - python3 + - Tokyohot.py + - query + - scene -# Last Updated November 08, 2020 +# Last Updated July 20, 2023 From fcea6231a5e2bd85e49c622327802fc7453a501b Mon Sep 17 00:00:00 2001 From: awalkabout <140019299+awalkabout@users.noreply.github.com> Date: Thu, 20 Jul 2023 17:15:17 -0500 Subject: [PATCH 219/624] Delete tokyohotv2.py --- scrapers/tokyohotv2.py | 353 ----------------------------------------- 1 file changed, 353 deletions(-) delete mode 100644 scrapers/tokyohotv2.py diff --git a/scrapers/tokyohotv2.py b/scrapers/tokyohotv2.py deleted file mode 100644 index eae6b8c9f..000000000 --- a/scrapers/tokyohotv2.py +++ /dev/null @@ -1,353 +0,0 @@ -import base64 -import json -import sys -import re -from dataclasses import dataclass - -BASE_QUERY_MEDIA_SEARCH = "https://my.tokyo-hot.com/product/?q=" -BASE_DETAIL_URL = 
"https://my.tokyo-hot.com" - -JAP_TO_US_BUST = { - "A": "AA", - "B": "A", - "C": "B", - "D": "C", - "E": "D", - "F": "DD", - "G": "DDD", - "H": "F", - "I": "G", - "J": "H", - "K": "I", -} - -MEDIA_CONFIGURATIONS = [ - ## must contain either 1 or 2 capture groups - ## group 1 = the code - ## group 2 (optional) = the part number if it's a multi-part (split) scene - "(n\d{4})\D*_\D{2}(\d)\S*", # "mult-part N series" - "(n\d{4})\S*", # "single part N series" - "(k\d{4})\S*", # "single part K series" - "(kb\d{4})\S*", # "single part KB series" -] - -try: - from py_common import log -except ModuleNotFoundError: - print( - "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", - file=sys.stderr, - ) - sys.exit() - -try: - import requests -except ModuleNotFoundError: - print( - "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", - file=sys.stderr, - ) - print( - "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", - file=sys.stderr, - ) - sys.exit() - -try: - from bs4 import BeautifulSoup -except ModuleNotFoundError: - print( - "You need to install the Beautiful Soup module. 
(https://pypi.org/project/beautifulsoup4/)", - file=sys.stderr, - ) - print( - "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", - file=sys.stderr, - ) - sys.exit() - - -class ScenePage: - def __init__(self, scene_id, multipart, partnum, url): - self.url = url - self.soup = _soup_maker(self.url) - self.scene_id = scene_id - self.multipart = multipart - self.partnum = partnum - self.title = self.get_title() - self.studio = self.get_studio() - self.image = self.get_image() - self.details = self.get_details() - self.performers = self.get_performers() - self.date = self.get_date() - - def get_title(self): - title = self.scene_id - if self.multipart: - title = title + f" - Part {self.partnum}" - scene_title = self.soup.find("div", {"class": "pagetitle"}) - if scene_title: - title = title + " - " + scene_title.text.strip() - return title - - def get_studio(self): - info = self.soup.find("div", {"class": "infowrapper"}) - info_links = info.find_all("a") - for link in info_links: - if "vendor" in link.get("href"): - return link.text - return None - - def get_image(self): - info = self.soup.find("video") - if info: - return get_image(info.get("poster")) - return None - - def get_performers(self): - performers = [] - info = self.soup.find("div", {"class": "infowrapper"}) - info_links = info.find_all("a") - for link in info_links: - if "cast" in link.get("href"): - perf = TokyoHotModel(model_url=BASE_DETAIL_URL + link.get("href")).get_json() - performers.append(perf) - return performers - - def get_details(self): - details = None - scene_details = self.soup.find("div", {"class": "sentence"}) - if scene_details: - details = scene_details.text.strip() - return details - - def get_date(self): - log.info("Invoking self date") - info_dd = self.soup.find("div", {"class": "infowrapper"}).find_all("dd") - for dd in info_dd: - search = re.search("(\d{4})/(\d{2})/(\d{2})", dd.text) - if search: - date = 
f"{search[1]}-{search[2]}-{search[3]}" - return date - return None - - def get_json(self): - return { - "Title": self.title, - "Details": self.details, - "URL": self.url, - "Date": self.date, - "Performers": self.performers, - "Studio": {"Name": self.studio}, - "Code": self.scene_id, - "Image": self.image, - } - - -class TokyoHotModel: - def __init__(self, model_url): - self.url = model_url - self.model_soup = _soup_maker(self.url) - self.model_name = self.get_name() - self.height = self.get_height() - self.weight = self.get_weight() - self.measurements = self.get_measurements() - self.images = self.get_images() - self.gender = "Female" - self.ethnicity = "Asian" - self.country = "JP" - - def get_name(self): - name = None - model_name = self.model_soup.find("div", {"class": "pagetitle mb0"}) - if model_name: - name = model_name.text.strip() - return name - - def get_height(self): - info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") - info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") - info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) - - if info_dict.get("Height"): - parse_data = re.search("(\d{3})cm\s~\s(\d{3})cm", info_dict.get("Height")) - if parse_data: - data = (int(parse_data[1]) + int(parse_data[2])) / 2 - return str(data) - return None - - def get_weight(self): - info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") - info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") - info_dict = dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) - if info_dict.get("Weight"): - parse_data = re.search( - "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Weight") - ) - if parse_data: - data = (int(parse_data[1]) + int(parse_data[2])) / 2 - return str(data) - return None - - def get_measurements(self): - info_dt = self.model_soup.find("dl", {"class": "info"}).find_all("dt") - info_dd = self.model_soup.find("dl", {"class": "info"}).find_all("dd") - info_dict = 
dict(map(lambda k, v: (k.text, v.text), info_dt, info_dd)) - - cup = None - bust = None - waist = None - hip = None - - if info_dict.get("Cup Size"): - parse_cup = re.search("^(\w)", info_dict.get("Cup Size")) - if parse_cup: - cup = JAP_TO_US_BUST.get(parse_cup[1].strip()) - - if info_dict.get("Bust Size"): - parse_bust = re.search( - "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Bust Size") - ) - if parse_bust: - bust = round(((int(parse_bust[1]) + int(parse_bust[2])) / 2) * 0.393701) - - if info_dict.get("Waist Size"): - parse_waist = re.search( - "(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Waist Size") - ) - if parse_waist: - waist = round( - ((int(parse_waist[1]) + int(parse_waist[2])) / 2) * 0.393701 - ) - - if info_dict.get("Hip"): - parse_hip = re.search("(\d{2,3})cm\s~\s(\d{2,3})cm", info_dict.get("Hip")) - if parse_hip: - hip = round(((int(parse_hip[1]) + int(parse_hip[2])) / 2) * 0.393701) - - if cup and bust and waist and hip: - return f"{bust}{cup}-{waist}-{hip}" - - return None - - def get_images(self): - try: - model_url = ( - self.model_soup.find("div", {"id": "profile"}).find("img").get("src") - ) - return [get_image(model_url)] - except: - return None - - def get_json(self): - return { - "Name": self.model_name, - "Gender": self.gender, - "URL": self.url, - "Ethnicity": self.ethnicity, - "Country": self.country, - "Height": self.height, - "Weight": self.weight, - "Measurements": self.measurements, - "Images": self.images, - } - - -def query(fragment, query_type): - res = None - media_info = None - - if query_type in ("scene"): - name = re.sub(r"\s", "_", fragment["title"]).lower() - media_info = _extract_media_id(name) - - if media_info: - res = scrape_scene( - name=media_info["code"], - multipart=media_info["multipart"], - partnum=media_info["partnum"], - ) - - return res - - -def _soup_maker(url: str): - requests.packages.urllib3.disable_warnings() - try: - html = requests.get(url, verify=False) - soup = BeautifulSoup(html.text, "html.parser") 
- except Exception as e: - log.error("Error retrieving specified URL") - raise e - return soup - - -def _parse_media_search(soup): - detail_page_url = None - detail_object = soup.find("a", {"class": "rm"}) - if detail_object: - detail_page_url = BASE_DETAIL_URL + detail_object.get("href") - log.info(f"Scene URL found: {detail_page_url}") - return detail_page_url - - -def _extract_media_id(media_title: str, configuration: dict = MEDIA_CONFIGURATIONS): - log.info(f"Extracting Media ID for {media_title}") - - def _extract_multi_part(search_results): - if len(search_results.groups()) > 1: - return (True, search_results[2]) - return (False, False) - - for config in configuration: - search = re.search(pattern=config, string=media_title) - if search: - scene_info = { - "code": search[1], - "multipart": _extract_multi_part(search)[0], - "partnum": _extract_multi_part(search)[1], - } - log.info(f"Regex matched. Details {scene_info}") - return scene_info - return None - - -def scrape_scene(name, multipart, partnum): - search_soup = _soup_maker(BASE_QUERY_MEDIA_SEARCH + name) - scene_url = _parse_media_search(soup=search_soup) - if scene_url is None: - log.info(f"Scene not found: {name}. Try another server region, e.g. 
Hong Kong") - return None - scene_page = ScenePage( - scene_id=name, multipart=multipart, partnum=partnum, url=scene_url - ) - response = scene_page.get_json() - return response - - -def get_image(image_url): - try: - response = requests.get(image_url, verify=False, timeout=(3, 6)) - except requests.exceptions.RequestException as req_ex: - log.error(f"Error fetching URL {image_url}") - - if response.status_code < 400: - mime = "image/jpeg" - encoded = base64.b64encode(response.content).decode("utf-8") - return f"data:{mime};base64,{encoded}" - - log.info(f"Fetching {image_url} resulted in error: {response.status_code}") - return None - - -def main(): - scraper_input = sys.stdin.read() - i = json.loads(scraper_input) - ret = {} - if sys.argv[1] == "query": - ret = query(i, sys.argv[2]) - output = json.dumps(ret) - print(output) - - -main() From 6d06dc304df16a448ff0dad1fc7239a542274e30 Mon Sep 17 00:00:00 2001 From: awalkabout <140019299+awalkabout@users.noreply.github.com> Date: Thu, 20 Jul 2023 17:15:31 -0500 Subject: [PATCH 220/624] Delete tokyohotv2.yml --- scrapers/tokyohotv2.yml | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 scrapers/tokyohotv2.yml diff --git a/scrapers/tokyohotv2.yml b/scrapers/tokyohotv2.yml deleted file mode 100644 index 6c9796dc2..000000000 --- a/scrapers/tokyohotv2.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: Tokyo Hot V2 - -sceneByFragment: - action: script - script: - - python3 - - tokyohotv2.py - - query - - scene - - -# Last Updated July 17, 2023 From 11c0f6d9680fb1186525cc0a65b495994593885d Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 20 Jul 2023 21:06:37 -0400 Subject: [PATCH 221/624] WeAreHairy now supports galleries --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 486c49df5..7f2d8af54 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1417,7 +1417,7 @@ watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- -wearehairy.com|wearehairy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wearehairy.com|wearehairy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- From 96b45369b2f9c281cf6e73a245ff75fad545f420 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 21 Jul 2023 12:56:53 +0100 Subject: [PATCH 222/624] add galleryByURL (scene URL) for transsensual.com --- scrapers/MindGeek.yml | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 454d665d8..df2506452 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -39,6 +39,12 @@ sceneByURL: - trueamateurs.com/scene/ scraper: scriptScraper +galleryByURL: + - action: scrapeXPath + url: + - transsensual.com/scene + scraper: galleryFromSceneScriptScraper + movieByURL: - action: scrapeXPath url: @@ -149,17 +155,17 @@ xPathScrapers: with: $1 scriptScraper: common: - $script: //script[@type="application/ld+json"] + $script: &script //script[@type="application/ld+json"] $canonicalUrl: *canonicalUrl $movieUriPath: *movieUriPath scene: - Title: + Title: &title selector: $script postProcess: - replace: - regex: '.+"name": "([^"]+)".+' with: $1 - Date: + Date: &date selector: $script postProcess: - replace: @@ -172,7 +178,7 @@ xPathScrapers: - replace: - regex: '.+"thumbnailUrl": "([^"]+)".+' with: $1 - Studio: + Studio: &studio Name: selector: 
//div[contains(@class,"tg5e7m")]/ancestor::section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href postProcess: @@ -197,7 +203,7 @@ xPathScrapers: transsensual: TransSensual trueamateurs: True Amateurs Tags: *tags - Details: + Details: &details selector: $script postProcess: - replace: @@ -207,7 +213,7 @@ xPathScrapers: with: $1 - regex: '\|' with: '"' - Performers: + Performers: &performers Name: //div/*[self::h1 or self::h2]/..//a[contains(@href,"/model")] Movies: *sceneMovies Code: *sceneCode @@ -361,4 +367,14 @@ xPathScrapers: Image: selector: //img[contains(@src, "model")]/@src URL: //link[@rel="canonical"]/@href -# Last Updated May 31, 2023 + galleryFromSceneScriptScraper: + common: + $script: *script + gallery: + Title: *title + Date: *date + Details: *details + Performers: *performers + Tags: *tags + Studio: *studio +# Last Updated July 21, 2023 From 6010752d216ebf20e968824ffe0451de3f72d9a7 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 21 Jul 2023 13:01:48 +0100 Subject: [PATCH 223/624] get details/description text from all element in div --- scrapers/Mylf.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Mylf.yml b/scrapers/Mylf.yml index 8bd1ab119..b0c1f03ea 100644 --- a/scrapers/Mylf.yml +++ b/scrapers/Mylf.yml @@ -8,7 +8,7 @@ xPathScrapers: sceneScraper: scene: Title: //h2[contains(@class,"sceneTitle")]/text() - Details: //div[contains(@class,"sceneDesc")]/text() + Details: //div[contains(@class,"sceneDesc")]//text() Date: selector: //div[contains(@class,"sceneDate")]/text() postProcess: @@ -47,4 +47,4 @@ xPathScrapers: MylfBoss: Mylf Boss MylfSelects: Mylf Selects StayHomeMilf: Stay Home Milf -# Last Updated June 27, 2022 +# Last Updated July 21, 2023 From 392e8972d6a49834f2916df9013180f32566ce29 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 21 Jul 2023 13:15:09 +0100 Subject: [PATCH 224/624] update scraper list for transsensual (gallery) --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a7ea432a5..e25824bac 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1330,7 +1330,7 @@ transgressivexxx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:he transmodeldatabase.com|TransModelDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans transnificent.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans -transsensual.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Trans +transsensual.com|MindGeek.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|Trans transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans transsexualroadtrip.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tranzvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR From ff77ae739200a4c8578889e6d3cb595ac19af23e Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 21 Jul 2023 15:52:47 +0100 Subject: [PATCH 225/624] fix for updated layout --- scrapers/SexBabesVR.yml | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/scrapers/SexBabesVR.yml b/scrapers/SexBabesVR.yml index 6b16840eb..ce5668096 100644 --- a/scrapers/SexBabesVR.yml +++ b/scrapers/SexBabesVR.yml @@ -7,30 +7,28 @@ sceneByURL: xPathScrapers: sceneScraper: common: - $info: //div[@class="video-info"] + $detail: //div[@class="video-detail"] scene: - Title: $info//div[@class="video-group-left"]//h1[@class="title"]/text() + Title: $detail//h1/text() Date: - selector: $info//span[@class="date-display-single"]/@content + selector: $detail//div[@class="video-detail__description--container"]/div[contains(@style, "padding-top")]/text() postProcess: - - replace: - - regex: (\d{4}-\d{2}-\d{2})T.+ - with: $1 - - parseDate: 2006-01-02 + - parseDate: Jan 02 Details: - selector: $info//p/text() + 
selector: $detail/div[@class="container"]/p/text() concat: " " Tags: - Name: $info//div[@class="video-group-left"]/div[@class="video-tags"]//a/text() + Name: $detail//div[@class="tags"]//a/text() Performers: - Name: $info//div[@class="video-group-left"]/div[@class="video-actress-name"]//a/text() + Name: + selector: $detail//div[@class="video-detail__description--author"]//a/text() + postProcess: + - replace: + - regex: ^\s*(.+)\s*$ + with: $1 Image: - selector: //div[@class="splash-screen fullscreen-message is-visible"]/@style - postProcess: - - replace: - - regex: .+(http[^\)]+).+ - with: $1 + selector: //dl8-video/@poster Studio: Name: fixed: "SexBabesVR" -# Last Updated August 14, 2022 +# Last Updated July 21, 2023 From 696041d2715330b86d9f9f4ba22645231c632a0c Mon Sep 17 00:00:00 2001 From: nrg101 Date: Fri, 21 Jul 2023 15:58:46 +0100 Subject: [PATCH 226/624] simplify scraper config --- scrapers/SexBabesVR.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scrapers/SexBabesVR.yml b/scrapers/SexBabesVR.yml index ce5668096..7ef89f712 100644 --- a/scrapers/SexBabesVR.yml +++ b/scrapers/SexBabesVR.yml @@ -26,8 +26,7 @@ xPathScrapers: - replace: - regex: ^\s*(.+)\s*$ with: $1 - Image: - selector: //dl8-video/@poster + Image: //dl8-video/@poster Studio: Name: fixed: "SexBabesVR" From f1ef88a018615fac558167ca89c68a2f28e02ed2 Mon Sep 17 00:00:00 2001 From: awalkabout <140019299+awalkabout@users.noreply.github.com> Date: Fri, 21 Jul 2023 17:26:13 -0500 Subject: [PATCH 227/624] updated scene tag extraction filters --- scrapers/Tokyohot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Tokyohot.py b/scrapers/Tokyohot.py index 255c9ff75..34ea81233 100644 --- a/scrapers/Tokyohot.py +++ b/scrapers/Tokyohot.py @@ -132,8 +132,8 @@ def get_date(self): return None def get_tags(self): - potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a", href=lambda h: "type" in h) - return [{"Name":a.text} for a in 
potential_tags if a.text.isascii()] + potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a") + return [{"Name":a.text} for a in potential_tags if "type=play" in a.get("href")] def get_json(self): return { From fa7bb87a0010eea4eb186dc8ae6f4afee61aa4b6 Mon Sep 17 00:00:00 2001 From: WillyOrtrun <129560075+WillyOrtrun@users.noreply.github.com> Date: Sun, 23 Jul 2023 18:24:31 +0200 Subject: [PATCH 228/624] Create Redgifs.py --- scrapers/Redgifs.py | 182 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 scrapers/Redgifs.py diff --git a/scrapers/Redgifs.py b/scrapers/Redgifs.py new file mode 100644 index 000000000..0675dcd43 --- /dev/null +++ b/scrapers/Redgifs.py @@ -0,0 +1,182 @@ +import base64 +import json +import os +import re +import sys +from datetime import datetime + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + +try: + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() +try: + import requests +except ModuleNotFoundError: + log.error( + "You need to install the requests module. 
(https://docs.python-requests.org/en/latest/user/install/)" + ) + log.error( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests" + ) + sys.exit() + +PROXIES = {} +TIMEOUT = 10 + + +class Redgifs: + def __init__(self): + self.session = requests.Session() + self.session.headers.update( + {"content-type": "application/json; charset=UTF-8"} + ) + + self.session.proxies.update(PROXIES) + + self.getTemporaryToken() + + def log_session_headers(self): + log.debug(self.session.headers) + + def GET_req(self, url): + scraped = None + try: + scraped = self.session.get(url, timeout=TIMEOUT) + except: + log.error("scrape error") + return None + if scraped.status_code >= 400: + log.error(f"HTTP Error: {scraped.status_code}") + return None + return scraped.content + + def GET_req_json(self, url): + scraped = None + try: + scraped = self.session.get(url, timeout=TIMEOUT) + except: + log.error("scrape error") + return None + if scraped.status_code >= 400: + log.error(f"HTTP Error: {scraped.status_code}") + return None + return scraped.json() + + def output_json(self, title, tags, url, b64img, performers, date): + return { + "title": title, + "tags": [{"name": x} for x in tags], + "url": url, + "image": "data:image/jpeg;base64," + b64img.decode("utf-8"), + "performers": [{"name": x.strip()} for x in performers], + "date": date + } + + def getTemporaryToken(self): + req = self.GET_req_json("https://api.redgifs.com/v2/auth/temporary") + + authToken = req.get("token") + + self.session.headers.update( + {"Authorization": 'Bearer ' + authToken,} + ) + + log.debug(req) + + def getIdFromUrl(self, url): + id = url.split("/") + id = id[-1] + id = id.split("?")[0] + + return id; + + def getApiUrlFromId(self, id): + return f"https://api.redgifs.com/v2/gifs/{id}?views=yes&users=yes" + + + def getParseUrl(self, url): + id = self.getIdFromUrl(url) + return self.getParseId(id) + + def getParseId(self, id): + id_lowercase = id.lower() + + 
log.debug(str(id)) + + apiurl = self.getApiUrlFromId(id_lowercase) + + req = self.GET_req_json(apiurl) + + log.debug(req) + + gif = req.get("gif") + user = req.get("user") + + tags = gif.get("tags") + + date = gif.get("createDate") + date = datetime.fromtimestamp(date) + date = str(date.date()) + + imgurl = gif.get("urls").get("poster") + img = self.GET_req(imgurl) + b64img = base64.b64encode(img) + + studio_name = user.get("name") + + performers = [] + + if user.get("name"): + performers = [user.get("name")] + elif user.get("username"): + performers = [user.get("username")] + + + return self.output_json( + id, tags, f"https://www.redgifs.com/watch/{id}", b64img, performers, date + ) + +def parseFilename(filename): + return filename.split(".")[0] + + +FRAGMENT = json.loads(sys.stdin.read()) + +log.debug(FRAGMENT) + +scraper = Redgifs() + +result = "" + +if sys.argv[1] == "url": + url = FRAGMENT.get("url") + + log.debug(url) + + result = json.dumps(scraper.getParseUrl(url)) +elif sys.argv[1] == "queryFragment" or sys.argv[1] == "fragment": + id = parseFilename(FRAGMENT.get("title")) + + log.debug(id) + + result = json.dumps(scraper.getParseId(id)) +elif sys.argv[1] == "name": + id = parseFilename(FRAGMENT.get("name")) + + log.debug(id) + + result = json.dumps([scraper.getParseId(id)]) + +print(result) From f6f011c4573054f8bedc3277f70cf577174677bf Mon Sep 17 00:00:00 2001 From: WillyOrtrun <129560075+WillyOrtrun@users.noreply.github.com> Date: Sun, 23 Jul 2023 18:26:17 +0200 Subject: [PATCH 229/624] Create Redgifs.yml --- scrapers/Redgifs.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 scrapers/Redgifs.yml diff --git a/scrapers/Redgifs.yml b/scrapers/Redgifs.yml new file mode 100644 index 000000000..44092e837 --- /dev/null +++ b/scrapers/Redgifs.yml @@ -0,0 +1,30 @@ +name: Redgifs +sceneByURL: + - action: script + script: + - python + - Redgifs.py + - url + url: + - www.redgifs.com/watch/ + +sceneByFragment: + action: 
script + script: + - python + - Redgifs.py + - fragment + +sceneByQueryFragment: + action: script + script: + - python + - Redgifs.py + - queryFragment + +sceneByName: + action: script + script: + - python + - Redgifs.py + - name From f6b13a916ac540267b27fc6bac3541c546f22b55 Mon Sep 17 00:00:00 2001 From: WillyOrtrun <129560075+WillyOrtrun@users.noreply.github.com> Date: Sun, 23 Jul 2023 18:49:17 +0200 Subject: [PATCH 230/624] handle filename prefix --- scrapers/Redgifs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scrapers/Redgifs.py b/scrapers/Redgifs.py index 0675dcd43..6f4810e26 100644 --- a/scrapers/Redgifs.py +++ b/scrapers/Redgifs.py @@ -149,7 +149,10 @@ def getParseId(self, id): ) def parseFilename(filename): - return filename.split(".")[0] + id = filename.replace("redgifs_", "") #remove possible filename prefix + id = id.split(".")[0] #remove file extension + + return id FRAGMENT = json.loads(sys.stdin.read()) From ea76e7f47a337b4b53652abdb7ac73a1e80ca156 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Mon, 24 Jul 2023 12:33:32 +0100 Subject: [PATCH 231/624] use subsite (sub-studio) at realitykings.com --- scrapers/MindGeek.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index 454d665d8..a02ec2d08 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -174,7 +174,7 @@ xPathScrapers: with: $1 Studio: Name: - selector: //div[contains(@class,"tg5e7m")]/ancestor::section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href + selector: //div[text()="Subsite"]/following-sibling::a/text()|//div[contains(@class,"tg5e7m")]/ancestor::section//a[contains(@href,"site=")]/@title|//link[@rel="canonical"]/@href postProcess: - replace: - regex: (.+www\.)(\w+)(.+) @@ -361,4 +361,4 @@ xPathScrapers: Image: selector: //img[contains(@src, "model")]/@src URL: //link[@rel="canonical"]/@href -# Last Updated May 31, 2023 +# Last Updated July 24, 2023 From 
c7b09327bd687a74ba0afa9697df80c7ea9d8a05 Mon Sep 17 00:00:00 2001 From: Darthsternie Date: Mon, 24 Jul 2023 15:32:58 +0200 Subject: [PATCH 232/624] Updated javdb.yml Fixed javdb.yml scraper by changing javdb36.com to javdb.com for both regex and cookies I tried using javdb but javdb36.com seems to be no longer accessible (atleast from Germany or USA/Luxembourg with VPN) It works perfectly fine now on my stash instance even for FC2 files when paired with a login cookie. --- scrapers/javdb.yml | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/scrapers/javdb.yml b/scrapers/javdb.yml index f9d7606e1..326cbbe4e 100644 --- a/scrapers/javdb.yml +++ b/scrapers/javdb.yml @@ -1,7 +1,7 @@ name: JavDB sceneByFragment: action: scrapeXPath - queryURL: https://javdb36.com/search?q={filename}&f=all + queryURL: https://javdb.com/search?q={filename}&f=all queryURLReplace: filename: - regex: \..+$ @@ -17,10 +17,10 @@ sceneByURL: queryURLReplace: url: - regex: javdb\.com - with: "javdb36.com" + with: "javdb.com" sceneByName: action: scrapeXPath - queryURL: https://javdb36.com/search?q={} + queryURL: https://javdb.com/search?q={} scraper: sceneSearch sceneByQueryFragment: action: scrapeXPath @@ -37,7 +37,7 @@ movieByURL: queryURLReplace: url: - regex: javdb\.com - with: "javdb36.com" + with: "javdb.com" xPathScrapers: sceneSearch: common: @@ -49,7 +49,7 @@ xPathScrapers: postProcess: - replace: - regex: ^ - with: "https://javdb36.com" + with: "https://javdb.com" Image: selector: $videoItem//img/@src postProcess: @@ -68,7 +68,7 @@ xPathScrapers: postProcess: - replace: - regex: ^ - with: "https://javdb36.com" + with: "https://javdb.com" # If you don't support cookie you can use this regex. 
# - regex: $ # with: "?locale=en" @@ -135,6 +135,16 @@ driver: Domain: "javdb.com" Value: "en" Path: "/" + # Access to certain titles requires a javdb account + # Uncomment the below replacing the Value part + #- Name: "_jdb_session" + # Value: "Add here the actual value from your cookies" + # Path: "/" + # Domain: "javdb.com" + #- Name: "remember_me_token" + # Value: "Add here the actual value from your cookies" + # Path: "/" + # Domain: "javdb.com" - CookieURL: "https://javdb36.com" Cookies: - Name: "locale" @@ -151,4 +161,4 @@ driver: # Value: "Add here the actual value from your cookies" # Path: "/" # Domain: "javdb36.com" -# Last Updated June 02, 2022 +# Last Updated July 24, 2023 From 908435f76c4b01447fc4130ab70d752b703fbe8a Mon Sep 17 00:00:00 2001 From: Silvered Gold <35001040+silveredgold@users.noreply.github.com> Date: Thu, 27 Jul 2023 00:43:40 +1000 Subject: [PATCH 233/624] Fix date parsing; remove CDP requirement; --- scrapers/MetalBondage.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scrapers/MetalBondage.yml b/scrapers/MetalBondage.yml index dd7832ab1..5beee9d55 100644 --- a/scrapers/MetalBondage.yml +++ b/scrapers/MetalBondage.yml @@ -10,8 +10,11 @@ xPathScrapers: scene: Title: //title/text() Date: - selector: //div[@class="metabar"]/text() + selector: //div[@class="metabar"]//text() postProcess: + - replace: + - regex: '(.*\d{1,2}, \d{4}).*' + with: $1 - parseDate: January 2, 2006 Details: selector: //div[@class="textcontent"]/p @@ -34,6 +37,4 @@ xPathScrapers: fixed: "Metal Bondage" URL: //link[@rel="canonical"]/@href -driver: - useCDP: true -# Last Updated December 16, 2022 +# Last Updated July 27, 2023 From 32138faeeaf0eaffcb270ebc2d605dd6ff738465 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Wed, 26 Jul 2023 20:45:33 -0400 Subject: [PATCH 234/624] Updated scraper list --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6bf79116a..062a16fa8 100644 --- 
a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -116,6 +116,7 @@ atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntjudysxxx.com|AuntJudysXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From ebb24b49462408a097f6a8079baddfa08968305c Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 27 Jul 2023 18:16:44 +0100 Subject: [PATCH 235/624] move brazilian-transsexuals to other Grooby scraper --- scrapers/GroobyNetwork-Brazilian.yml | 3 +-- scrapers/GroobyNetwork-Partial.yml | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/GroobyNetwork-Brazilian.yml b/scrapers/GroobyNetwork-Brazilian.yml index ea1ad4733..2edb3a886 100644 --- a/scrapers/GroobyNetwork-Brazilian.yml +++ b/scrapers/GroobyNetwork-Brazilian.yml @@ -2,7 +2,6 @@ name: "GroobyNetwork-Brazilian" sceneByURL: - action: scrapeXPath url: - - brazilian-transsexuals.com - braziltgirls.xxx - tgirlsfuck.com scraper: sceneScraper @@ -36,4 +35,4 @@ xPathScrapers: - regex: ^\/\/ with: "https://" -# Last Updated January 09, 2023 +# Last Updated July 27, 2023 diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 396e2748c..86372c202 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -7,6 +7,7 @@ sceneByURL: - blacktgirlshardcore.com - black-tgirls.com - bobstgirls.com + - brazilian-transsexuals.com - femout.xxx - femoutsex.xxx #Scenes on 'femout.xxx' can some times be found on this one as well - franks-tgirlworld.com @@ -99,4 +100,4 @@ xPathScrapers: - regex: ^/ with: https://www.groobyvr.com/ Tags: *tags -# Last 
Updated July 03, 2023 +# Last Updated July 27, 2023 From bc975f8f843fba0c226b54e29727776736e5516a Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 27 Jul 2023 18:22:38 +0100 Subject: [PATCH 236/624] also move tgirlsfuck.com --- scrapers/GroobyNetwork-Brazilian.yml | 1 - scrapers/GroobyNetwork-Partial.yml | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/GroobyNetwork-Brazilian.yml b/scrapers/GroobyNetwork-Brazilian.yml index 2edb3a886..a7a8994bb 100644 --- a/scrapers/GroobyNetwork-Brazilian.yml +++ b/scrapers/GroobyNetwork-Brazilian.yml @@ -3,7 +3,6 @@ sceneByURL: - action: scrapeXPath url: - braziltgirls.xxx - - tgirlsfuck.com scraper: sceneScraper xPathScrapers: sceneScraper: diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 86372c202..37ce8eefa 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -19,6 +19,7 @@ sceneByURL: - tgirlsex.xxx - tgirls.porn - tgirls.xxx + - tgirlsfuck.com - tgirlshookup.com - tgirltops.com - transexpov.com From 3d94bafe06a6d3ff2434c8ac0b7c10c6860448c4 Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 27 Jul 2023 18:22:48 +0100 Subject: [PATCH 237/624] update scrapers list --- SCRAPERS-LIST.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6bf79116a..2fa59e956 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -222,7 +222,7 @@ brasilvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR brattyfamily.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brattymilf.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- -brazilian-transsexuals.com|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +brazilian-transsexuals.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans braziltgirls.xxx|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans 
brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay @@ -1279,7 +1279,7 @@ tgirlpostop.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans -tgirlsfuck.com|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlsfuck.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tgirlshookup.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans tgirltops.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- From 6622b662e0d43838b18c96f3fa52ee311c11410d Mon Sep 17 00:00:00 2001 From: nrg101 Date: Thu, 27 Jul 2023 18:33:34 +0100 Subject: [PATCH 238/624] fix image scraping at tgirlsfuck.com --- scrapers/GroobyNetwork-Partial.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/GroobyNetwork-Partial.yml b/scrapers/GroobyNetwork-Partial.yml index 37ce8eefa..98501c4b3 100644 --- a/scrapers/GroobyNetwork-Partial.yml +++ b/scrapers/GroobyNetwork-Partial.yml @@ -56,7 +56,7 @@ xPathScrapers: - regex: (https://[^/]*)/.* with: $1 Image: - selector: //link[@rel="canonical"]/@href|//img[contains(@class, "update_thumb thumbs stdimage")]/@src|//img[contains(@class, "update_thumb thumbs stdimage")]/@src0_1x + selector: //link[@rel="canonical"]/@href|//img[contains(@class, "update_thumb thumbs stdimage")]/@src|//img[contains(@class, "update_thumb thumbs stdimage")]/@src0_1x|//div[@class="trailerposter"]/img/@src0 concat: "__SEPARATOR__" postProcess: - replace: From 
80b287741006edfab8b6636b7f105279e3a68712 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:02:18 -0400 Subject: [PATCH 239/624] Fix typo; include trailing slash for URL --- scrapers/SmutPuppet.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/scrapers/SmutPuppet.yml b/scrapers/SmutPuppet.yml index 9ca9fbb97..d90f416ed 100644 --- a/scrapers/SmutPuppet.yml +++ b/scrapers/SmutPuppet.yml @@ -9,7 +9,7 @@ sceneByURL: - genlez.com/update - goldenslut.com/update - grannyvsbbc.com/update - - jefssmodels.com/update + - jeffsmodels.com/update - milfsodomy.com/update - smutmerchants.com/update - suggabunny.com/update @@ -55,6 +55,9 @@ xPathScrapers: - replace: - regex: .+\/updates/(\d+)\/.+ with: $1 + # 3 digit studio codes having a leading zero using this selector. not good for our purposes. lets strip the leading zero + - regex: ^0 + with: # Return the sanitized URL URL: selector: //div[@class="block-logo"]/a/img/@src|//div[@class="model-player"]/a/img/@src @@ -62,5 +65,8 @@ xPathScrapers: postProcess: - replace: - regex: \/static\/(\w+\....)\/.+\/(\d+)\/.+ - with: https://$1/update/$2 -# Last Updated July 18, 2023 + with: https://$1/update/$2/ + # 3 digit studio codes having a leading zero using this selector. not good for our purposes. 
lets strip the leading zero + - regex: \/0 + with: / +# Last Updated July 27, 2023 From 71503424ecad19146c9fb75483e4fd5bc5571178 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:06:41 -0400 Subject: [PATCH 240/624] Fixed code and URL parsing --- scrapers/SmutPuppet.yml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/scrapers/SmutPuppet.yml b/scrapers/SmutPuppet.yml index d90f416ed..357fefb1a 100644 --- a/scrapers/SmutPuppet.yml +++ b/scrapers/SmutPuppet.yml @@ -50,23 +50,17 @@ xPathScrapers: - regex: "^/usermedia/" with: "https://smutpuppet.com/usermedia/" Code: - selector: //div[@class="model-player"]/a/img/@src + selector: //script[contains(text(),"/api/update/")] postProcess: - replace: - - regex: .+\/updates/(\d+)\/.+ + - regex: .+\/api\/update\/(\d{3,})\/.+ with: $1 - # 3 digit studio codes having a leading zero using this selector. not good for our purposes. lets strip the leading zero - - regex: ^0 - with: # Return the sanitized URL URL: - selector: //div[@class="block-logo"]/a/img/@src|//div[@class="model-player"]/a/img/@src + selector: //div[@class="block-logo"]/a/img/@src|//script[contains(text(),"/api/update/")] concat: "|" postProcess: - replace: - - regex: \/static\/(\w+\....)\/.+\/(\d+)\/.+ + - regex: \/static\/(\w+\.[a-z]{3})\/.+\/api\/update\/(\d{3,})\/.+ with: https://$1/update/$2/ - # 3 digit studio codes having a leading zero using this selector. not good for our purposes. 
lets strip the leading zero - - regex: \/0 - with: / # Last Updated July 27, 2023 From c101638488696a65a3e7b0d5e678de153afdf7d2 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:14:29 -0400 Subject: [PATCH 241/624] Remove redundant scrapers --- scrapers/DoTheWife.yml | 33 --------------------------------- scrapers/JeffsModels.yml | 29 ----------------------------- 2 files changed, 62 deletions(-) delete mode 100644 scrapers/DoTheWife.yml delete mode 100644 scrapers/JeffsModels.yml diff --git a/scrapers/DoTheWife.yml b/scrapers/DoTheWife.yml deleted file mode 100644 index 7128a5623..000000000 --- a/scrapers/DoTheWife.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Do The Wife -sceneByURL: - - action: scrapeXPath - url: - - dothewife.com/update - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //div[@class="section-title"]/h4 - Performers: - Name: //div[@class="model-rich"]/h4[@class="theme-color"]/a - Details: - selector: //p[@class="read-more"]/text() - postProcess: - - replace: - - regex: '^\s*:\s*' - with: "" - Tags: - Name: - selector: //div[@class="model-categories"]/a/text() - Studio: - Name: - fixed: Do The Wife - Image: - selector: //img[@class="video-banner"]/@src|//video/@poster - postProcess: - - replace: - - regex: (?:.+)(\/usermedia\/.+\.jpg)(?:.+) - with: $1 - - regex: "^/usermedia/" - with: "https://dothewife.com/usermedia/" -# Last Updated February 18, 2023 diff --git a/scrapers/JeffsModels.yml b/scrapers/JeffsModels.yml deleted file mode 100644 index e18a750cb..000000000 --- a/scrapers/JeffsModels.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: "JeffsModels" -sceneByURL: - - action: scrapeXPath - url: - - jeffsmodels.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="updateInfo"]/h2/text() - Details: - selector: //div[@class="updateDescription"]/p/text() - Performers: - Name: //div[@class="updateModels"]/a/text() - Image: - selector: //a[@class="hi 
iconPlay"]/img[@alt="Main Sample"]/@src - postProcess: - - replace: - - regex: ^ - with: "https://jeffsmodels.com" - Studio: - Name: - fixed: "Jeff's Models" - Date: - selector: //span[@class="updateDate"]/text() - postProcess: - - parseDate: Jan 2, 2006 -# Last Updated March 19, 2021 From 8d6c7c2ff7409d55e233899f93650201921f512e Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:28:57 -0400 Subject: [PATCH 242/624] Updated SCARPERS-LIST --- SCRAPERS-LIST.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 667fbaea2..340d51008 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -183,6 +183,7 @@ bjraw.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +blackandbig.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- Blackfoxbound UK|BlackFoxBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -349,6 +350,7 @@ danejones.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- danidaniels.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- danni.com|Danni.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- darkroomvr.com|DarkRoomVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darksodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- darkx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- data18.com|data18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -390,7 +392,7 @@ dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
domai.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- dorcelclub.com|DorcelClub.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- dorcelvision.com|DorcelVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- -dothewife.com|DoTheWife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dothewife.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- doubleteamedteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -530,6 +532,7 @@ gayhorrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay gayroom.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay gayviolations.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay genderxfilms.com|Algolia_GenderXFilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|-|Trans +genlez.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- genuinesin.com|GenuineSin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- girlcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- girlfaction.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -554,9 +557,11 @@ gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- goddessnudes.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- goddesssnow.com|GoddessSnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +goldenslut.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gostuckyourself.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gotfilled.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- grannyghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Granny +grannyvsbbc.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
grooby.club|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans @@ -683,7 +688,7 @@ jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV jeedoo.com|Jeedoo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- -jeffsmodels.com|JeffsModels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jeffsmodels.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jelenajensen.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jerk-offpass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jerkaoke.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -813,6 +818,7 @@ milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +milfsodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1163,6 +1169,7 @@ smashed.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- smashpictures.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- smokingmina.com|SmokingMina.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- smutfactor.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +smutmerchants.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- soapymassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- sofiemariexxx.com|SofieMariexxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sologirlsmania.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1210,6 +1217,7 @@ 
submissivex.com|SubmissiveX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- subspaceland.com|SubspaceLand.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sugarcookie.xxx|sugarcookie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sugardaddyporn.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +suggabunny.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sunnylanelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sunnyleone.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- superbemodels.com|superbemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1252,6 +1260,7 @@ teencoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencorezine.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencurves.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teendrillers.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenerotica.xxx|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenmegaworld.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 44b5768cd33407b462792431d28de53f73c79bb3 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:46:39 -0400 Subject: [PATCH 243/624] Updated scrapers list --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6bf79116a..3d97fb10d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -935,6 +935,7 @@ old4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldgoesyoung.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldje-3some.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldje.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oldnanny.com|OldNanny.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oldsfuckdolls.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- only3xgirls.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
only3xlost.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From c91828a4f6f948dd0dface1f3f9714d3effbc349 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:47:25 -0400 Subject: [PATCH 244/624] Improve returned sub-studio name --- scrapers/OldNanny.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scrapers/OldNanny.yml b/scrapers/OldNanny.yml index 507c3b632..5e6df0581 100644 --- a/scrapers/OldNanny.yml +++ b/scrapers/OldNanny.yml @@ -11,6 +11,10 @@ xPathScrapers: Studio: Name: selector: //div[@class="col-12 col-xl-4"]//dl[2]//a/text() + postProcess: + - replace: + - regex: ([a-z])([A-Z]) + with: $1 $2 Title: selector: //title/text() postProcess: @@ -32,4 +36,4 @@ xPathScrapers: - regex: \, with: Image: //video[@id="video-player"]/@poster -# Last Updated July 18, 2023 +# Last Updated July 27, 2023 From e82c5c7e2cf62dc23540535498fdc2a839d72db4 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:54:20 -0400 Subject: [PATCH 245/624] Updated scrapers list --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 195d531b1..81560e6e9 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1433,7 +1433,7 @@ watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- -wearehairy.com|wearehairy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +wearehairy.com|WeAreHairy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|- webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- From 
4bfe86e35dda8693f8b0e750879d66cc67d1f5a4 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 14:57:20 -0400 Subject: [PATCH 246/624] Improve and fix invalid regex --- scrapers/WeAreHairy.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/WeAreHairy.yml b/scrapers/WeAreHairy.yml index 76a786691..273973527 100644 --- a/scrapers/WeAreHairy.yml +++ b/scrapers/WeAreHairy.yml @@ -57,12 +57,12 @@ xPathScrapers: selector: //p[@id="height_both"]/text() postProcess: - replace: - - regex: .+(/d/d/d)cm + - regex: .*(\d{3}) cm with: $1 Weight: selector: //p[@id="weight_both"]/text() postProcess: - replace: - - regex: .+(/d/d)kg + - regex: .*(\d{2,}) kg with: $1 -# Last Updated July 18, 2023 +# Last Updated July 27, 2023 From 7b429add58dd58fa58e0d0f57a33e61734cd313b Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 15:09:32 -0400 Subject: [PATCH 247/624] Updated scrapers list --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 81560e6e9..f597e531e 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -371,6 +371,7 @@ devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfbnetwork.com|DFB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|python|- dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 2ddd8dad70d88131ff9efc913ca9404e76db26c3 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 15:34:12 -0400 Subject: [PATCH 248/624] Improved to also scrape AuntJudy.com --- scrapers/{AuntJudysXXX.yml => AuntJudys.yml} | 14 
+++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) rename scrapers/{AuntJudysXXX.yml => AuntJudys.yml} (62%) diff --git a/scrapers/AuntJudysXXX.yml b/scrapers/AuntJudys.yml similarity index 62% rename from scrapers/AuntJudysXXX.yml rename to scrapers/AuntJudys.yml index 116c91055..8a7446d4b 100644 --- a/scrapers/AuntJudysXXX.yml +++ b/scrapers/AuntJudys.yml @@ -1,7 +1,8 @@ -name: AuntJudysXXX +name: AuntJudys sceneByURL: - action: scrapeXPath url: + - auntjudys.com - auntjudysxxx.com scraper: sceneScraper xPathScrapers: @@ -9,7 +10,14 @@ xPathScrapers: scene: Studio: Name: - fixed: Aunt Judy's XXX + selector: //base/@href + postProcess: + - replace: + - regex: ^https?://(?:www\.)?(.+)\.com/.*$ + with: $1 + - map: + auntjudys: Aunt Judy's + auntjudysxxx: Aunt Judy's XXX Title: //span[@class="title_bar_hilite"] Details: //span[@class="update_description"] Performers: @@ -21,4 +29,4 @@ xPathScrapers: selector: //div[@class="cell update_date"]/text()[1] postProcess: - parseDate: 1/2/2006 -# Last Updated July 18, 2023 +# Last Updated July 27, 2023 From 2f02d91a9ffa79fbae832a0b9a5adb02ee7bcd12 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 15:35:16 -0400 Subject: [PATCH 249/624] Updated scraper list --- SCRAPERS-LIST.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 919f8bf0d..034da3317 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -116,7 +116,8 @@ atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -auntjudysxxx.com|AuntJudysXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntjudys.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 181f2cc314b6bfc9ce15e1cc41896e8f473ea71e Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 15:58:08 -0400 Subject: [PATCH 250/624] Removed unnecessary URL sanitization --- scrapers/TheScoreGroup.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup.yml index 059f583f6..dbad0b15b 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup.yml @@ -60,11 +60,6 @@ xPathScrapers: # with: _x_800 URL: &urlAttr selector: $url - # Sanitize the URL - postProcess: - - replace: - - regex: (https?:\/\/(?:www\.)?[0-9A-za-z-]+\.com\/[0-9A-Za-z-_]+\/[0-9A-Za-z-_]+\/\d+)\/?.* - with: $1 Code: &codeAttr selector: $url postProcess: From fa325ad94a41adde5e7761956f667b5568aa6684 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 16:03:31 -0400 Subject: [PATCH 251/624] Improved how studio code is processed --- scrapers/TheScoreGroup.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup.yml index dbad0b15b..66f4d9fb2 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup.yml @@ -64,7 +64,7 @@ xPathScrapers: selector: $url postProcess: - replace: - - regex: https?:\/\/(?:www\.)?[0-9A-za-z-]+\.com\/[0-9A-Za-z-_]+\/[0-9A-Za-z-_]+\/(\d+)\/?.* + - regex: .*\/(\d+)\/?$ with: $1 galleryScraper: common: @@ -79,4 +79,4 @@ xPathScrapers: Tags: Name: $photopage//div[@class='mb-3']/a/text() Performers: *performersAttr -# Last Updated July 18, 2023 +# Last Updated July 27, 2023 From 7015a4ae8d3294146aad163ebd61fe560fbd2b54 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 16:23:08 -0400 Subject: [PATCH 252/624] Fix erroneous line breaks in details --- scrapers/TheScoreGroup.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scrapers/TheScoreGroup.yml 
b/scrapers/TheScoreGroup.yml index 66f4d9fb2..c955a0cbf 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup.yml @@ -39,6 +39,9 @@ xPathScrapers: - replace: - regex: Read More » with: + # Attempt to fix erroneous line breaks where HTML tags existed + - regex: \n\n([0-9a-zA-Z\.]+)\n\n + with: " $1 " concat: "\n\n" Tags: Name: //div[@class="mb-3"]/a/text() From 73a549ce3375406646aa276135ec15b65ae39606 Mon Sep 17 00:00:00 2001 From: echo6ix Date: Thu, 27 Jul 2023 16:36:06 -0400 Subject: [PATCH 253/624] Updated scraper list --- SCRAPERS-LIST.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 034da3317..4b7781abb 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -266,6 +266,7 @@ clubelegantangel.com|ClubElegantAngel.yml|:heavy_check_mark:|:x:|:heavy_check_ma clubinfernodungeon.com|Algolia_ClubInfernoDungeon.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Gay clubseventeen.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -288,6 +289,7 @@ cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumblastcity.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -408,6 +410,7 @@ 
dtfsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dyked.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dylanryder.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +ebonytugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -445,6 +448,7 @@ fallinlovia.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- famedigital.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- familycreep.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay familyhookups.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +familylust.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- familysexmassage.com|FamilySexMassage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- familysinners.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- familystrokes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -474,6 +478,7 @@ filthyfamily.com|FilthyFamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- filthygapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- filthykings.com|Algolia_filthykings.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- finishesthejob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +finishhim.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- firstanalquest.com|Firstanalquest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- firstbgg.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- firstclasspov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -805,6 +810,7 @@ max-hardcore.com|MaxHardcore.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- maxinex.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- meanawolf.com|MeanaWolf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
meanbitches.com|MeanBitches.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|-| +meanmassage.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- meetsuckandfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- men.com|Brazzers.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay menatplay.com|MenAtPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -963,6 +969,7 @@ orgytrain.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- outdoorjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV outhim.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay outofthefamily.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +over40handjobs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- p54u.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV pacopacomama.com|Paco.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV paintoy.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -990,6 +997,7 @@ pervmom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pervnana.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pervtherapy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- peternorth.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +petite18.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- petiteballerinasfucked.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- petited.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- petitehdporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- @@ -1120,7 +1128,7 @@ seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay seductive18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- seehimfuck.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- seehimsolo.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -seemomsuck.com|SeeMomSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +seemomsuck.com|Tugpass.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
seemysextapes.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- selfiesuck.com|SelfieSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sensualpain.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1279,6 +1287,7 @@ teensnaturalway.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teensneaks.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- teenstryblacks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Thai Uncensored +teentugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenytaboo.com|TeenyTaboo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Jav terapatrick.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1370,6 +1379,7 @@ tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:| tspov.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsraw.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +tugpass.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tuktukpatrol.com|TukTukPatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- tushy.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- tushyraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- From 2f648a7864b8bb06677bbc99ca2c5606804c1c5f Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 27 Jul 2023 23:52:43 +0200 Subject: [PATCH 254/624] Fix name formatting for LoveHerFeet.com Also rename the YAML file to LoveHerFilms.yml to match the network name. 
--- SCRAPERS-LIST.md | 8 ++++---- scrapers/{LoveHerFeet.yml => LoveHerFilms.yml} | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) rename scrapers/{LoveHerFeet.yml => LoveHerFilms.yml} (97%) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 6e7f09526..e1399ef86 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -776,9 +776,9 @@ lookathernow.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- lordaardvark.com|LordAardvark.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|CDP|- lovehairy.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- loveherass.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- -loveherboobs.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish -loveherfeet.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Foot Fetish -loveherfilms.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +loveherboobs.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish +loveherfeet.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Foot Fetish +loveherfilms.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- lubed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lucasentertainment.com|LucasEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay lustcinema.com|LustCinema.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1152,7 +1152,7 @@ shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- share.myfreecams.com|MFC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shegotsix.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -shelovesblack.com|LoveHerFeet.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +shelovesblack.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- shesbrandnew.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
sheseducedme.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shewillcheat.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/LoveHerFeet.yml b/scrapers/LoveHerFilms.yml similarity index 97% rename from scrapers/LoveHerFeet.yml rename to scrapers/LoveHerFilms.yml index daed233c2..a8fbe9e4e 100644 --- a/scrapers/LoveHerFeet.yml +++ b/scrapers/LoveHerFilms.yml @@ -1,5 +1,4 @@ name: LoveHerFeet - sceneByURL: - action: scrapeXPath url: @@ -24,7 +23,6 @@ performerByURL: - loveherfilms.com/tour/models/ - shelovesblack.com/tour/models/ scraper: performerScraper - xPathScrapers: sceneScraper: scene: @@ -53,7 +51,11 @@ xPathScrapers: - regex: ^/tour([^|]+)\|(.+) # video/@poster urls need a domain with: $2$1 Studio: - Name: //meta[@name='author']/@content + Name: + selector: //meta[@name='author']/@content + postProcess: + - map: + LoveHerFeet.com: "Love Her Feet" Tags: Name: //div[@class='video-tags']/a/text() Performers: From 5ed1819fba390a3c2cf5cc27a3eadb6b07a83c3e Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 28 Jul 2023 01:12:06 +0200 Subject: [PATCH 255/624] Fix 403 unauthorized error when scraping from POVR subsites --- scrapers/POVR.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scrapers/POVR.yml b/scrapers/POVR.yml index 974e614d0..5bd3b4b38 100644 --- a/scrapers/POVR.yml +++ b/scrapers/POVR.yml @@ -40,8 +40,19 @@ xPathScrapers: - replace: - regex: medium.jpg with: large.jpg + # TranzVR defaults to smaller covers, but we can grab a bigger one - regex: 472/cover.jpg with: 680/cover.jpg + # All of these domains give 403 errors when saving the scraped image + # but povr.com has the same images and is totally cool with our scraping + - regex: cdns-i.wankzvr.com + with: images.povr.com/wvr + - regex: images.tranzvr.com + with: images.povr.com/tvr + - regex: cdns-i.milfvr.com + with: images.povr.com/mvr + - regex: cdns-i.brasilvr.com + with: images.povr.com Studio: &studioAttr Name: selector: *urlSel From 
a322907b447a998a5bbaadb4bd0619a1b5331b17 Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Thu, 27 Jul 2023 18:53:24 -0700 Subject: [PATCH 256/624] Update Algolia_DogfartNetwork.yml --- scrapers/Algolia_DogfartNetwork.yml | 32 +++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/scrapers/Algolia_DogfartNetwork.yml b/scrapers/Algolia_DogfartNetwork.yml index f796ee43a..064968446 100644 --- a/scrapers/Algolia_DogfartNetwork.yml +++ b/scrapers/Algolia_DogfartNetwork.yml @@ -2,7 +2,21 @@ name: "DogfartNetwork" sceneByURL: - action: script url: + - blackmeatwhitefeet.com/en/video + - blacksonblondes.com/en/video + - blacksoncougars.com/en/video + - cuckoldsessions.com/en/video + - cumbang.com/en/video + - dfxtra.com/en/video - dogfartnetwork.com/en/video + - gloryhole.com/en/video + - gloryholeinitiations.com/en/video + - interracialblowbang.com/en/video + - interracialpickups.com/en/video + - watchingmydaughtergoblack.com/en/video + - watchingmymomgoblack.com/en/video + - wefuckblackgirls.com/en/video + - zebragirls.com/en/video script: - python - Algolia.py @@ -30,10 +44,24 @@ sceneByQueryFragment: galleryByURL: - action: script url: - - dogfartnetwork.com/en/photo/ + - blackmeatwhitefeet.com/en/photo + - blacksonblondes.com/en/photo + - blacksoncougars.com/en/photo + - cuckoldsessions.com/en/photo + - cumbang.com/en/photo + - dfxtra.com/en/photo + - dogfartnetwork.com/en/photo + - gloryhole.com/en/photo + - gloryholeinitiations.com/en/photo + - interracialblowbang.com/en/photo + - interracialpickups.com/en/photo + - watchingmydaughtergoblack.com/en/photo + - watchingmymomgoblack.com/en/photo + - wefuckblackgirls.com/en/photo + - zebragirls.com/en/photo script: - python - Algolia.py - dogfartnetwork - gallery -# Last Updated December 22, 2022 +# Last Updated July 27, 2023 From 8a12523264caf354bf071acce75f85c63f08a4ba Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Fri, 28 Jul 2023 01:55:53 +0000 Subject: [PATCH 257/624] 
update scraper list --- SCRAPERS-LIST.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a7ea432a5..0c6ab31f6 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -185,7 +185,10 @@ blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -278,9 +281,11 @@ creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- creativeporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cruelgf.com|CruelGirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -365,6 +370,7 @@ devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|python|- dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -379,7 +385,7 @@ dirtycosplay.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- disruptivefilms.com|Algolia_disruptivefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay dlsite.com|DLsite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- doegirls.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -dogfartnetwork.com|DogFart.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dogfartnetwork.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- dollrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -544,6 +550,8 @@ girlsway.com|Algolia_GirlsWay.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:| girlswhofuckgirls.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
gloryholesecrets.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -647,7 +655,9 @@ innocenthigh.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- inserted.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- insex.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- insexondemand.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +interracialblowbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- interracialpass.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial +interracialpickups.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- interracialpovs.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial intimatelesbians.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian intimatepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -672,8 +682,8 @@ javdb.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database javdb36.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database javhd.com|JavHD.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored javhub.com|JavHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored -javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV +javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV @@ -1410,7 +1420,9 @@ wankzvr.com|POVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV 
watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- watchingmydaughtergoblack.com|WatchingMyDaughterGoBlack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- watchmygf.me|WatchMyGF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1418,6 +1430,7 @@ waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- wearehairy.com|wearehairy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +wefuckblackgirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- weliketosuck.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1474,6 +1487,7 @@ yourmomdoesanal.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yourmomdoesporn.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yummysofie.com|YummySofie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- z-filmz-originals.com|Z-Filmz-Originals.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +zebragirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- zentaidolls.com|ZentaiDolls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- 
zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR From 5a22396b98a85d9f1fbf67f86fe5a10117592de0 Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Fri, 28 Jul 2023 01:57:25 +0000 Subject: [PATCH 258/624] add python requirement to scraper list --- SCRAPERS-LIST.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 0c6ab31f6..2a7350aac 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -185,10 +185,10 @@ blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- -blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- -blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -281,11 +281,11 @@ creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- creativeporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cruelgf.com|CruelGirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -370,7 +370,7 @@ devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|python|- dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -385,7 +385,7 @@ dirtycosplay.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- disruptivefilms.com|Algolia_disruptivefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay dlsite.com|DLsite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- doegirls.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
-dogfartnetwork.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +dogfartnetwork.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- dollrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -550,8 +550,8 @@ girlsway.com|Algolia_GirlsWay.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:| girlswhofuckgirls.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- -gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- gloryholesecrets.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -655,9 +655,9 @@ innocenthigh.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- inserted.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- insex.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- insexondemand.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -interracialblowbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +interracialblowbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- interracialpass.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial 
-interracialpickups.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +interracialpickups.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- interracialpovs.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial intimatelesbians.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian intimatepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -1420,9 +1420,9 @@ wankzvr.com|POVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- watchingmydaughtergoblack.com|WatchingMyDaughterGoBlack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- watchmygf.me|WatchMyGF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1430,7 +1430,7 @@ waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- wearehairy.com|wearehairy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian -wefuckblackgirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+wefuckblackgirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- weliketosuck.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1487,7 +1487,7 @@ yourmomdoesanal.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yourmomdoesporn.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- yummysofie.com|YummySofie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- z-filmz-originals.com|Z-Filmz-Originals.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -zebragirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +zebragirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- zentaidolls.com|ZentaiDolls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR From 0e83356932122e9eb8fcae50720851f58523dfcc Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Thu, 27 Jul 2023 19:01:04 -0700 Subject: [PATCH 259/624] reset javlibrary ordering --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 67bf5be3c..bea49b8e5 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -701,8 +701,8 @@ javdb.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database javdb36.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database javhd.com|JavHD.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored javhub.com|JavHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored -javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV 
+javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV From fa3ed9e5fe11dda7c8990ef963bd6ae8402e1bac Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Fri, 28 Jul 2023 02:03:18 +0000 Subject: [PATCH 260/624] removing old watchingmydaughtergoblack.com scraper --- SCRAPERS-LIST.md | 1 - scrapers/WatchingMyDaughterGoBlack.yml | 66 -------------------------- 2 files changed, 67 deletions(-) delete mode 100644 scrapers/WatchingMyDaughterGoBlack.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index bea49b8e5..da682b058 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1452,7 +1452,6 @@ warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- -watchingmydaughtergoblack.com|WatchingMyDaughterGoBlack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- watchmygf.me|WatchMyGF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- diff --git a/scrapers/WatchingMyDaughterGoBlack.yml b/scrapers/WatchingMyDaughterGoBlack.yml deleted file mode 100644 index f8a760ce1..000000000 --- a/scrapers/WatchingMyDaughterGoBlack.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: "watchingmydaughtergoblack" -sceneByURL: - - action: scrapeXPath - url: - - watchingmydaughtergoblack.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //h3[@class="scene-title col-xs-12"]/text() - Date: - selector: 
//meta[@itemprop="uploadDate"]/@content - postProcess: - - replace: - - regex: ([\d-]*).+ - with: $1 - - parseDate: 2006-01-02 - Details: - selector: //p[@itemprop="description"]/text() - Tags: - Name: - selector: //p[@class="categories"]/text() - postProcess: - - replace: - - regex: ',(\x{00A0}|\s)+' - with: "," - split: "," - Performers: - Name: //span[@class="starring-list"]/a/text() - Image: - selector: //div[@class="trailer-container"]//@data-img|//meta[@itemprop="thumbnailUrl"]/@content - postProcess: - - replace: - - regex: ^ - with: "https:" - Studio: - Name: - selector: //meta[@itemprop="thumbnailUrl"]/@content - postProcess: - - replace: - - regex: '.*/([^/]+)\.com/[^/]+$' - with: $1 - - map: - barbcummings: Barb Cummings - blackmeatwhitefeet: Black Meat White Feet - blacksonblondes: Blacks On Blondes - blacksoncougars: Blacks On Cougars - candymonroe: Candy Monroe - cuckoldsessions: Cuckold Sessions - cumbang: Cum Bang - dogfartbehindthescenes: Dogfart Behind The Scenes - gloryhole-initiations: Gloryhole-Initiations - gloryhole: Glory Hole - interracialblowbang: Interracial Blow Bang - interracialpickups: Interracial Pickups - katiethomas: Katie Thomas - ruthblackwell: Ruth Blackwell - springthomas: Spring Thomas - theminion: The Minion - watchingmydaughtergoblack: Watching My Daughter Go Black - watchingmymomgoblack: Watching My Mom Go Black - wefuckblackgirls: We Fuck Black Girls - wifewriting: Wife Writing - zebragirls: Zebra Girls - URL: //link[@rel='canonical']/@href -# Last Updated May 03, 2021 From b72a08fc879312c2af030c2077305102ad662668 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 29 Jul 2023 02:34:35 +0200 Subject: [PATCH 261/624] Revert "Updated AdultEmpire.com" This reverts commit 3112f88ec832c3ca36869e838726e36623d75e6c. 
--- scrapers/AdultEmpire.yml | 48 +++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/scrapers/AdultEmpire.yml b/scrapers/AdultEmpire.yml index bd234480c..a891ca3d4 100644 --- a/scrapers/AdultEmpire.yml +++ b/scrapers/AdultEmpire.yml @@ -22,6 +22,7 @@ sceneByQueryFragment: queryURL: "{url}" scraper: sceneScraper + xPathScrapers: sceneSearch: scene: @@ -41,20 +42,11 @@ xPathScrapers: with: "https://www.adultdvdempire.com" Image: selector: //a[@class="boxcover"]/img/@data-src + movieScraper: movie: - Name: - selector: //h1/text() - postProcess: - - replace: - - regex: ^\s+(.+)\s+$ - with: $1 - Director: - selector: //a[@label="Director"]/text() - postProcess: - - replace: - - regex: ^\s+(.+)\s+$ - with: $1 + Name: //h1/text() + Director: //a[@label="Director"]/text() Duration: selector: //small[contains(text(), "Length")]/following-sibling::text() postProcess: @@ -84,29 +76,29 @@ xPathScrapers: URL: //meta[@name='og:url']/@content sceneScraper: scene: - Title: //div[@class="clip-page__detail__title__primary"]/text() + Title: //h1/text() + Details: + selector: //h4[contains(@class,"synopsis")]//text() + concat: " " Date: - selector: //strong[starts-with(text(), "Released:")]//ancestor::li/text() + selector: //small[contains(text(), "Released")]/following-sibling::text() postProcess: - parseDate: Jan 02 2006 + Image: //a[@id="front-cover"]/@data-href Studio: - Name: //a[@label="Studio" and contains(@href, "studio")] + Name: //a[@label="Studio"]/text() Movies: - Name: //div[contains(text(), "from")]//following-sibling::a/text() - URL: - selector: //div[contains(text(), "from")]//following-sibling::a//@href - postProcess: - - replace: - - regex: ^ - with: "https://www.adultdvdempire.com" + Name: //h1/text() + URL: //link[@rel="canonical"]/@href Tags: - Name: //strong[starts-with(text(), "Attributes:")]//following-sibling::a/text() + Name: //div[h2[contains(.,'Categories')]]//a[@label="Category"]/text() Performers: - 
Name: //strong[starts-with(text(), "Starring:")]//following-sibling::a/text() + Name: //a[@label="Performer"]//text() URL: - selector: //strong[starts-with(text(), "Starring:")]//following-sibling::a/@href + selector: //a[@label="Performer"]/@href postProcess: - replace: - - regex: ^ - with: "https://www.adultdvdempire.com" -# Last Updated July 17, 2023 + - regex: ^ + with: "https://www.adultdvdempire.com" + URL: //meta[@name='og:url']/@content +# Last Updated December 16, 2021 From 2a1b6a284bd6ba13fc44bbd78d73482542c43133 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 29 Jul 2023 13:32:15 +0200 Subject: [PATCH 262/624] Disable CDP for Kink.com Setting the User-Agent is just as effective --- scrapers/Kink.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapers/Kink.yml b/scrapers/Kink.yml index caf7ae6a7..004289992 100644 --- a/scrapers/Kink.yml +++ b/scrapers/Kink.yml @@ -174,5 +174,7 @@ xPathScrapers: with: "" URL: //link[@rel="canonical"]/@href driver: - useCDP: true + headers: + - Key: User-Agent + Value: stash-scraper/1.0.0 # Last Updated June 25, 2023 From 567e60b4775843e59e580984eb48b9eb977d2974 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 29 Jul 2023 13:36:33 +0200 Subject: [PATCH 263/624] Update list of channels for Kink.com --- scrapers/Kink.yml | 85 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/scrapers/Kink.yml b/scrapers/Kink.yml index 004289992..9684aa6bf 100644 --- a/scrapers/Kink.yml +++ b/scrapers/Kink.yml @@ -63,67 +63,98 @@ xPathScrapers: - regex: /channel/ with: "" - map: - 30-minutes-of-torment: 30 Minutes of Torment - alternadudes: Alternadudes - amator: Amator + # List of sites as of 2023-07-29 from https://www.kink.com/channels + # [...new Set([...document.querySelectorAll('h3 > a')] + # .map(a => a.href.split("/").pop() + ": " + a.innerText))] + # .toSorted() + # .join("\n") + analized: Analized animated-kink: Animated Kink 
ashley-fires-scifi-dreamgirls: Ashley Fires SciFi Dreamgirls aziani-iron: Aziani Iron + badfam-pov: Bad Family POV ball-gaggers: Ball Gaggers banana-jacks: Banana Jacks - bifuck: BiFuck - bizarre-video: Bizarre Video + bifuck: BiFUCK bizarre-video-transsexual: Bizarre Video Transsexual + bizarre-video: Bizarre Video bleu-films: Bleu Films bondage-liberation: Bondage Liberation - bonus-hole-boys: Bonus Hole Boys - bound-and-gagged: Bound & Gagged - bound-gang-bangs: Bound Gangbangs - bound-gods: Bound Gods - bound-in-public: Bound in Public + bound-gang-bangs: Bound Gang Bangs + bound-men-wanked: Bound Men Wanked brutal-sessions: Brutal Sessions - butt-machine-boys: Butt Machine Boys - captive-male: Captive Male - chanta-s-bitches: Chanta's Bitches + carmen-rivera: Carmen Rivera + cfnmeu: CFNMEU + ddf-network: DDF Network + deviant-hardcore: Deviant Hardcore device-bondage: Device Bondage + digital-sin: Digital Sin divine-bitches: Divine Bitches - electrosluts: Electro Sluts + electrosluts: Electrosluts everything-butt: Everything Butt - families-tied: Familes Tied + evolved-fights-lesbian-edition: Evolved Fights Lesbian Edition + evolved-fights: Evolved Fights + families-tied: Families Tied + fembot-academy: Fembot Academy + femdum: FemDum + femme-fatale-films: Femme Fatale Films + fetishnetwork: FetishNetwork filth-syndicate: Filth Syndicate filthy-femdom: Filthy Femdom foot-worship: Foot Worship - fucked-and-bound: Fucked and Bound fucking-machines: Fucking Machines + gangbang-chief: Gangbang Chief + gloryhole-secrets: Gloryhole Secrets hardcore-gangbang: Hardcore Gangbang + hardcore-punishments: Hardcore Punishments harmony-fetish: Harmony Fetish + hogtied-up: Hogtied Up hogtied: Hogtied + hot-legs-and-feet: Hot Legs & Feet + house-of-taboo: House Of Taboo kink-classics: Kink Classics - kink-compilations: Kink Compilations kink-features: Kink Features + kink-test-shoots: Kink Test Shoots kink-university: Kink University kinklive: KinkLive - kinkmen-classics: 
Kink Men Classics - kinkrawtestshoots: KinkRawTestShoots - kinktestshoots: KinkTestShoots kinky-bites: Kinky Bites - kinky-bites-men: Kinky Bites Men + lakeview-entertainment: Lakeview Entertainment + machine-dom: Machine Dom mean-bitch: Mean Bitch + medical-y-sado: Medical Y Sado men-in-pain: Men In Pain - men-on-edge: Men on Edge - my-friends-feet: My Friends Feet - naked-combat: Naked Combat + pascals-sub-sluts: Pascals Sub Sluts + pegging: Pegging + peghim: PegHim + plumperd: Plumperd + pornforce: Porn Force + pornstar-platinum: Pornstar Platinum pov-pickups: POV Pickups public-disgrace: Public Disgrace - sadistic-rope: Sadistic Rope - sex-and-submission: Sex and Submission + revenge-of-the-baroness: Revenge Of The Baroness + royal-fetish-films: Royal Fetish Films + savage-gangbang: Savage Gangbang + severe-sex-films: Severe Sex Films + sex-and-submission: Sex And Submission + sexual-disgrace: Sexual Disgrace + sister-wives: Sister Wives + slutinspection: Slut Inspection + spizoo: Spizoo + strapon-squad: Strapon Squad struggling-babes: Struggling Babes submissive-x: Submissive X - the-training-of-o: The Training of O + submissived: Submissived + sweet-femdom: Sweet FemDom + the-training-of-o: The Training Of O the-upper-floor: The Upper Floor + the-venus-girls: The Venus Girls + torment-time: Torment Time + transerotica: TransErotica ts-pussy-hunters: TS Pussy Hunters ts-seduction: TS Seduction + twisted-visual: Twisted Visual ultimate-surrender: Ultimate Surrender + wasteland: Wasteland water-bondage: Water Bondage whipped-ass: Whipped Ass wired-pussy: Wired Pussy From fac60f25d1776e550fb7c5afd291253a6da57416 Mon Sep 17 00:00:00 2001 From: StashPRs <126653947+StashPRs@users.noreply.github.com> Date: Sat, 29 Jul 2023 21:51:06 -0500 Subject: [PATCH 264/624] Pornhub: Fix URLs no longer having `ph` before the ID Before: https://regex101.com/r/8goDRc/1 After: https://regex101.com/r/pGrS9o/1 --- scrapers/Pornhub.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/scrapers/Pornhub.yml b/scrapers/Pornhub.yml index 28dd41d78..f2c1aa0da 100644 --- a/scrapers/Pornhub.yml +++ b/scrapers/Pornhub.yml @@ -18,7 +18,7 @@ sceneByFragment: queryURL: https://www.pornhub.com/view_video.php?viewkey={filename} queryURLReplace: filename: - - regex: (?:.*[^a-zA-Z\d])?(ph(?:[a-zA-Z\d]+)).+ + - regex: (?:.*[^a-zA-Z\d])?((?:ph)?(?:[a-zA-Z\d]{13})).+ with: $1 - regex: .*\.[^\.]+$ # if no ph id is found in the filename with: # clear the filename so that it doesn't leak to ph @@ -151,4 +151,4 @@ driver: Domain: ".pornhub.com" Value: "1" Path: "/" -# Last Updated April 23, 2023 +# Last Updated July 29, 2023 From 36a5370382f5181f481e9aef5d5b69312ec28b50 Mon Sep 17 00:00:00 2001 From: Fabio Tea Date: Sun, 30 Jul 2023 16:58:29 +0200 Subject: [PATCH 265/624] added fixes from @Maista6969 - thx! --- scrapers/RealJamVR.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index eae552999..49bda1041 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -16,7 +16,10 @@ xPathScrapers: Date: selector: //div[@class="specs-icon"]/following-sibling::strong postProcess: - - parseDate: January 2, 2006 + - replace: + - regex: ^([a-zA-Z]{3})\D*(\d{1,2},\s*\d+)$ + with: $1. $2 + - parseDate: Jan. 
2, 2006 Performers: Name: (//a[starts-with(@href, "/actor")]/text())[1] Tags: From 40b55ccf0850a554233ed2e8449133e8e096457c Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Mon, 31 Jul 2023 14:25:49 +1000 Subject: [PATCH 266/624] Update Bang scraper --- scrapers/Bang.yml | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/scrapers/Bang.yml b/scrapers/Bang.yml index e19a251c9..ce2ba5a73 100644 --- a/scrapers/Bang.yml +++ b/scrapers/Bang.yml @@ -17,19 +17,19 @@ performerByURL: xPathScrapers: sceneScraper: common: - $movie: //p[contains(text(),"From the Movie:")]//a[contains(@href,"/dvd/")] - $performer: //p[@class="capitalize" and contains(text(),"With:")]/a[contains(@href,"/pornstar/")] + $movie: //div[@data-controller="video-entry"]//a[contains(@href,"/dvd/")]//div[contains(@class,"name")]/span[1] + $performer: //p[contains(@class,"capitalize") and contains(text(),"With:")]/a[contains(@href,"/pornstar/")] scene: - Title: //meta[@name="og:title"]/@content + Title: //meta[@property="og:title"]/@content Details: //meta[@name="description"]/@content Image: - selector: //meta[@name="og:image"]/@content + selector: //meta[@property="og:image"]/@content postProcess: - replace: - regex: (.+)(\?.+) with: $1 Date: - selector: //div[@data-controller="login"]//p[contains(text(),"Date:")]/text() + selector: //p[contains(text(),"Date:")]/text() postProcess: - replace: - regex: \w+:\s*(\w+\s)(\d+),(\s\d{4}).* @@ -47,7 +47,7 @@ xPathScrapers: - regex: ^/ with: "https://www.bang.com/" Studio: - Name: //p[contains(text(),"Studio:")]//a[contains(@href,"from=")] + Name: //p[contains(text(),"Studio:")]//a[contains(@href,"from=")]/img/@alt Movies: Name: $movie URL: @@ -55,20 +55,20 @@ xPathScrapers: postProcess: *addhost movieScraper: common: - $details: //div[@data-controller="login"] - $image: //div[contains(@class,"justify-start")]/div/img[contains(@src,"/front")]/@src + 
$details: //div[@class="w-full"][1] + $image: //body/div[contains(@class,"w-full")][3]/div/div/picture//img[contains(@src,"/front")]/@src movie: Name: - selector: //div[@class="w-full"]/h1 + selector: $details/h1 Duration: - selector: (//div[@class="w-full"]//span[*[name()='svg']])[2] + selector: //p[contains(text(),"Date:")]/span[2]/text() Date: - selector: $details//p[contains(text(), "Released:")]/span + selector: //p[contains(text(),"Date:")]/span[1]/text() postProcess: - parseDate: Jan 02, 2006 Synopsis: $details//p[contains(@class,"clear-both")] Studio: - Name: $details//p[contains(text(),"Studio")]/a + Name: $details//p[contains(text(),"Studio")]/a/text() FrontImage: selector: $image postProcess: @@ -85,15 +85,9 @@ xPathScrapers: common: $overlay: //div[@class="flex flex-col md:items-start items-center"] performer: - Name: $overlay/h1 + Name: $overlay/h2 URL: selector: //link[@rel="canonical"][1]/@href - Twitter: - selector: "//script[@type=\"application/ld+json\"][contains(.,'\"@type\": \"Person\"')][contains(.,'https://twitter.com/')]/text()" - postProcess: - - replace: - - regex: .+https://twitter.com/([^"]+).+ - with: $1 Birthdate: selector: $overlay//div[contains(text(),"Born")]/span[contains(text(),"old")] postProcess: @@ -101,12 +95,6 @@ xPathScrapers: - regex: \s*(\w+\s)(\d+),(\s\d{4}).* with: $1$2$3 - parseDate: January 2 2006 - Country: - selector: $overlay//div[contains(text(),"Born")]/text()[contains(.,"in")]/following-sibling::span[not(@class="mx-1")] - postProcess: - - replace: - - regex: (?:.+\s)?(\w+)$ - with: $1 HairColor: selector: $overlay//div[contains(@class,"md:text-left")]/text()[contains(.,"Hair Color")]/following-sibling::span[1] Ethnicity: @@ -114,9 +102,9 @@ xPathScrapers: EyeColor: selector: $overlay//div[contains(@class,"md:text-left")]/text()[contains(.,"Eye Color")]/following-sibling::span[1] Aliases: - selector: $overlay//h1/following-sibling::span + selector: $overlay//h1/following-sibling::div Image: - selector: 
//div[@class="relative"]/img/@src + selector: //div[@class="relative"]//img/@src postProcess: - replace: - regex: \?.+$ From 8e6eb36fabd38c5e0fd46a1bdb901883e8e48325 Mon Sep 17 00:00:00 2001 From: Scum-Bum Date: Mon, 31 Jul 2023 14:17:31 +0100 Subject: [PATCH 267/624] Added studio code and director, fixed tags to work with newer scenes --- scrapers/vixenNetwork.py | 19 +++++++++++++++++-- scrapers/vixenNetwork.yml | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/scrapers/vixenNetwork.py b/scrapers/vixenNetwork.py index aac186843..b3c356dea 100644 --- a/scrapers/vixenNetwork.py +++ b/scrapers/vixenNetwork.py @@ -16,7 +16,7 @@ sys.exit() # Max number of scenes that a site can return for the search. -MAX_SCENES = 6 +MAX_SCENES = 12 class Site: @@ -110,6 +110,10 @@ def parse_scene(self, response): scene['title'] = data.get('title') scene['details'] = data.get('description') scene['studio'] = {"name": self.name} + scene['code'] = data.get('videoId') + director = data.get("directors") + if director is not None: + scene["director"] = ", ".join(d["name"] for d in data.get("directors", [])) date = data.get('releaseDate') if date: @@ -123,6 +127,9 @@ def parse_scene(self, response): if data.get('tags'): for tag in data['tags']: scene['tags'].append({"name": tag}) + else: + for tag in data['categories']: + scene['tags'].append({"name": tag['name']}) if data.get('images'): if data['images'].get('poster'): @@ -155,6 +162,7 @@ def parse_search(self, response): sc['title'] = scene.get('title') sc['details'] = scene.get('description') sc['url'] = f"https://www.{self.id.lower()}.com/videos/{slug}" + sc['code'] = scene.get('videoId') sc['studio'] = {"name": self.name} date = scene.get('releaseDate') if date: @@ -164,7 +172,6 @@ def parse_search(self, response): for model in scene['modelsSlugged']: sc['performers'].append( {"name": model['name']}) - if scene.get('images'): if scene['images'].get('listing'): maxWidth = 0 @@ -188,6 +195,10 @@ def length(self, 
studio): models { name } + videoId + directors { + name + } images { poster { src @@ -195,6 +206,9 @@ def length(self, studio): } } tags + categories { + name + } } } """ @@ -211,6 +225,7 @@ def length(self, studio): name slugged: slug } + videoId images { listing { src diff --git a/scrapers/vixenNetwork.yml b/scrapers/vixenNetwork.yml index 44d4f5b0d..6aa178278 100644 --- a/scrapers/vixenNetwork.yml +++ b/scrapers/vixenNetwork.yml @@ -30,4 +30,4 @@ sceneByQueryFragment: - python - vixenNetwork.py -# Last Updated June 17, 2023 +# Last Updated July 31, 2023 From 4163c62d4c056ecd82f1a7cda988e6037d805282 Mon Sep 17 00:00:00 2001 From: Scum-Bum Date: Mon, 31 Jul 2023 14:31:51 +0100 Subject: [PATCH 268/624] returned max scenes to default --- scrapers/vixenNetwork.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/vixenNetwork.py b/scrapers/vixenNetwork.py index b3c356dea..995f2fb0a 100644 --- a/scrapers/vixenNetwork.py +++ b/scrapers/vixenNetwork.py @@ -16,7 +16,7 @@ sys.exit() # Max number of scenes that a site can return for the search. 
-MAX_SCENES = 12 +MAX_SCENES = 6 class Site: From 5b700bd137ceda3991d3bdcd141e6a3ae1ddda62 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Tue, 1 Aug 2023 09:40:18 +1000 Subject: [PATCH 269/624] Update date --- scrapers/Bang.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Bang.yml b/scrapers/Bang.yml index ce2ba5a73..409265425 100644 --- a/scrapers/Bang.yml +++ b/scrapers/Bang.yml @@ -109,4 +109,4 @@ xPathScrapers: - replace: - regex: \?.+$ with: "" -# Last Updated November 10, 2022 +# Last Updated August 01, 2023 From db6d9bdf7209f0926048fde551827c894835d69d Mon Sep 17 00:00:00 2001 From: Emilo2 <99644577+Emilo2@users.noreply.github.com> Date: Tue, 1 Aug 2023 11:14:34 +0300 Subject: [PATCH 270/624] Fix performer scraping Fetch additional tags Gallery scrape --- scrapers/RealJamVR.yml | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/scrapers/RealJamVR.yml b/scrapers/RealJamVR.yml index 09fb509ac..1ce0c1732 100644 --- a/scrapers/RealJamVR.yml +++ b/scrapers/RealJamVR.yml @@ -16,22 +16,30 @@ xPathScrapers: - replace: - regex: ^\s+(.+)\s+$ with: $1 - Date: + Date: &date selector: //div[@class="specs-icon"]/following-sibling::strong postProcess: - replace: - regex: ^([a-zA-Z]{3})\D*(\d{1,2},\s*\d+)$ with: $1. $2 - parseDate: Jan. 
2, 2006 - Performers: - Name: (//a[starts-with(@href, "/actor")]/text())[1] - Tags: - Name: //a[starts-with(@href, "/scenes") and @class="tag"]/text() - Details: + Performers: &performers + Name: //div[contains(@class,"scene-view")]/a[contains(@href,"/actor/")] + Tags: &tags + Name: //a[starts-with(@href, "/scenes") and @class="tag"]/text() | //div[not(@class)]/div[@class="specs-icon" and not(i)] + Details: &details selector: //div[@class="opacity-75 my-2"] Image: selector: //*[@id="video-player"]//@poster - Studio: + Studio: &studio Name: fixed: RealJamVR -# Last Updated July 17, 2023 + gallery: + Title: *title + Date: *date + Performers: *performers + Tags: *tags + Details: *details + Studio: *studio + +# Last Updated August 01, 2023 From b56d0cc7b498f1321f794955f17893613ce2cda6 Mon Sep 17 00:00:00 2001 From: KinkCastle <141179453+KinkCastle@users.noreply.github.com> Date: Tue, 1 Aug 2023 21:21:48 +0200 Subject: [PATCH 271/624] Fix insex.yml --- scrapers/insex.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapers/insex.yml b/scrapers/insex.yml index d795c3f19..1e8663fb0 100644 --- a/scrapers/insex.yml +++ b/scrapers/insex.yml @@ -73,4 +73,9 @@ xPathScrapers: Name: $studio Image: *imageSelector -# Last Updated January 18, 2021 +driver: + useCDP: true + clicks: + - xpath: //a/button[@class="button is-danger"] + +# Last Updated August 01, 2023 From 242f66cada488e817781a7ea173926f9264fa436 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 2 Aug 2023 00:46:53 +0200 Subject: [PATCH 272/624] Add Smutpuppet network site to Smutpuppet scraper --- scrapers/SmutPuppet.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapers/SmutPuppet.yml b/scrapers/SmutPuppet.yml index 357fefb1a..9dae14d57 100644 --- a/scrapers/SmutPuppet.yml +++ b/scrapers/SmutPuppet.yml @@ -12,6 +12,7 @@ sceneByURL: - jeffsmodels.com/update - milfsodomy.com/update - smutmerchants.com/update + - smutpuppet.com/update - suggabunny.com/update - teenerotica.xxx/update 
scraper: sceneScraper From fae4e8944775ee0c7c3f6b94de22dd4dd0023586 Mon Sep 17 00:00:00 2001 From: KinkCastle <141179453+KinkCastle@users.noreply.github.com> Date: Wed, 2 Aug 2023 20:39:56 +0200 Subject: [PATCH 273/624] move from cdp to cookie based access on insex sites --- scrapers/insex.yml | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/scrapers/insex.yml b/scrapers/insex.yml index 1e8663fb0..afb291ca7 100644 --- a/scrapers/insex.yml +++ b/scrapers/insex.yml @@ -74,8 +74,42 @@ xPathScrapers: Image: *imageSelector driver: - useCDP: true - clicks: - - xpath: //a/button[@class="button is-danger"] + cookies: + - CookieURL: "https://hardtied.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "hardtied.com" + Path: "/" + - CookieURL: "https://infernalrestraints.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "infernalrestraints.com" + Path: "/" + - CookieURL: "https://insex.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "insex.com" + Path: "/" + - CookieURL: "https://insexondemand.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "insexondemand.com" + Path: "/" + - CookieURL: "https://realtimebondage.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "realtimebondage.com" + Path: "/" + - CookieURL: "https://sexuallybroken.com" + Cookies: + - Name: "consent" + Value: "yes" + Domain: "sexuallybroken.com" + Path: "/" -# Last Updated August 01, 2023 +# Last Updated August 02, 2023 From f23fcb81ab50e70034d43cba068986575f71a697 Mon Sep 17 00:00:00 2001 From: WillyOrtrun <129560075+WillyOrtrun@users.noreply.github.com> Date: Thu, 3 Aug 2023 00:50:52 +0200 Subject: [PATCH 274/624] Added redgifs last updated date --- scrapers/Redgifs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapers/Redgifs.yml b/scrapers/Redgifs.yml index 44092e837..d62e9c3a1 100644 --- a/scrapers/Redgifs.yml +++ b/scrapers/Redgifs.yml @@ -28,3 +28,4 @@ sceneByName: - python - 
Redgifs.py - name +# Last Updated August 03, 2023 From 5b2fcf9ca698bf80258ef6ca48d769a9b0d641f4 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 3 Aug 2023 01:01:41 +0200 Subject: [PATCH 275/624] Add Redgifs scraper to SCRAPERS-LIST.md --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a8ef6ba87..85742ed92 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1105,6 +1105,7 @@ realsensual.com|RealSensual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +redgifs.com|Redgifs.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Gifs redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- redpolishfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- reidmylips.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From b38cc184fa5a36239c1277931302700b424d491e Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Thu, 3 Aug 2023 17:21:59 -0700 Subject: [PATCH 276/624] Added Date extraction for Tonight's Girlfriend --- scrapers/Tonightsgirlfriend.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scrapers/Tonightsgirlfriend.yml b/scrapers/Tonightsgirlfriend.yml index 608ee33f0..fc96aba99 100644 --- a/scrapers/Tonightsgirlfriend.yml +++ b/scrapers/Tonightsgirlfriend.yml @@ -9,6 +9,12 @@ xPathScrapers: scene: Title: //h1 Details: //p[@class="scene-description"] + Date: + selector: //script[contains(text(),"uploadDate")]/text() + postProcess: + - replace: + - regex: '.+(\d{4}-\d{2}-\d{2}).+' + with: "$1" Performers: Name: selector: //p[@class="grey-performers"]//text() @@ -28,4 +34,4 @@ xPathScrapers: URL: //link[@rel='canonical']/@href Tags: Name: //a[@class="cat-tag"] -# Last Updated April 22, 2022 +# Last Updated August 
03, 2023 From caf24339a94171ff01d470fbc8237e732159d834 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Thu, 3 Aug 2023 17:38:00 -0700 Subject: [PATCH 277/624] Added scene search by name --- scrapers/AdultDvdMarketPlace.yml | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/scrapers/AdultDvdMarketPlace.yml b/scrapers/AdultDvdMarketPlace.yml index 3b7b9d0f4..9033c5c78 100644 --- a/scrapers/AdultDvdMarketPlace.yml +++ b/scrapers/AdultDvdMarketPlace.yml @@ -7,6 +7,22 @@ movieByURL: - adultdvdmarketplace.com/dvd_view scraper: movieScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.adultdvdmarketplace.com/xcart/adult_dvd/dvd_search.php?type=title&search={} + scraper: sceneSearch + +sceneByURL: + - action: scrapeXPath + url: + - adultdvdmarketplace.com/dvd_view + scraper: sceneScraper + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + xPathScrapers: movieScraper: movie: @@ -22,4 +38,36 @@ xPathScrapers: FrontImage: //strong[contains(text(),"Large Front")]/parent::a/@href BackImage: //strong[contains(text(),"Large Back")]/parent::a/@href + sceneSearch: + common: + $selection: //div[contains(concat(' ',normalize-space(@class),' '),' product-col ')] + scene: + Title: $selection//h4/a + URL: + selector: $selection//h4/a/@href + postProcess: + - replace: + - regex: ^ + with: https://www.adultdvdmarketplace.com + Image: $selection//a/img/@src + + sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + Details: //h3[contains(text(), "Description")]/following-sibling::p + Date: + selector: //span[contains(text(),"Released")]/following-sibling::text() + postProcess: + - parseDate: 01/2006 + Image: //strong[contains(text(),"Large Front")]/parent::a/@href + Studio: + Name: //span[@itemprop="brand"]/text() + Movies: + Name: //meta[@property="og:title"]/@content + URL: //meta[@property="og:url"]/@content + Performers: + Name: //h3[text()="Cast"]/following-sibling::a + Tags: + 
Name: //span[text()="Category:"]/following-sibling::a + # Last Updated September 07, 2020 From 2c2ece043888050a328f69583b07e29ae3c97fb0 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Thu, 3 Aug 2023 17:38:59 -0700 Subject: [PATCH 278/624] Added required cookies --- scrapers/Clips4Sale.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index 024eb0628..d8bce0955 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -113,4 +113,18 @@ xPathScrapers: # Clips4Sale doesn't have an explict performer field, but performers are # often included in the video tags. So we attempt to find matches there. Name: //span[contains(text(),"Keywords:")]/following-sibling::a -# Last Updated February 03, 2023 + +driver: + cookies: + - CookieURL: "https://clips4sale.com" + Cookies: + - Name: "iAgreeWithTerms" + Domain: ".clips4sale.com" + Value: "true" + Path: "/" + headers: + - Key: User-Agent + Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) + + +# Last Updated July 24, 2023 From 663763d8a17d3937533264f24b3763982fe838ec Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Thu, 3 Aug 2023 17:40:07 -0700 Subject: [PATCH 279/624] Fixed title scrape --- scrapers/CzechVR.yml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/scrapers/CzechVR.yml b/scrapers/CzechVR.yml index 96854f42d..114c9745e 100644 --- a/scrapers/CzechVR.yml +++ b/scrapers/CzechVR.yml @@ -22,7 +22,8 @@ xPathScrapers: $info: &infoSel //div[@class="post"] $url: &urlSel //meta[@name="dl8-customization-brand-url"]/@content scene: - Title: &titleSel $info//div[@class="nazev"]/h2 +# Title: &titleSel $info//div[@class="nazev"]/h2 + Title: &titleSel $info//div[contains(concat(' ',normalize-space(@class),' '),' nazev ')]/h2|$info//div[contains(concat(' ',normalize-space(@class),' '),' nazev ')]/h1 Date: &dateSel selector: $info//div[@class="datum"]/text() 
postProcess: @@ -59,4 +60,17 @@ xPathScrapers: Name: *studioSel Synopsis: *detailsSel FrontImage: *imageSel -# Last Updated December 16, 2022 + +driver: + cookies: + - CookieURL: "https://www.czechvr.com" + Cookies: + - Name: "iagree" + Domain: ".czechvr.com" + Value: "ano" + Path: "/" + headers: + - Key: User-Agent + Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) + +# Last Updated July 25, 2023 From b9038b0c4732fb1699ea1c0cea22e63fe5b64d62 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Thu, 3 Aug 2023 17:41:24 -0700 Subject: [PATCH 280/624] Added scene by name and URL also movie by URL --- scrapers/FreeonesCommunity.yml | 134 ++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 20 deletions(-) diff --git a/scrapers/FreeonesCommunity.yml b/scrapers/FreeonesCommunity.yml index 29a5232e3..5686d1f05 100644 --- a/scrapers/FreeonesCommunity.yml +++ b/scrapers/FreeonesCommunity.yml @@ -1,8 +1,10 @@ name: FreeonesCommunity + performerByName: action: scrapeXPath queryURL: https://www.freeones.com/babes?q={}&v=teasers&s=relevance&l=96&m%5BcanPreviewFeatures%5D=0 scraper: performerSearch + performerByURL: - action: scrapeXPath url: @@ -10,6 +12,28 @@ performerByURL: - freeones.com scraper: performerScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.freeones.com/vod?q={} + scraper: sceneSearch + +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +sceneByURL: + - action: scrapeXPath + url: + - www.freeones.com + scraper: sceneScraper + +movieByURL: + - action: scrapeXPath + url: + - www.freeones.com + scraper: movieScraper + xPathScrapers: performerSearch: performer: @@ -22,6 +46,71 @@ xPathScrapers: with: https://www.freeones.com - regex: /feed$ with: /bio + sceneSearch: + common: + $movieTitle: //div[@data-test="teaser-vod"]//img + scene: + Title: $movieTitle/@alt + Image: $movieTitle/@src + URL: + selector: //div[@data-test="teaser-vod"]/a/@href + postProcess: 
+ - replace: + - regex: ^ + with: "https://www.freeones.com" + + sceneScraper: + common: + $commonRoot: //*[@id="description"] + $performerName: //a[@data-test="link_Cast"] + scene: + Title: //h1 + URL: //link[@rel="alternate"][1]/@href + Details: $commonRoot//div[contains(concat(' ',normalize-space(@class),' '),' pb-2 ')] + Studio: + Name: $commonRoot//span[@data-test="link_span_Studio"] + Director: $commonRoot//span[@data-test="link_span_Director"] + Date: + selector: //div[contains(concat(' ',normalize-space(@class),' '),' mid-content-pr-past-date ')] + postProcess: + - replace: + - regex: .+?(\w+\s\d{1,2},\s\d{4}).+ + with: $1 + - parseDate: January 2, 2006 + Movies: + Name: //h1 + URL: //link[@rel="alternate"][1]/@href + Performers: + Name: $performerName/span + Tags: + Name: //li[@class="list-inline-item"]/a +# URL: +# selector: $performerName/@href +# postProcess: +# - replace: +# - regex: ^ +# with: "https://www.freeones.com" + + movieScraper: + common: + $commonRoot: //*[@id="description"] + $performerName: //a[@data-test="link_Cast"] + movie: + Name: //h1 + Synopsis: $commonRoot//div[contains(concat(' ',normalize-space(@class),' '),' pb-2 ')] + Duration: $commonRoot//span[@data-test="link_span_Duration"] + Studio: + Name: $commonRoot//span[@data-test="link_span_Studio"] + Director: $commonRoot//span[@data-test="link_span_Director"] + Date: + selector: //div[contains(concat(' ',normalize-space(@class),' '),' mid-content-pr-past-date ')] + postProcess: + - replace: + - regex: .+?(\w+\s\d{1,2},\s\d{4}).+ + with: $1 + - parseDate: January 2, 2006 + FrontImage: //*[@id="fxgp-gallery"]/a[1]/@href + BackImage: //*[@id="fxgp-gallery"]/a[2]/@href performerScraper: performer: @@ -35,38 +124,40 @@ xPathScrapers: Twitter: //form//a[contains(@href,'twitter.com/')]/@href Instagram: //form//a[contains(@href,'instagram.com/')]/@href Birthdate: - selector: //span[contains(text(),'Born On')] + selector: //span[@data-test="link_span_dateOfBirth"]/text() postProcess: - - 
replace: - - regex: Born On - with: - parseDate: January 2, 2006 Ethnicity: - selector: //a[@data-test="link_ethnicity"]/span/text() + selector: //span[@data-test="link_span_ethnicity"] postProcess: - map: Asian: Asian Caucasian: White Black: Black Latin: Hispanic - Country: //a[@data-test="link-country"]/span/text() - EyeColor: //span[text()='Eye Color']/following-sibling::span/a + Country: + selector: //a[@data-test="link_placeOfBirth"][contains(@href, 'country')]/span/text() + postProcess: + - map: + United States: "USA" + EyeColor: //span[text()='Eye Color:']/following-sibling::span/a/span/text() Height: - selector: //span[text()='Height']/following-sibling::span/a + selector: //span[text()='Height:']/following-sibling::span/a postProcess: - - replace: - - regex: \D+[\s\S]+ - with: "" + - feetToCm: true - map: Unknown: "" Measurements: - selector: //span[text()='Measurements']/following-sibling::span/span/a + selector: //span[(@data-test='link_span_bra') or (@data-test='link_span_waist') or (@data-test='link_span_hip')] concat: " - " postProcess: + - replace: + - regex: \sIn + with: "" - map: Unknown: "" FakeTits: - selector: //span[text()='Boobs']/following-sibling::span/a + selector: //span[text()='Boobs:']/following-sibling::span/a postProcess: - map: Unknown: "" @@ -75,14 +166,16 @@ xPathScrapers: CareerLength: selector: //div[contains(@class,'timeline-horizontal')]//p[@class='m-0'] concat: "-" - Aliases: //p[@data-test='p_aliases']/text() + Aliases: + selector: //span[@data-test='link_span_aliases']/text() + concat: ", " Tattoos: - selector: //span[text()='Tattoos']/following-sibling::span/span + selector: //span[text()='Tattoo locations:']/following-sibling::span postProcess: - map: Unknown: "" Piercings: - selector: //span[text()='Piercings']/following-sibling::span/span + selector: //span[text()='Piercing locations:']/following-sibling::span postProcess: - map: Unknown: "" @@ -90,7 +183,7 @@ xPathScrapers: selector: 
//div[contains(@class,'image-container')]//a/img/@src Gender: fixed: "Female" - Details: //div[@data-test="biography"] + #Details: //div[@data-test="biography"] DeathDate: selector: //div[contains(text(),'Passed away on')] postProcess: @@ -102,8 +195,9 @@ xPathScrapers: Weight: selector: //span[@data-test="link_span_weight"] postProcess: - - replace: - - regex: \D+[\s\S]+ + - replace: + - regex: \slbs with: "" + - lbToKg: true -# Last Updated April 16, 2021 +# Last Updated January 19, 2023 From 317160c2158c11ff37865572a2ac640d9e1a0495 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Thu, 3 Aug 2023 17:42:25 -0700 Subject: [PATCH 281/624] Added performer by name and URL, also fixed some data etraction --- scrapers/Private.yml | 77 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/scrapers/Private.yml b/scrapers/Private.yml index cedc34836..583d183d8 100644 --- a/scrapers/Private.yml +++ b/scrapers/Private.yml @@ -9,6 +9,15 @@ movieByURL: url: - private.com scraper: movieScraper +performerByName: + action: scrapeXPath + queryURL: https://www.private.com/search.php?query={} + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - private.com + scraper: performerScraper xPathScrapers: sceneScraper: common: @@ -21,11 +30,13 @@ xPathScrapers: postProcess: # The format changes when another language is selected - parseDate: 01/02/2006 - Details: $content//p[@id="description-section"] + Details: + selector: $content//p[@id="description-section"]/text() + concat: "\n" Tags: - Name: $content//ul[@class="scene-tags"]/li/a/text() + Name: //li[@class="tag-tags"]//a/text() Performers: - Name: $content//ul[@class="scene-models-list"]//a/text() + Name: //li[@class="tag-models"]//a/text() Movies: Name: selector: //a[@data-track="FULL MOVIE"]/@href @@ -52,6 +63,57 @@ xPathScrapers: fixed: Private Synopsis: //p[@class="sinopsys"] FrontImage: //div[@class="dvds-wrapper"]//img[@class="img-responsive"]/@src + 
performerSearch: + common: + $searchData: //a[@data-track="PORNSTAR_NAME"] + performer: + Name: $searchData + URL: $searchData/@href + performerScraper: + common: + $performerData: //div[contains(concat(' ',normalize-space(@class),' '),' pornstar-wrapper ')] + performer: + Name: $performerData//h1 + URL: //meta[@property="og:url"]/@content + Aliases: $performerData//p[@class="aka"]/text() + Details: $performerData//li[@class="model-facts-long"]/div + Measurements: $performerData//em[text()="Measurements:"]/../text() + Height: + selector: $performerData//em[text()="Height:"]/../text() + postProcess: + - replace: + - regex: (\d+)cm.+ + with: $1 + - map: + -: "" + Weight: + selector: $performerData//em[text()="Weight:"]/../text() + postProcess: + - replace: + - regex: (\d+)kg.+ + with: $1 + - map: + -: "" + Country: $performerData//em[text()="Birth place:"]/../text() + HairColor: $performerData//em[text()="Hair Color:"]/../text() + EyeColor: + selector: $performerData//em[text()="Eye color:"]/../text() + postProcess: + - map: + -: "" + Tattoos: + selector: $performerData//em[text()="Tattoos:"]/../text() + postProcess: + - map: + -: "" + Piercings: + selector: $performerData//em[text()="Piercings:"]/../text() + postProcess: + - map: + -: "" + Image: $performerData//img/@src + Gender: + fixed: "Female" driver: cookies: - CookieURL: https://private.com @@ -60,4 +122,11 @@ driver: Domain: ".private.com" Value: "en" Path: "/" -# Last Updated March 10, 2020 + - Name: "agreed18" + Domain: ".private.com" + Value: "true" + Path: "/" + headers: + - Key: User-Agent + Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) +# Last Updated August 03, 2023 From 6064e06b74e1f99269556b47601eeae27136aa8d Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 4 Aug 2023 02:57:19 +0200 Subject: [PATCH 282/624] Fix gallery scraper for Femjoy - thank you eloiselle --- scrapers/Femjoy.yml | 79 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 63 
insertions(+), 16 deletions(-) diff --git a/scrapers/Femjoy.yml b/scrapers/Femjoy.yml index 34df8bd0e..60b207ace 100644 --- a/scrapers/Femjoy.yml +++ b/scrapers/Femjoy.yml @@ -1,26 +1,73 @@ name: Femjoy +galleryByURL: + - action: scrapeXPath + url: + - femjoy.com/post/ + scraper: galleryScraper +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: "https://www.femjoy.com/videos?s={}" + scraper: sceneSearch sceneByURL: - action: scrapeXPath url: - - femjoy.com + - femjoy.com/post/ scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $performer: //h1[@class='post_title']/a[starts-with(@href,"/models")] scene: - Title: //div[@class='post_details']/h1[@class='post_title']/span/text() - Studio: - Name: + Title: &titleSel //h1[@class='post_title']/span[last()]/text() + Studio: &studioAttr + Name: fixed: Femjoy - Date: - selector: //div[@class='post_details']/h2[@class='post_title'] - postProcess: + Date: &dateAttr + selector: //h2[@class='post_title']/text()[2] + postProcess: &datePP - replace: - - regex: .+on\s(.+) - with: $1 - - parseDate: Jan 2, 2006 - Performers: - Name: //div[@class='post_details']/h1[@class='post_title']/a[contains(@href,"/models/")] - URL: //div[@class='post_details']/h1[@class='post_title']/a[contains(@href,"/models/")]/@href - #Image: Uses placeholder cover now - Details: //div[@class='post_details']/h2[@class='post_description']/p -# Last Updated October 13, 2022 + - regex: '.*released on\s+' + with: + - parseDate: Jan 2, 2006 + Director: //*[@class='post_title']/a[starts-with(@href,"/director")] + Performers: &performersAttr + Name: $performer + URL: + selector: $performer/@href + postProcess: &prependDomain + - replace: + - regex: ^ + with: https://femjoy.com + Details: &details + selector: //*[@class='post_description']//text() + concat: "\n" + Image: //meta[@name='twitter:image']/@content + sceneSearch: + common: + $scene: //div[@class='post_video'] 
+ $preview: //div[@class='post_video']//a[@class='preview'] + scene: + Title: $preview/@title + Date: + selector: $scene//span[@class='posted_on']/text() + postProcess: *datePP + URL: + selector: $preview/@href + postProcess: *prependDomain + Image: + selector: $preview/@data-media-poster + galleryScraper: + common: + $performer: //h1[@class='post_title']/a[starts-with(@href,"/models")] + gallery: + Title: *titleSel + Studio: *studioAttr + Date: *dateAttr + Performers: *performersAttr + Details: *details + +# Last Updated August 03, 2023 From be50298b4147b16d0e624348448fe118c22901d2 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 4 Aug 2023 04:17:35 +0200 Subject: [PATCH 283/624] Add VR Intimacy to CzechVR --- SCRAPERS-LIST.md | 1 + scrapers/CzechVR.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 85742ed92..ce519fd6b 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1442,6 +1442,7 @@ vrconk.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrintimacy.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR vrporn.com|VRPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/CzechVR.yml b/scrapers/CzechVR.yml index 114c9745e..b7c826745 100644 --- a/scrapers/CzechVR.yml +++ b/scrapers/CzechVR.yml @@ -6,6 +6,7 @@ sceneByURL: - czechvrcasting.com - czechvrfetish.com - czechvrnetwork.com + - vrintimacy.com scraper: sceneScraper movieByURL: - action: scrapeXPath @@ -14,6 +15,7 @@ movieByURL: - czechvrcasting.com - czechvrfetish.com - czechvrnetwork.com + - vrintimacy.com scraper: 
movieScraper xPathScrapers: From df331417edc8c5a7616a2493b71c3f14e6985d05 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 4 Aug 2023 04:17:46 +0200 Subject: [PATCH 284/624] Get correct studio for Czech VR Network sites --- scrapers/CzechVR.yml | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/scrapers/CzechVR.yml b/scrapers/CzechVR.yml index b7c826745..93c59784d 100644 --- a/scrapers/CzechVR.yml +++ b/scrapers/CzechVR.yml @@ -24,9 +24,14 @@ xPathScrapers: $info: &infoSel //div[@class="post"] $url: &urlSel //meta[@name="dl8-customization-brand-url"]/@content scene: -# Title: &titleSel $info//div[@class="nazev"]/h2 - Title: &titleSel $info//div[contains(concat(' ',normalize-space(@class),' '),' nazev ')]/h2|$info//div[contains(concat(' ',normalize-space(@class),' '),' nazev ')]/h1 - Date: &dateSel + Title: &title + # Czech VR Network uses h1, the substudios use h2 + selector: &titleSel $info//h1|$info//h2 + postProcess: + - replace: + - regex: ".+ - " + with: + Date: &date selector: $info//div[@class="datum"]/text() postProcess: - parseDate: Jan 2, 2006 @@ -35,9 +40,14 @@ xPathScrapers: Name: //div[@id="MoreTags"]/div[@class="tagsipka"]//a/text() Performers: Name: $info//div[@class="featuring"]//a/text() - Studio: - Name: &studioSel //meta[@name="dl8-customization-brand-name"]/@content - Image: &imageSel + Studio: &studio + Name: + selector: *titleSel + postProcess: + - replace: + - regex: "\\d* - .+" + with: + Image: &image selector: $url|//dl8-video/@poster concat: " " postProcess: @@ -55,13 +65,12 @@ xPathScrapers: $info: *infoSel $url: *urlSel movie: - Name: *titleSel + Name: *title Duration: $info//div[@class="casDetail"]/span[@class="desktop"]/text() - Date: *dateSel - Studio: - Name: *studioSel + Date: *date + Studio: *studio Synopsis: *detailsSel - FrontImage: *imageSel + FrontImage: *image driver: cookies: From 04e5897ae2f8d74ed0e06258548124e6d40e683f Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 
4 Aug 2023 06:40:29 +0200 Subject: [PATCH 285/624] Scrape title and studio for scenes from Private.com --- SCRAPERS-LIST.md | 2 +- scrapers/Private.yml | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 85742ed92..dbd677d50 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1067,7 +1067,7 @@ prettydirtyteens.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- pridestudios.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay primecups.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- princesscum.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- -private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- privatecastings.com|privatecastings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- privatesextapes.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- producersfun.com|ProducersFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Private.yml b/scrapers/Private.yml index 583d183d8..11a03172f 100644 --- a/scrapers/Private.yml +++ b/scrapers/Private.yml @@ -23,8 +23,7 @@ xPathScrapers: common: $content: //section[@class="video-description-and-tags clearfix"] scene: - Title: - selector: $content//ul[@class="scene-models-list"]/preceding-sibling::h1/text() + Title: //div[@class="title-zone"]/h1 Date: selector: //meta[@itemprop="uploadDate"]/@content postProcess: @@ -44,8 +43,7 @@ xPathScrapers: - subScraper: //div[@class="dvds-wrapper"]/h1/text() URL: //a[@data-track="FULL MOVIE"]/@href Studio: - Name: $content//span[@class="title-site"]/text() - #fixed: Private + Name: //div[@class="title-zone"]//span[@class="title-site"] Image: //meta[@property="og:image"]/@content movieScraper: movie: From 55026428fcfc72f3f079c2b00efb6a80012a0b08 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 4 Aug 2023 07:07:47 +0200 
Subject: [PATCH 286/624] Return Private as default studio when tag is missing at Private.com --- scrapers/Private.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapers/Private.yml b/scrapers/Private.yml index 11a03172f..a294bf7cc 100644 --- a/scrapers/Private.yml +++ b/scrapers/Private.yml @@ -43,7 +43,12 @@ xPathScrapers: - subScraper: //div[@class="dvds-wrapper"]/h1/text() URL: //a[@data-track="FULL MOVIE"]/@href Studio: - Name: //div[@class="title-zone"]//span[@class="title-site"] + Name: + selector: //div[@class="title-zone"]//li/a/span[@class="title-site"]/text()|/html/@lang + postProcess: + - replace: + - regex: ^en$ + with: Private Image: //meta[@property="og:image"]/@content movieScraper: movie: From ba60632f0d2344c7196b7130f99a4147baf20a80 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Fri, 4 Aug 2023 08:00:59 -0700 Subject: [PATCH 287/624] Updated SCRAPER-LIST.md to reflect additional functionality --- SCRAPERS-LIST.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 85742ed92..93d7633db 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -511,8 +511,8 @@ franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_ fratx.com|FratX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay freakmobmedia.com|FreakMobMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- free.premiumbukkake.com|PremiumBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -freeones.com|FreeonesCommunity.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database -freeones.xxx|FreeonesCommunity.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +freeones.com|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database +freeones.xxx|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- freeusefantasy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- freeusemilf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
french-twinks.com|Frenchtwinks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay From 038e9c66979d1a41cc8d1b68a7962e10a3a97b88 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Fri, 4 Aug 2023 14:51:41 -0700 Subject: [PATCH 288/624] Fixed description extraction for VirtualTaboo --- scrapers/VirtualTaboo.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/VirtualTaboo.yml b/scrapers/VirtualTaboo.yml index c7794c0b5..4d9978800 100644 --- a/scrapers/VirtualTaboo.yml +++ b/scrapers/VirtualTaboo.yml @@ -20,7 +20,7 @@ xPathScrapers: selector: $genInfo/span[@class="bullet"]/following-sibling::text() postProcess: - parseDate: 02 January, 2006 - Details: &detailsSel $info//div[@class="description"]/text() + Details: &detailsSel $info//*[@class="description"] Tags: Name: $info//div[starts-with( @class,"tag-list")]/a/text() Performers: @@ -54,5 +54,5 @@ xPathScrapers: fixed: VirtualTaboo Synopsis: *detailsSel FrontImage: *imageSel -# Last Updated August 14, 2022 +# Last Updated August 04, 2023 From eb072909a01957471785da976d04eb0a9ee35a89 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Fri, 4 Aug 2023 16:16:41 -0700 Subject: [PATCH 289/624] Fixed broken image scraper for VRHush --- scrapers/VRHush.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/VRHush.yml b/scrapers/VRHush.yml index 6f754c486..990dc9635 100644 --- a/scrapers/VRHush.yml +++ b/scrapers/VRHush.yml @@ -30,7 +30,7 @@ xPathScrapers: Performers: Name: $info//h5[@class="latest-scene-subtitle"]//a/text() Image: &imageAttr - selector: $info//deo-video[1]/@cover-image + selector: //web-vr-video-player/@coverimage postProcess: - replace: - regex: ^ From 4196dad1d9f55f18881686d868842a4dbd240722 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 5 Aug 2023 06:28:48 +0200 Subject: [PATCH 290/624] Fix Pornhub scene scraping They seem to rehydrate data on the clientside instead of sending it as HTML like they used to, so we'll just grab the data from their JSON --- 
scrapers/Pornhub.yml | 53 ++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/scrapers/Pornhub.yml b/scrapers/Pornhub.yml index f2c1aa0da..558eb7a97 100644 --- a/scrapers/Pornhub.yml +++ b/scrapers/Pornhub.yml @@ -112,36 +112,45 @@ xPathScrapers: Image: //div[@class="thumbImage"]/img/@src|//img[@id="getAvatar"]/@src sceneScraper: common: - $performer: //div[@class="pornstarsWrapper js-pornstarsWrapper"]/a[@data-mxptype="Pornstar"] - $studio: //div[@data-type="channel"]/a + $datablob: //script[contains(., 'videodata')]/text() scene: - Title: //h1[@class="title"]/span/text() + Title: //meta[@property="og:title"]/@content URL: //meta[@property="og:url"]/@content Date: - selector: //script[contains(., 'uploadDate')]/text() + selector: $datablob postProcess: - replace: - - regex: .+(?:"uploadDate":\s")([^"]+).+ + - regex: .+(?:'video_date_published'\s*:\s*')([^']+).+ with: $1 - - regex: (.+)T.+ - with: $1 - - parseDate: 2006-01-02 + - parseDate: "20060102" Tags: - Name: //div[@class="categoriesWrapper"]//a[not(@class="add-btn-small ")]|//div[@class="tagsWrapper"]//a[not(@class="add-btn-small")] - Image: - selector: //meta[@property="og:image"][1]/@content + Name: + selector: $datablob + postProcess: + - replace: + - regex: .+(?:'categories_in_video'\s*:\s*')([^']+).+ + with: $1 + split: "," Performers: - Name: $performer/@data-mxptext - URL: $performer/@href + Name: + selector: $datablob + postProcess: + - replace: + - regex: .+(?:'pornstars_in_video'\s*:\s*')([^']+).+ + with: $1 + - regex: ^No$ # Hardcoded value representing lack of performers + with: "" + split: "," + + Image: + selector: //meta[@property="og:image"]/@content Studio: - Name: $studio - URL: $studio/@href - Details: - selector: //div[@class="video-info-row"][1]/text()[starts-with(normalize-space(.),"Description:")] - postProcess: - - replace: - - regex: "Description: (.*)" - with: $1 + Name: + selector: $datablob + postProcess: + - replace: + - 
regex: .+(?:'video_uploader_name'\s*:\s*')([^']+).+ + with: $1 driver: cookies: @@ -151,4 +160,4 @@ driver: Domain: ".pornhub.com" Value: "1" Path: "/" -# Last Updated July 29, 2023 +# Last Updated August 05, 2023 From fa59b5434eef266cb85180a0119075533fb0c129 Mon Sep 17 00:00:00 2001 From: Scum-Bum Date: Sat, 5 Aug 2023 23:06:50 +0100 Subject: [PATCH 291/624] fixed bug where newer scenes could scrape duplicate tags --- scrapers/vixenNetwork.py | 10 ++++++---- scrapers/vixenNetwork.yml | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/scrapers/vixenNetwork.py b/scrapers/vixenNetwork.py index 995f2fb0a..1c64425db 100644 --- a/scrapers/vixenNetwork.py +++ b/scrapers/vixenNetwork.py @@ -124,12 +124,14 @@ def parse_scene(self, response): scene['performers'].append({"name": model['name']}) scene['tags'] = [] - if data.get('tags'): - for tag in data['tags']: - scene['tags'].append({"name": tag}) - else: + tags = data.get('tags') + categories = data.get('categories') + if tags == [] and categories: for tag in data['categories']: scene['tags'].append({"name": tag['name']}) + elif tags: + for tag in data['tags']: + scene['tags'].append({"name": tag}) if data.get('images'): if data['images'].get('poster'): diff --git a/scrapers/vixenNetwork.yml b/scrapers/vixenNetwork.yml index 6aa178278..bebff994f 100644 --- a/scrapers/vixenNetwork.yml +++ b/scrapers/vixenNetwork.yml @@ -30,4 +30,4 @@ sceneByQueryFragment: - python - vixenNetwork.py -# Last Updated July 31, 2023 +# Last Updated August 05, 2023 From f6caaae22dfa9364544ee2187f9a3bce7f52714d Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 7 Aug 2023 00:00:08 +0200 Subject: [PATCH 292/624] Update JapanHDV to grab the full description The old og:description property now contains a truncated version of the description --- scrapers/JapanHDV.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/JapanHDV.yml b/scrapers/JapanHDV.yml index 498da036f..ed6dd3149 100644 --- 
a/scrapers/JapanHDV.yml +++ b/scrapers/JapanHDV.yml @@ -15,7 +15,7 @@ xPathScrapers: $movieinfo: //div[@class="pure-u-1-5 hidden-sm hidden-xs"]/div[@class="video-info"] scene: Title: $movieinfo/p[starts-with(strong,"Title")]/text() - Details: //meta[@property="og:description"]/@content + Details: //div[contains(@class, "video-description")] Date: selector: //meta[@itemprop="datePublished"]/@content postProcess: From b9587257299b92468608b0fb85f38cd0f0a30134 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Wed, 9 Aug 2023 15:52:38 -0700 Subject: [PATCH 293/624] Created scraper for rachel-steele.com as requested in Scrapers Request #73 --- SCRAPERS-LIST.md | 1 + scrapers/RachelSteele.yml | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 scrapers/RachelSteele.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index be623a44a..75d576382 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1090,6 +1090,7 @@ r18.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV r18.com|r18.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rachel-steele.com|RachelSteele.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ragingstallion.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay diff --git a/scrapers/RachelSteele.yml b/scrapers/RachelSteele.yml new file mode 100644 index 000000000..f27a028fc --- /dev/null +++ b/scrapers/RachelSteele.yml @@ -0,0 +1,56 @@ +name: RachelSteele +sceneByURL: + - action: scrapeXPath + url: + - rachel-steele.com + scraper: sceneScraper + +sceneByName: + action: scrapeXPath + queryURL: https://rachel-steele.com/x-new/new-preview-list.php?user=rachel-steele&search={}&type=all&submit=Search + scraper: sceneSearch + 
+sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + +xPathScrapers: + sceneSearch: + common: + $root: //li[contains(concat(' ',normalize-space(@class),' '),' first ')] + scene: + Title: $root//h3 + Image: + selector: $root//img/@src + postProcess: + - replace: + - regex: ^ + with: https://rachel-steele.com + URL: + selector: $root/a/@href + postProcess: + - replace: + - regex: ^ + with: https://rachel-steele.com + + sceneScraper: + scene: + Title: //div[contains(concat(' ',normalize-space(@class),' '),' span12 ')]/h3/text() + Image: + selector: //div[contains(concat(' ',normalize-space(@class),' '),' album-details ')]/div/div/img/@src + postProcess: + - replace: + - regex: ^ + with: https://rachel-steele.com + Details: //div[contains(concat(' ',normalize-space(@class),' '),' span8 ')]//p[contains(text(),"video")]/following-sibling::p[1]|//div[contains(concat(' ',normalize-space(@class),' '),' span8 ')]//br/following-sibling::span + Performers: + Name: + fixed: Rachel Steele + Studio: + Name: + fixed: Rachel-Steele.com + URL: + fixed: https://Rachel-Steele.com + +# Last Updated August 09, 2023 From db3015046dc5983e618083dc8c9de9b46456b19d Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 13 Aug 2023 09:43:42 +0200 Subject: [PATCH 294/624] Add Algolia scraper for Next Door Studios This subsumes the Rod's Room scraper! 
The only studio that does not appear to be in the Next Door Studios API yet is Next Door Hookups --- SCRAPERS-LIST.md | 31 +++++++++++------- scrapers/Algolia_NextDoorStudios.yml | 48 ++++++++++++++++++++++++++++ scrapers/Algolia_Rodsroom.yml | 39 ---------------------- scrapers/GammaEntertainment.yml | 20 ------------ scrapers/Pornhub.yml | 6 +--- scrapers/Tokyohot.py | 12 +++++-- scrapers/Twistys.yml | 7 +++- 7 files changed, 84 insertions(+), 79 deletions(-) create mode 100644 scrapers/Algolia_NextDoorStudios.yml delete mode 100644 scrapers/Algolia_Rodsroom.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index be623a44a..c07b19492 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -122,6 +122,7 @@ auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +austinwilde.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay av69.tv|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored avadawn.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- avanal.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored @@ -273,6 +274,7 @@ clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:| clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -807,6 +809,7 @@ manojob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
manroyale.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- manyvids.com|ManyVids|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|python|- +marcusmojo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -918,15 +921,17 @@ naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -nextdoorbuddies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorcasting.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorebony.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorhookups.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoormale.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorraw.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorstudios.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoortaboo.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoortwink.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorcasting.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorebony.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay 
+nextdoorfilms.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorhomemade.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoormale.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoororiginals.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorraw.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorstudios.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortaboo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortwink.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay nfbusty.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1118,7 +1123,8 @@ riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- -rodsroom.com|Algolia_Rodsroom.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +roddaily.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +rodsroom.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay romemajor.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1126,6 +1132,7 @@ russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans 
rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1214,7 +1221,7 @@ squirtalicious.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- squirted.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- squirtinglesbian.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian squirtingorgies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -stagcollective.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +stagcollective.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay stasyq.com|StasyQ.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- staxus.com|Staxus.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|gay stayhomepov.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1346,6 +1353,7 @@ titworld.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- tmwvrnet.com|TmwVRnet.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR tokyo-hot.com|Tokyohot.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored tokyobang.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1383,6 +1391,7 @@ tripforfuck.com|TripForFuck.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-| 
trueamateurs.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trueanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +trystanbull.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans diff --git a/scrapers/Algolia_NextDoorStudios.yml b/scrapers/Algolia_NextDoorStudios.yml new file mode 100644 index 000000000..0a9e3f05b --- /dev/null +++ b/scrapers/Algolia_NextDoorStudios.yml @@ -0,0 +1,48 @@ +name: Next Door Studios +sceneByURL: + - action: script + url: + - austinwilde.com/en/video + - codycummings.com/en/video + - marcusmojo.com/en/video + - nextdoorbuddies.com/en/video + - nextdoorcasting.com/en/video + - nextdoorfilms.com/en/video + - nextdoorhomemade.com/en/video + - nextdoormale.com/en/video + - nextdoororiginals.com/en/video + - nextdoorraw.com/en/video + - nextdoorstudios.com/en/video + - nextdoortaboo.com/en/video + - nextdoortwink.com/en/video + - roddaily.com/en/video + - rodsroom.com/en/video + - samuelotoole.com/en/video + - stagcollective.com/en/video + - tommydxxx.com/en/video + - trystanbull.com/en/video + script: + - python + - Algolia.py + - nextdoorstudios +sceneByFragment: + action: script + script: + - python + - Algolia.py + - nextdoorstudios +sceneByName: + action: script + script: + - python + - Algolia.py + - nextdoorstudios + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - nextdoorstudios + - validName +# Last Updated January 10, 2023 diff --git a/scrapers/Algolia_Rodsroom.yml b/scrapers/Algolia_Rodsroom.yml deleted file mode 100644 index 516382fe8..000000000 --- a/scrapers/Algolia_Rodsroom.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: 
"Rod's Room" -sceneByURL: - - action: script - url: - - rodsroom.com/en/video - script: - - python - - Algolia.py - - rodsroom -sceneByFragment: - action: script - script: - - python - - Algolia.py - - rodsroom -sceneByName: - action: script - script: - - python - - Algolia.py - - rodsroom - - searchName -sceneByQueryFragment: - action: script - script: - - python - - Algolia.py - - rodsroom - - validName -galleryByURL: - - action: script - url: - - rodsroom.com/en/photo/ - script: - - python - - Algolia.py - - rodsroom - - gallery -# Last Updated December 26, 2022 diff --git a/scrapers/GammaEntertainment.yml b/scrapers/GammaEntertainment.yml index ca8e71964..82325d6b8 100644 --- a/scrapers/GammaEntertainment.yml +++ b/scrapers/GammaEntertainment.yml @@ -36,15 +36,6 @@ sceneByURL: - mommyblowsbest.com/en/scene/ - motherfuckerxxx.com/ - myteenoasis.com/ - - nextdoorbuddies.com/en/video/ - - nextdoorcasting.com/ - - nextdoorebony.com/ - - nextdoorhookups.com/ - - nextdoormale.com/ - - nextdoorraw.com/en/video/ - - nextdoorstudios.com/en/video/ - - nextdoortaboo.com/en/video/ - - nextdoortwink.com/en/video/ - onlyteenblowjobs.com/en/scene/ - openlife.com/ - pantypops.com/ @@ -56,7 +47,6 @@ sceneByURL: - squirtalicious.com/ - squirtinglesbian.com/en/video/ - squirtingorgies.com/en/scene/ - - stagcollective.com/ - strapattackers.com/ - sunnyleone.com/ - throated.com/en/video/ @@ -188,15 +178,6 @@ xPathScrapers: mommyblowsbest: Mommy Blows Best motherfuckerxxx: Mother Fucker XXX myteenoasis: My Teen Oasis - nextdoorbuddies: Next Door Buddies - nextdoorcasting: Next Door Casting - nextdoorebony: Next Door Ebony - nextdoorhookups: Next Door Hookups - nextdoormale: Next Door Male - nextdoorraw: Next Door Raw - nextdoorstudios: Next Door Studios - nextdoortaboo: Next Door Taboo - nextdoortwink: Next Door Twink onlyteenblowjobs: Only Teen Blowjobs openlife: Open Life outofthefamily: Out Of The Family @@ -209,7 +190,6 @@ xPathScrapers: squirtalicious: Squirtalicious 
squirtinglesbian: Squirting Lesbian squirtingorgies: Squirting Orgies - stagcollective: Stag Collective strapattackers: Strap Attackers sunnyleone: Sunny Leone throated: Throated diff --git a/scrapers/Pornhub.yml b/scrapers/Pornhub.yml index 558eb7a97..5eb81b9a8 100644 --- a/scrapers/Pornhub.yml +++ b/scrapers/Pornhub.yml @@ -146,11 +146,7 @@ xPathScrapers: selector: //meta[@property="og:image"]/@content Studio: Name: - selector: $datablob - postProcess: - - replace: - - regex: .+(?:'video_uploader_name'\s*:\s*')([^']+).+ - with: $1 + fixed: Pornhub driver: cookies: diff --git a/scrapers/Tokyohot.py b/scrapers/Tokyohot.py index 34ea81233..25d95f3dc 100644 --- a/scrapers/Tokyohot.py +++ b/scrapers/Tokyohot.py @@ -28,6 +28,8 @@ "(n\d{4})\S*", # "single part N series" "(k\d{4})\S*", # "single part K series" "(kb\d{4})\S*", # "single part KB series" + "(red-?\d{3})\S*", # "RED Hot collection series" + "(sky-?\d{3})\S*", # "SKY series" ] try: @@ -110,7 +112,9 @@ def get_performers(self): info_links = info.find_all("a") for link in info_links: if "cast" in link.get("href"): - perf = TokyoHotModel(model_url=BASE_DETAIL_URL + link.get("href")).get_json() + perf = TokyoHotModel( + model_url=BASE_DETAIL_URL + link.get("href") + ).get_json() performers.append(perf) return performers @@ -133,7 +137,9 @@ def get_date(self): def get_tags(self): potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a") - return [{"Name":a.text} for a in potential_tags if "type=play" in a.get("href")] + return [ + {"Name": a.text} for a in potential_tags if "type=play" in a.get("href") + ] def get_json(self): return { @@ -145,7 +151,7 @@ def get_json(self): "Studio": {"Name": self.studio}, "Code": self.scene_id, "Image": self.image, - "Tags": self.tags + "Tags": self.tags, } diff --git a/scrapers/Twistys.yml b/scrapers/Twistys.yml index 38338d275..7442694cb 100644 --- a/scrapers/Twistys.yml +++ b/scrapers/Twistys.yml @@ -15,8 +15,12 @@ performerByURL: xPathScrapers: 
sceneScraper: + common: + $poster: //section[1] + $info: //section[2] + $description: //h3 scene: - Title: //h2[contains(@class,"edmOvr")]/text() + Title: //h2[contains(@class,"font-primary")] Date: selector: //h2[contains(@class,"jywyKe")]/text() postProcess: @@ -29,6 +33,7 @@ xPathScrapers: - replace: - regex: ',\s*$' with: + URL: //link[@rel="canonical"]/@href Performers: Name: //h2[contains(@class,"hgImKM")]//a/text() Studio: From 79579333dea0a4c66dfb747238f50cc245760c33 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Mon, 14 Aug 2023 11:54:50 +0200 Subject: [PATCH 295/624] Add eporner scraper --- scrapers/Eporner.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 scrapers/Eporner.yml diff --git a/scrapers/Eporner.yml b/scrapers/Eporner.yml new file mode 100644 index 000000000..5cf7ff2a0 --- /dev/null +++ b/scrapers/Eporner.yml @@ -0,0 +1,28 @@ +name: Eporner +sceneByURL: + - action: scrapeXPath + url: + - www.eporner.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //*[@id="video-info"]/h1/text() + Date: + selector: //script[contains(.,"uploadDate")]/text() + postProcess: + - replace: + - regex: .*uploadDate":\s"(\d{4}-\d{2}-\d{2})T.* + with: $1 + - parseDate: 2006-01-02 + Performers: + Name: //*[@id="video-info-tags"]/ul//li[@class='vit-pornstar starw']/a/text() + Tags: + Name: //*[@id="video-info-tags"]/ul//li[@class='vit-category' or @class='vit-tag']/a/text() + Studio: + Name: //*[@id="video-info-tags"]/ul/li[@class='vit-uploader']/a/text() + Image: //meta[@property="og:image"]/@content + URL: //meta[@property="og:url"]/@content + +# Last Updated August 14, 2023 From 36429aa5eb07da0c4306a310c9eb521f8b436dba Mon Sep 17 00:00:00 2001 From: BlokeCDP <92161256+BlokeCDP@users.noreply.github.com> Date: Tue, 15 Aug 2023 14:14:04 +0100 Subject: [PATCH 296/624] Create DownblouseJerk.yml Created scraper in correct location this 
time....sorry. --- scrapers/DownblouseJerk.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 scrapers/DownblouseJerk.yml diff --git a/scrapers/DownblouseJerk.yml b/scrapers/DownblouseJerk.yml new file mode 100644 index 000000000..4f14069c6 --- /dev/null +++ b/scrapers/DownblouseJerk.yml @@ -0,0 +1,33 @@ +name: Downblouse Jerk +sceneByURL: + - action: scrapeXPath + url: + - downblousejerk.com/videoentry/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //h1[@class='page_title']/text() + postProcess: + - replace: + - regex: .*\x{201C}(.*). + with: $1 + Date: + selector: //div[@class="date_likes_holder"]/a[@href="#"]/@title + postProcess: + - replace: + - regex: .+?,\s+([A-Za-z]+) (\d+).+, (\d+),.+ + with: $1 $2, $3 + - parseDate: January 2, 2006 + Performers: + Name: //span[@class="meta_modelcategory meta_category"]//a + Details: //div[@class="post_excerpt"]/p + Tags: + Name: //span[@class="meta_videocategory meta_category"]//a + Image: + selector: //video/@poster + Studio: + Name: + fixed: Downblouse Jerk +# Last Updated August 15, 2023 From ff8adf6efcc59747a74486c072e244fc31ee2424 Mon Sep 17 00:00:00 2001 From: BlokeCDP <92161256+BlokeCDP@users.noreply.github.com> Date: Tue, 15 Aug 2023 14:15:15 +0100 Subject: [PATCH 297/624] Update Wankitnow.yml Updated to get the correct year for 2023 scenes --- scrapers/Wankitnow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Wankitnow.yml b/scrapers/Wankitnow.yml index 827b08cbb..25a575fad 100644 --- a/scrapers/Wankitnow.yml +++ b/scrapers/Wankitnow.yml @@ -29,6 +29,6 @@ xPathScrapers: postProcess: - replace: - regex: ^(\w+)/(\d+)$ - with: $1/22/$2 # Needs to be updated every year + with: $1/23/$2 # Needs to be updated every year - parseDate: Jan/06/02 -# Last Updated June 13, 2022 +# Last Updated August 15, 2023 From 0f82d8ce184f107f3de657f08c03509be899b603 Mon Sep 17 00:00:00 2001 From: VillageIdiot Date: Wed, 
16 Aug 2023 08:02:46 -0700 Subject: [PATCH 298/624] Implemented recommended updates --- scrapers/RachelSteele.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/RachelSteele.yml b/scrapers/RachelSteele.yml index f27a028fc..ffcfd16c6 100644 --- a/scrapers/RachelSteele.yml +++ b/scrapers/RachelSteele.yml @@ -43,13 +43,13 @@ xPathScrapers: - replace: - regex: ^ with: https://rachel-steele.com - Details: //div[contains(concat(' ',normalize-space(@class),' '),' span8 ')]//p[contains(text(),"video")]/following-sibling::p[1]|//div[contains(concat(' ',normalize-space(@class),' '),' span8 ')]//br/following-sibling::span + Details: //meta[@name="twitter:description"]/@content Performers: Name: fixed: Rachel Steele Studio: Name: - fixed: Rachel-Steele.com + fixed: Rachel Steele URL: fixed: https://Rachel-Steele.com From 80ccb73f8599010ef7b1a66306888a0bdf293ff7 Mon Sep 17 00:00:00 2001 From: woodgen Date: Wed, 16 Aug 2023 17:53:51 +0200 Subject: [PATCH 299/624] AnalVids: Rename from old name LegalPorno. 
--- scrapers/{LegalPorno.py => AnalVids.py} | 0 scrapers/{LegalPorno.yml => AnalVids.yml} | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename scrapers/{LegalPorno.py => AnalVids.py} (100%) rename scrapers/{LegalPorno.yml => AnalVids.yml} (97%) diff --git a/scrapers/LegalPorno.py b/scrapers/AnalVids.py similarity index 100% rename from scrapers/LegalPorno.py rename to scrapers/AnalVids.py diff --git a/scrapers/LegalPorno.yml b/scrapers/AnalVids.yml similarity index 97% rename from scrapers/LegalPorno.yml rename to scrapers/AnalVids.yml index 92264091b..105e9a759 100644 --- a/scrapers/LegalPorno.yml +++ b/scrapers/AnalVids.yml @@ -1,6 +1,6 @@ # yaml-language-server: $schema=../validator/scraper.schema.json -name: "LegalPorno" +name: "AnalVids" sceneByURL: - action: scrapeXPath url: @@ -16,7 +16,7 @@ sceneByFragment: script: - python # use python3 instead if needed - - LegalPorno.py + - AnalVids.py - query xPathScrapers: From be3f5bf5f528df289523dd235e3c2f5a5fda8a8b Mon Sep 17 00:00:00 2001 From: woodgen Date: Wed, 16 Aug 2023 17:56:00 +0200 Subject: [PATCH 300/624] AnalVids: Update for new website. Rewrite all selectors. Add support for pissvids.com url, since it's the same site. Add URLs to performers and studio. 
--- scrapers/AnalVids.yml | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/scrapers/AnalVids.yml b/scrapers/AnalVids.yml index 105e9a759..ae746fa42 100644 --- a/scrapers/AnalVids.yml +++ b/scrapers/AnalVids.yml @@ -5,11 +5,13 @@ sceneByURL: - action: scrapeXPath url: - analvids.com/watch/ + - pissvids.com/watch/ scraper: sceneScraper performerByURL: - action: scrapeXPath url: - - https://www.analvids.com + - analvids.com/model/ + - pissvids.com/model/ scraper: performerScraper sceneByFragment: action: script @@ -22,39 +24,37 @@ sceneByFragment: xPathScrapers: sceneScraper: common: - $description: //dl[@class="dl-horizontal scene-description__column"] + $title: //h1[contains(@class, "watch__title")]//text()[not(ancestor::span)] scene: Title: - selector: //h1[@class="watchpage-title"]//text() + selector: $title concat: " " Date: - selector: //span[@title="Release date"]/a/text() + selector: //i[contains(@class, "bi-calendar3")]/text() postProcess: - parseDate: 2006-01-02 - Details: $description/div[3]/dd/text() + Details: + selector: //div[contains(@class, "text-mob-more")]//text()[not(parent::span[contains(@class, "dots")])] + concat: " " Code: - selector: //h1[@class="watchpage-title"] + selector: $title postProcess: - replace: - regex: .+?([A-Z]{2,3}\d+)$|(.+) with: $1 Performers: - Name: $description/div[1]/dd/a[contains(@href,'analvids.com/model')]/text() + Name: //h1[contains(@class, "watch__title")]//a/text() + URL: //h1[contains(@class, "watch__title")]//a/@href Studio: - Name: //div[@class="col-md-4 col-lg-3 hide-mobile text-right"]/div[@class="studio-director"]//a/text() + Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/text() + URL: //div[contains(@class, "genres-list")]//a[contains(@href, "/studios/")]/@href Tags: - Name: $description/div[2]//a/text() - Image: - selector: //div[@id="player"]/@style - postProcess: - - replace: - - regex: .+(https[^"]+).+ - with: $1 - 
URL: //meta[@property="og:url"]/@content + Name: //div[contains(@class, "genres-list")]//a[contains(@href, "/genre/")]/text() + Image: //video/@data-poster performerScraper: performer: - Name: //h2 - Country: //td[@class='text-danger']//a[contains(@href,'nationality')]/text() - Image: //div[@class='model--avatar']//img/@src -# Last Updated March 03, 2023 + Name: //h1 + Country: //a[contains(@href, "nationality")] + Image: //div[contains(@class, 'model__left')]//img/@src +# Last Updated August 16, 2023 From 8c1e75f5d379675523b19f89da66dbe280dee3bb Mon Sep 17 00:00:00 2001 From: BlokeCDP <92161256+BlokeCDP@users.noreply.github.com> Date: Thu, 17 Aug 2023 14:03:32 +0100 Subject: [PATCH 301/624] Update SCRAPERS-LIST.md Added downblousejerk --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index be623a44a..1dbb1be5c 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -406,6 +406,7 @@ dorcelclub.com|DorcelClub.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- dorcelvision.com|DorcelVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- dothewife.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- doubleteamedteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +downblousejerk.com|downblousejerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- dreamsofspanking.com|DreamsOfSpanking.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- From 32c3b95c92deb9cc90d7e663de0ee1c52ec0b7f3 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 18 Aug 2023 04:21:28 +0200 Subject: [PATCH 302/624] Replace AuntJudys XPath scraper with Python scraper provided by 2itno on Discord --- scrapers/AuntJudys.py | 112 +++++++++++++++++++++++++++++++++++++++++ scrapers/AuntJudys.yml | 35 +++---------- 2 files changed, 119 insertions(+), 28 deletions(-) create mode 100644 
scrapers/AuntJudys.py diff --git a/scrapers/AuntJudys.py b/scrapers/AuntJudys.py new file mode 100644 index 000000000..c85b06c09 --- /dev/null +++ b/scrapers/AuntJudys.py @@ -0,0 +1,112 @@ +import json +import sys +import urllib.request +import urllib.parse + +try: + from lxml import html +except ModuleNotFoundError: + print( + "You need to install the lxml module. (https://lxml.de/installation.html#installation)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python),", + "run this command in a terminal (cmd): python -m pip install lxml", + file=sys.stderr, + ) + sys.exit(1) + +try: + from py_common.log import debug +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo", + "https://github.com/stashapp/CommunityScrapers", + file=sys.stderr, + ) + sys.exit(1) + +headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36" +} + + +def sceneByURL(url): + req = urllib.request.Request(url, headers=headers) + res = urllib.request.urlopen(req) + if not res.status == 200: + debug(f"Request to '{url}' failed with status code {res.status}") + return {} + + tree = html.fromstring(res.read().decode()) + + m, d, y = ( + tree.xpath("//div[contains(@class,'update_date')]/text()[1]") + .pop(0) + .strip() + .split("/") + ) + url_parts = urllib.parse.urlparse(url) + + scene = { + "title": tree.xpath("//span[@class='title_bar_hilite']/text()").pop(), + "details": tree.xpath("//span[@class='update_description']/text()") + .pop() + .strip(), + "studio": { + "name": "Aunt Judy's" if "auntjudys.com" in url else "Aunt Judy's XXX" + }, + "performers": [ + {"name": x} + for x in tree.xpath("//p/span[@class='update_models']/a/text()[1]") + ], + "tags": [ + {"name": x} for x in tree.xpath("//span[@class='update_tags']/a/text()") + ], + "date": "-".join([y, m, d]), + } + + try: + next_url = 
tree.xpath("//p/span[@class='update_models']/a/@href[1]").pop(0) + while next_url: + req = urllib.request.Request(next_url, headers=headers) + res = urllib.request.urlopen(req) + tree = html.fromstring(res.read().decode()) + next_url = None + links = tree.xpath("//div[a[@href='{}']]".format(url)) + if len(links): + link = links[0] + scene["code"] = link.get("data-setid") + scene["image"] = urllib.parse.urlunparse( + ( + url_parts.scheme, + url_parts.netloc, + link.xpath("./a/img/@src0_4x").pop(0), + "", + "", + "", + ) + ) + else: + n = tree.xpath("//a[@class='pagenav' and span[text()='>']]/@href") + if len(n): + next_url = urllib.parse.urlunparse( + ( + url_parts.scheme, + url_parts.netloc, + "/tour/" + n.pop(0), + "", + "", + "", + ) + ) + except Exception as e: + debug(f"Unable to find image for scene: {e}") + + return scene + + +if sys.argv[1] == "sceneByURL": + j = json.loads(sys.stdin.read()) + print(json.dumps(sceneByURL(j["url"]))) diff --git a/scrapers/AuntJudys.yml b/scrapers/AuntJudys.yml index 8a7446d4b..fc38558db 100644 --- a/scrapers/AuntJudys.yml +++ b/scrapers/AuntJudys.yml @@ -1,32 +1,11 @@ name: AuntJudys sceneByURL: - - action: scrapeXPath - url: + - action: script + url: - auntjudys.com - auntjudysxxx.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Studio: - Name: - selector: //base/@href - postProcess: - - replace: - - regex: ^https?://(?:www\.)?(.+)\.com/.*$ - with: $1 - - map: - auntjudys: Aunt Judy's - auntjudysxxx: Aunt Judy's XXX - Title: //span[@class="title_bar_hilite"] - Details: //span[@class="update_description"] - Performers: - Name: //p/span[@class="update_models"]/a - URL: //p/span[@class="update_models"]/a/@href - Tags: - Name: //span[@class="update_tags"]/a - Date: - selector: //div[@class="cell update_date"]/text()[1] - postProcess: - - parseDate: 1/2/2006 -# Last Updated July 27, 2023 + script: + - python + - AuntJudys.py + - sceneByURL +# Last Updated August 18, 2023 From 
fe3e3b6e21ce1511951c910ab8bb58c0124cae70 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 18 Aug 2023 05:19:01 +0200 Subject: [PATCH 303/624] Revert "Add Algolia scraper for Next Door Studios" This reverts commit db3015046dc5983e618083dc8c9de9b46456b19d. Accidentally contained scratchpad changes to other scrapers! --- SCRAPERS-LIST.md | 31 +++++++----------- scrapers/Algolia_NextDoorStudios.yml | 48 ---------------------------- scrapers/Algolia_Rodsroom.yml | 39 ++++++++++++++++++++++ scrapers/GammaEntertainment.yml | 20 ++++++++++++ scrapers/Pornhub.yml | 6 +++- scrapers/Tokyohot.py | 12 ++----- scrapers/Twistys.yml | 7 +--- 7 files changed, 79 insertions(+), 84 deletions(-) delete mode 100644 scrapers/Algolia_NextDoorStudios.yml create mode 100644 scrapers/Algolia_Rodsroom.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 0cce03bdf..dbb2ae52c 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -122,7 +122,6 @@ auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -austinwilde.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay av69.tv|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored avadawn.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- avanal.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored @@ -274,7 +273,6 @@ clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:| clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -810,7 +808,6 @@ manojob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- manroyale.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- manyvids.com|ManyVids|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|python|- -marcusmojo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -922,17 +919,15 @@ naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoorcasting.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoorebony.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoorfilms.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoorhomemade.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoormale.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoororiginals.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoorraw.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay 
-nextdoorstudios.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoortaboo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -nextdoortwink.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorbuddies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorcasting.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorebony.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorhookups.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoormale.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorraw.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorstudios.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoortaboo.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoortwink.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay nfbusty.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1125,8 +1120,7 @@ riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- -roddaily.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay -rodsroom.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +rodsroom.com|Algolia_Rodsroom.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay romemajor.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1134,7 +1128,6 @@ russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1223,7 +1216,7 @@ squirtalicious.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- squirted.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- squirtinglesbian.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian squirtingorgies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -stagcollective.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +stagcollective.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay stasyq.com|StasyQ.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- staxus.com|Staxus.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|gay stayhomepov.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1355,7 +1348,6 @@ titworld.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- tmwvrnet.com|TmwVRnet.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR tokyo-hot.com|Tokyohot.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored tokyobang.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV -tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans 
topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1393,7 +1385,6 @@ tripforfuck.com|TripForFuck.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-| trueamateurs.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trueanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -trystanbull.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans diff --git a/scrapers/Algolia_NextDoorStudios.yml b/scrapers/Algolia_NextDoorStudios.yml deleted file mode 100644 index 0a9e3f05b..000000000 --- a/scrapers/Algolia_NextDoorStudios.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: Next Door Studios -sceneByURL: - - action: script - url: - - austinwilde.com/en/video - - codycummings.com/en/video - - marcusmojo.com/en/video - - nextdoorbuddies.com/en/video - - nextdoorcasting.com/en/video - - nextdoorfilms.com/en/video - - nextdoorhomemade.com/en/video - - nextdoormale.com/en/video - - nextdoororiginals.com/en/video - - nextdoorraw.com/en/video - - nextdoorstudios.com/en/video - - nextdoortaboo.com/en/video - - nextdoortwink.com/en/video - - roddaily.com/en/video - - rodsroom.com/en/video - - samuelotoole.com/en/video - - stagcollective.com/en/video - - tommydxxx.com/en/video - - trystanbull.com/en/video - script: - - python - - Algolia.py - - nextdoorstudios -sceneByFragment: - action: script - script: - - python - - Algolia.py - - nextdoorstudios -sceneByName: - action: script - script: - - python - - Algolia.py - - nextdoorstudios - - searchName -sceneByQueryFragment: - action: script - script: - - python - - Algolia.py - - nextdoorstudios - - validName -# Last Updated January 10, 2023 diff --git 
a/scrapers/Algolia_Rodsroom.yml b/scrapers/Algolia_Rodsroom.yml new file mode 100644 index 000000000..516382fe8 --- /dev/null +++ b/scrapers/Algolia_Rodsroom.yml @@ -0,0 +1,39 @@ +name: "Rod's Room" +sceneByURL: + - action: script + url: + - rodsroom.com/en/video + script: + - python + - Algolia.py + - rodsroom +sceneByFragment: + action: script + script: + - python + - Algolia.py + - rodsroom +sceneByName: + action: script + script: + - python + - Algolia.py + - rodsroom + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - rodsroom + - validName +galleryByURL: + - action: script + url: + - rodsroom.com/en/photo/ + script: + - python + - Algolia.py + - rodsroom + - gallery +# Last Updated December 26, 2022 diff --git a/scrapers/GammaEntertainment.yml b/scrapers/GammaEntertainment.yml index 82325d6b8..ca8e71964 100644 --- a/scrapers/GammaEntertainment.yml +++ b/scrapers/GammaEntertainment.yml @@ -36,6 +36,15 @@ sceneByURL: - mommyblowsbest.com/en/scene/ - motherfuckerxxx.com/ - myteenoasis.com/ + - nextdoorbuddies.com/en/video/ + - nextdoorcasting.com/ + - nextdoorebony.com/ + - nextdoorhookups.com/ + - nextdoormale.com/ + - nextdoorraw.com/en/video/ + - nextdoorstudios.com/en/video/ + - nextdoortaboo.com/en/video/ + - nextdoortwink.com/en/video/ - onlyteenblowjobs.com/en/scene/ - openlife.com/ - pantypops.com/ @@ -47,6 +56,7 @@ sceneByURL: - squirtalicious.com/ - squirtinglesbian.com/en/video/ - squirtingorgies.com/en/scene/ + - stagcollective.com/ - strapattackers.com/ - sunnyleone.com/ - throated.com/en/video/ @@ -178,6 +188,15 @@ xPathScrapers: mommyblowsbest: Mommy Blows Best motherfuckerxxx: Mother Fucker XXX myteenoasis: My Teen Oasis + nextdoorbuddies: Next Door Buddies + nextdoorcasting: Next Door Casting + nextdoorebony: Next Door Ebony + nextdoorhookups: Next Door Hookups + nextdoormale: Next Door Male + nextdoorraw: Next Door Raw + nextdoorstudios: Next Door Studios + nextdoortaboo: Next Door Taboo + 
nextdoortwink: Next Door Twink onlyteenblowjobs: Only Teen Blowjobs openlife: Open Life outofthefamily: Out Of The Family @@ -190,6 +209,7 @@ xPathScrapers: squirtalicious: Squirtalicious squirtinglesbian: Squirting Lesbian squirtingorgies: Squirting Orgies + stagcollective: Stag Collective strapattackers: Strap Attackers sunnyleone: Sunny Leone throated: Throated diff --git a/scrapers/Pornhub.yml b/scrapers/Pornhub.yml index 5eb81b9a8..558eb7a97 100644 --- a/scrapers/Pornhub.yml +++ b/scrapers/Pornhub.yml @@ -146,7 +146,11 @@ xPathScrapers: selector: //meta[@property="og:image"]/@content Studio: Name: - fixed: Pornhub + selector: $datablob + postProcess: + - replace: + - regex: .+(?:'video_uploader_name'\s*:\s*')([^']+).+ + with: $1 driver: cookies: diff --git a/scrapers/Tokyohot.py b/scrapers/Tokyohot.py index 25d95f3dc..34ea81233 100644 --- a/scrapers/Tokyohot.py +++ b/scrapers/Tokyohot.py @@ -28,8 +28,6 @@ "(n\d{4})\S*", # "single part N series" "(k\d{4})\S*", # "single part K series" "(kb\d{4})\S*", # "single part KB series" - "(red-?\d{3})\S*", # "RED Hot collection series" - "(sky-?\d{3})\S*", # "SKY series" ] try: @@ -112,9 +110,7 @@ def get_performers(self): info_links = info.find_all("a") for link in info_links: if "cast" in link.get("href"): - perf = TokyoHotModel( - model_url=BASE_DETAIL_URL + link.get("href") - ).get_json() + perf = TokyoHotModel(model_url=BASE_DETAIL_URL + link.get("href")).get_json() performers.append(perf) return performers @@ -137,9 +133,7 @@ def get_date(self): def get_tags(self): potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a") - return [ - {"Name": a.text} for a in potential_tags if "type=play" in a.get("href") - ] + return [{"Name":a.text} for a in potential_tags if "type=play" in a.get("href")] def get_json(self): return { @@ -151,7 +145,7 @@ def get_json(self): "Studio": {"Name": self.studio}, "Code": self.scene_id, "Image": self.image, - "Tags": self.tags, + "Tags": self.tags } diff --git 
a/scrapers/Twistys.yml b/scrapers/Twistys.yml index 7442694cb..38338d275 100644 --- a/scrapers/Twistys.yml +++ b/scrapers/Twistys.yml @@ -15,12 +15,8 @@ performerByURL: xPathScrapers: sceneScraper: - common: - $poster: //section[1] - $info: //section[2] - $description: //h3 scene: - Title: //h2[contains(@class,"font-primary")] + Title: //h2[contains(@class,"edmOvr")]/text() Date: selector: //h2[contains(@class,"jywyKe")]/text() postProcess: @@ -33,7 +29,6 @@ xPathScrapers: - replace: - regex: ',\s*$' with: - URL: //link[@rel="canonical"]/@href Performers: Name: //h2[contains(@class,"hgImKM")]//a/text() Studio: From c6ec8fea9a98df30dcb73b145bf9710a644a2f8b Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 18 Aug 2023 05:48:39 +0200 Subject: [PATCH 304/624] Add Algolia scraper for Next Door Studios This recreates #1437 without the extra files --- SCRAPERS-LIST.md | 31 +++++++++++------- scrapers/Algolia_NextDoorStudios.yml | 48 ++++++++++++++++++++++++++++ scrapers/Algolia_Rodsroom.yml | 39 ---------------------- scrapers/GammaEntertainment.yml | 20 ------------ 4 files changed, 68 insertions(+), 70 deletions(-) create mode 100644 scrapers/Algolia_NextDoorStudios.yml delete mode 100644 scrapers/Algolia_Rodsroom.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index dbb2ae52c..0cce03bdf 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -122,6 +122,7 @@ auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +austinwilde.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay av69.tv|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored avadawn.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- avanal.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored @@ -273,6 +274,7 @@ 
clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:| clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -808,6 +810,7 @@ manojob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- manroyale.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- manyvids.com|ManyVids|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|python|- +marcusmojo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -919,15 +922,17 @@ naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -nextdoorbuddies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorcasting.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorebony.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorhookups.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoormale.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
-nextdoorraw.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoorstudios.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoortaboo.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay -nextdoortwink.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorcasting.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorebony.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorfilms.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorhomemade.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoormale.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoororiginals.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorraw.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorstudios.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortaboo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortwink.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay nfbusty.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1120,7 +1125,8 @@ riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- 
-rodsroom.com|Algolia_Rodsroom.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +roddaily.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +rodsroom.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay romemajor.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1128,6 +1134,7 @@ russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1216,7 +1223,7 @@ squirtalicious.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- squirted.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- squirtinglesbian.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian squirtingorgies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -stagcollective.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +stagcollective.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay stasyq.com|StasyQ.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- staxus.com|Staxus.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|gay stayhomepov.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1348,6 +1355,7 @@ titworld.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- tmwvrnet.com|TmwVRnet.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR 
tokyo-hot.com|Tokyohot.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored tokyobang.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1385,6 +1393,7 @@ tripforfuck.com|TripForFuck.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-| trueamateurs.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- trueanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +trystanbull.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans diff --git a/scrapers/Algolia_NextDoorStudios.yml b/scrapers/Algolia_NextDoorStudios.yml new file mode 100644 index 000000000..07099fcd3 --- /dev/null +++ b/scrapers/Algolia_NextDoorStudios.yml @@ -0,0 +1,48 @@ +name: Next Door Studios +sceneByURL: + - action: script + url: + - austinwilde.com/en/video + - codycummings.com/en/video + - marcusmojo.com/en/video + - nextdoorbuddies.com/en/video + - nextdoorcasting.com/en/video + - nextdoorfilms.com/en/video + - nextdoorhomemade.com/en/video + - nextdoormale.com/en/video + - nextdoororiginals.com/en/video + - nextdoorraw.com/en/video + - nextdoorstudios.com/en/video + - nextdoortaboo.com/en/video + - nextdoortwink.com/en/video + - roddaily.com/en/video + - rodsroom.com/en/video + - samuelotoole.com/en/video + - stagcollective.com/en/video + - tommydxxx.com/en/video + - trystanbull.com/en/video + script: + - python + - Algolia.py + - 
nextdoorstudios +sceneByFragment: + action: script + script: + - python + - Algolia.py + - nextdoorstudios +sceneByName: + action: script + script: + - python + - Algolia.py + - nextdoorstudios + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - nextdoorstudios + - validName +# Last Updated August 18, 2023 diff --git a/scrapers/Algolia_Rodsroom.yml b/scrapers/Algolia_Rodsroom.yml deleted file mode 100644 index 516382fe8..000000000 --- a/scrapers/Algolia_Rodsroom.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: "Rod's Room" -sceneByURL: - - action: script - url: - - rodsroom.com/en/video - script: - - python - - Algolia.py - - rodsroom -sceneByFragment: - action: script - script: - - python - - Algolia.py - - rodsroom -sceneByName: - action: script - script: - - python - - Algolia.py - - rodsroom - - searchName -sceneByQueryFragment: - action: script - script: - - python - - Algolia.py - - rodsroom - - validName -galleryByURL: - - action: script - url: - - rodsroom.com/en/photo/ - script: - - python - - Algolia.py - - rodsroom - - gallery -# Last Updated December 26, 2022 diff --git a/scrapers/GammaEntertainment.yml b/scrapers/GammaEntertainment.yml index ca8e71964..82325d6b8 100644 --- a/scrapers/GammaEntertainment.yml +++ b/scrapers/GammaEntertainment.yml @@ -36,15 +36,6 @@ sceneByURL: - mommyblowsbest.com/en/scene/ - motherfuckerxxx.com/ - myteenoasis.com/ - - nextdoorbuddies.com/en/video/ - - nextdoorcasting.com/ - - nextdoorebony.com/ - - nextdoorhookups.com/ - - nextdoormale.com/ - - nextdoorraw.com/en/video/ - - nextdoorstudios.com/en/video/ - - nextdoortaboo.com/en/video/ - - nextdoortwink.com/en/video/ - onlyteenblowjobs.com/en/scene/ - openlife.com/ - pantypops.com/ @@ -56,7 +47,6 @@ sceneByURL: - squirtalicious.com/ - squirtinglesbian.com/en/video/ - squirtingorgies.com/en/scene/ - - stagcollective.com/ - strapattackers.com/ - sunnyleone.com/ - throated.com/en/video/ @@ -188,15 +178,6 @@ xPathScrapers: mommyblowsbest: 
Mommy Blows Best motherfuckerxxx: Mother Fucker XXX myteenoasis: My Teen Oasis - nextdoorbuddies: Next Door Buddies - nextdoorcasting: Next Door Casting - nextdoorebony: Next Door Ebony - nextdoorhookups: Next Door Hookups - nextdoormale: Next Door Male - nextdoorraw: Next Door Raw - nextdoorstudios: Next Door Studios - nextdoortaboo: Next Door Taboo - nextdoortwink: Next Door Twink onlyteenblowjobs: Only Teen Blowjobs openlife: Open Life outofthefamily: Out Of The Family @@ -209,7 +190,6 @@ xPathScrapers: squirtalicious: Squirtalicious squirtinglesbian: Squirting Lesbian squirtingorgies: Squirting Orgies - stagcollective: Stag Collective strapattackers: Strap Attackers sunnyleone: Sunny Leone throated: Throated From 1fc3c48adcb8b7ddcd8c10fb444950ed7bfd7acc Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 18 Aug 2023 11:59:32 +0200 Subject: [PATCH 305/624] Fix for scenes that do not have preview images --- scrapers/Loyalfans/Loyalfans.py | 158 +++++++++++++++----------------- 1 file changed, 75 insertions(+), 83 deletions(-) diff --git a/scrapers/Loyalfans/Loyalfans.py b/scrapers/Loyalfans/Loyalfans.py index aeef5e4a4..725c1e158 100644 --- a/scrapers/Loyalfans/Loyalfans.py +++ b/scrapers/Loyalfans/Loyalfans.py @@ -3,8 +3,7 @@ import json # to import from a parent directory we need to add that directory to the system path -csd = os.path.dirname( - os.path.realpath(__file__)) # get current script directory +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory parent = os.path.dirname(csd) # parent directory (should be the scrapers one) sys.path.append( parent @@ -16,101 +15,93 @@ except ModuleNotFoundError: print( "You need to download the folder 'py_common' from the community repo. (CommunityScrapers/tree/master/scrapers/py_common)", - file=sys.stderr) + file=sys.stderr, + ) sys.exit() try: # Import necessary modules. 
- from lxml import html import requests - from requests import utils - from requests import cookies import re - from urllib.parse import urlparse - from bs4 import BeautifulSoup # If one of these modules is not installed: except ModuleNotFoundError: - log.error( - "You need to install the python modules mentioned in requirements.txt" - ) + log.error("You need to install the python modules mentioned in requirements.txt") log.error( "If you have pip (normally installed with python), run this command in a terminal from the directory the scraper is located: pip install -r requirements.txt" ) sys.exit() +# Lookup table for tag replacements. The tags are in the form of hashtags, and often have multiple words mashed together. +# This is a quick and dirty way of turning these into meaningful data, and can be expanded on to taste. +TAG_REPLACEMENTS = { + "Fin Dom": "Findom", + "Fem Dom": "Femdom", + "bigtits": "Big Tits", + "titworship": "Tit Worship", + "financialdomination": "Financial Domination", + "R I P O F F": "ripoff", + "pussydenial": "pussy denial", +} + + def output_json_url(title, tags, url, image, studio, performers, description, date): - # Split the tags into a list (comma-separated), stripping away any trailing full stops or tags which are just "N/A" - tag_list = [tag.strip().rstrip('.') for tag in tags.split(",") if tag.strip() != "N/A"] # Create a tag dictionary from the tag list. - tag_dicts = [{"name": tag} for tag in tag_list] - # We're only using the value of 'performers' for our performer list. Kept for future-proofing, and also because I couldn't get it to work any other way. - performer_list = [performers] - performer_dicts = [{"name": performer} for performer in performer_list] + tag_dicts = [{"name": tag.strip(". ")} for tag in tags if tag.strip() != "N/A"] + # We're only using the value of 'performers' for our performer list + performer_dicts = [{"name": performer} for performer in performers] # Dump all of this as JSON data. 
- return json.dumps({ - "title": title, - "tags": tag_dicts, - "url": url, - "image": image, - "studio": {"name": studio}, - "performers": performer_dicts, - "details": description, - "date": date - }, indent=4) + return json.dumps( + { + "title": title, + "tags": tag_dicts, + "url": url, + "image": image, + "studio": {"name": studio}, + "performers": performer_dicts, + "details": description, + "date": date, + }, + indent=2, + ) + def get_cookies(scene_url: str): # Establish a session. session = requests.Session() # Set headers required for a successful POST query. headers = { - 'Accept': 'application/json, text/plain, */*', - 'Accept-Language': 'en-US,en;q=0.9', - 'Connection': 'keep-alive', - 'Content-Type': 'application/json', - 'Origin': 'https://www.loyalfans.com', - 'Referer': scene_url, - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-origin', - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', - 'X-Api-Version': '3.4.4', - 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"Linux"', + "Accept": "application/json", + "Accept-Language": "en-US,en;q=0.9", + "Content-Type": "application/json", + "Origin": "https://www.loyalfans.com", + "Referer": scene_url, + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", } # URL of the system status API. This is called when a Loyalfans page is first loaded from what I can tell. - url = 'https://www.loyalfans.com/api/v2/system-status' - data = {} + url = "https://www.loyalfans.com/api/v2/system-status" # Perform a POST query to capture initial cookies. - response = session.post(url, headers=headers, json=data) + response = session.post(url, headers=headers) # Return these cookies. 
return response.cookies def get_api_url(scene_url: str): # Extract the last component of the scene URL. - end_segment = scene_url.split('/')[-1] + end_segment = scene_url.split("/")[-1] # Append this to the API link. As far as I can tell, post names in scene URLs are unique. I have yet to encounter any data mismatches. return f"https://www.loyalfans.com/api/v1/social/post/{end_segment}" + def get_json(scene_url: str): # Set headers required for a successful request. headers = { - 'Accept': 'application/json, text/plain, */*', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'en-US,en;q=0.9', - 'Connection': 'keep-alive', - 'Host': 'www.loyalfans.com', - 'Referer': scene_url, - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-origin', - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36', - 'X-Api-Version': '3.4.4', - 'sec-ch-ua': '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-platform': '"Linux"' + "Accept": "application/json", + "Accept-Language": "en-US,en;q=0.9", + "Content-Type": "application/json", + "Origin": "https://www.loyalfans.com", + "Referer": scene_url, + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36", } # Set cookies using get_cookies function. cookie_set = get_cookies(scene_url) @@ -121,33 +112,29 @@ def get_json(scene_url: str): # Return the JSON data. return json_data + def scrape_scene(scene_url: str) -> dict: # Capture JSON relating to this scene from the Loyalfans API. json = get_json(scene_url) # Extract title from the JSON and strip out any whitespace. - title = json['post']['title'].strip() + title = json["post"]["title"].strip() # Use the video thumbnail/preview poster as the image. 
- image = json['post']['video_object']['poster'] + image = json["post"]["video_object"].get("poster") # Extract description, fix apostrophes and remove HTML newline tags. - description = json['post']['content'].replace('\u2019', "'").replace('
', '') + description = json["post"]["content"].replace("\u2019", "'").replace("
", "") # Sometimes hashtags are included at the bottom of the description. This line strips all that junk out, as we're utilising the hashtags for the tags. Also tidies up double-spacing and ellipses. - description = re.sub(r'#\w+\b', '', description).strip().replace(' ', ' ').replace('. . .', '...') + description = ( + re.sub(r"#\w+\b", "", description) + .strip() + .replace(" ", " ") + .replace(". . .", "...") + ) # Extract studio name. - studio = json['post']['owner']['display_name'] + studio = json["post"]["owner"]["display_name"] # Extract date. The JSON returns the date in the format '2023-06-18 12:00:00', but we only need the date, so the time is stripped out. - date = json['post']['created_at']['date'].split(' ')[0] + date = json["post"]["created_at"]["date"].split(" ")[0] # Extract tags. - tags_list = json['post']['hashtags'] - # Lookup table for tag replacements. The tags are in the form of hashtags, and often have multiple words mashed together. This is a quick and dirty way of turning these into meaningful data, and can be expanded on to taste. - replacements = { - 'Fin Dom': 'Findom', - 'Fem Dom': 'Femdom', - 'bigtits': 'Big Tits', - 'titworship': 'Tit Worship', - 'financialdomination': 'Financial Domination', - 'R I P O F F': 'ripoff', - 'pussydenial': 'pussy denial' - } + tags_list = json["post"]["hashtags"] fixed_tags = [] # For every tag we find: for tag in tags_list: @@ -155,20 +142,25 @@ def scrape_scene(scene_url: str) -> dict: tag = tag[1:] modified_tag = tag # Split CamelCase tags into separate words. - modified_tag = re.sub(r'(? 
Date: Sun, 20 Aug 2023 04:21:13 +0200 Subject: [PATCH 306/624] Scrape higher resolution images from GirlsRimming --- scrapers/GirlsRimming.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/scrapers/GirlsRimming.yml b/scrapers/GirlsRimming.yml index f46dc9f25..0e297c21b 100644 --- a/scrapers/GirlsRimming.yml +++ b/scrapers/GirlsRimming.yml @@ -6,20 +6,22 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $image: //div[@class='player-thumb']/img scene: Title: //div[@class='updatesBlock']/h2[@class='title']/text() Details: //meta[@name='description']/@content - Image: //div[@class='player-thumb']/img/@src0_1x + Image: $image/@src0_4x|$image/@src0_3x|$image/@src0_2x|$image/@src0_1x Studio: Name: - fixed: GirlsRimming + fixed: Girls Rimming Movies: Name: selector: //div[@class='updatesBlock']/h2[@class='title']/text() postProcess: - replace: - - regex: (.+)(?:\sEp\d).* - with: $1 + - regex: (.+)(?:\sEp\d).* + with: $1 Tags: Name: selector: //meta[@name='keywords']/@content @@ -28,5 +30,4 @@ xPathScrapers: - regex: "[^,]*Id\\s(\\d+)[^,]*" with: split: "," - -# Last Updated September 30, 2020 +# Last Updated August 20, 2023 From 48e23a827de54346121e4e540db33365e557e9e6 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 20 Aug 2023 19:39:21 +0200 Subject: [PATCH 307/624] Update GroobyClub for new layout --- scrapers/GroobyClub.yml | 65 +++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 18 deletions(-) diff --git a/scrapers/GroobyClub.yml b/scrapers/GroobyClub.yml index be5f20791..38de096f3 100644 --- a/scrapers/GroobyClub.yml +++ b/scrapers/GroobyClub.yml @@ -6,7 +6,6 @@ sceneByURL: - asianamericantgirls.com - canada-tgirl.com - euro-tgirls.com - - grooby.club - hazel-tucker.com - krissy4u.com - russian-tgirls.com @@ -17,33 +16,63 @@ sceneByURL: - transexdomination.com - ts-castingcouch.com - uk-tgirls.com + scraper: substudioScraper + - action: scrapeXPath + url: + - grooby.club # 
other grooby sites which work - tgirljapan.com - tgirljapanhardcore.com - scraper: sceneScraper + scraper: clubScraper + xPathScrapers: - sceneScraper: + clubScraper: scene: - Title: //div[@class="trailer_videoinfo"]//h3/text() - Date: - selector: //div[@class="trailer_videoinfo"]//b[contains(.,"Added")]/following-sibling::text()[1] + Title: &title //div[@class="trailer_toptitle_left"] + Date: &date + selector: //b[contains(.,"Added")]/following-sibling::text()[1] postProcess: - replace: - regex: ^- with: "" - - parseDate: Jan 2, 2006 - Details: //div[@class="trailer_videoinfo"]/p[not(b)] - Performers: - Name: //div[@class="trailer_videoinfo"]//b[contains(.,"Featuring")]/following-sibling::a/text()[1] + - parseDate: January 2, 2006 + Details: &details + selector: //div[@class="trailerpage_info"]/p/text() + concat: " " + Performers: &performers + Name: //div[@class="setdesc"]//a[contains(@href, "models")]/text() Studio: - Name: //meta[@name="author"]/@content - Image: - selector: //base/@href|//div[@class="videohere"]/img[@class="thumbs stdimage"]/@src|//script[contains(.,'jwplayer("jwbox").setup')]/text() - concat: "|" + Name: //div[@class="sitename"]/a/text() + URL: &url //link[@rel="canonical"]/@href + Image: &image + selector: //meta[@property="og:image"]/@content postProcess: - replace: - - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" - with: $1$2 + - regex: ^// + with: https:// + Tags: + Name: &tagName //div[@class="set_tags"]/ul/li//a/text() + + substudioScraper: + scene: + Title: *title + Date: *date + Details: *details + Performers: *performers + Studio: + Name: //meta[@name="author"]/@content + URL: *url + Image: *image + # Tags for these subsites only appear on grooby.club as of 2023-08-15 + # but we have to extend the subScraper functionality in Stash + # if we want to be able to scrape more than just a single field + # TODO: write a python scraper, merge with GroobyNetwork-*.yml ? 
Tags: - Name: //div[@class="set_tags"]/ul/li//a/text() -# Last Updated June 07, 2023 + Name: + selector: //link[@rel="canonical"]/@href + postProcess: + - replace: + - regex: ^.+/tour + with: https://grooby.club/tour + - subScraper: *tagName +# Last Updated August 21, 2023 From 7c02cac662b293789a14d6e258f14edb863946db Mon Sep 17 00:00:00 2001 From: Gavin Mogan Date: Sat, 19 Aug 2023 21:47:41 -0700 Subject: [PATCH 308/624] Add performer scraper to Tenshigao.yml --- SCRAPERS-LIST.md | 2 +- scrapers/Tenshigao.yml | 107 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 3 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 0cce03bdf..b8ed79ebf 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1310,7 +1310,7 @@ teenstryblacks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Thai Uncensored teentugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenytaboo.com|TeenyTaboo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Jav +tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Jav terapatrick.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tessafowler.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- texasbukkake.com|TexasBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Tenshigao.yml b/scrapers/Tenshigao.yml index 04f62fb02..eba7bbcc3 100644 --- a/scrapers/Tenshigao.yml +++ b/scrapers/Tenshigao.yml @@ -4,6 +4,11 @@ sceneByURL: url: - tenshigao.com scraper: sceneScraper +performerByURL: + - action: scrapeXPath + url: + - tenshigao.com + scraper: performerScraper xPathScrapers: sceneScraper: scene: @@ -18,7 +23,7 @@ xPathScrapers: - parseDate: January 02, 2006 Performers: Name: //h5/a - URL: //h5/a/@href + URL: //h5/a/@href Tags: Name: //div[@class="cat"]/a Details: @@ -32,4 +37,102 @@ xPathScrapers: Studio: Name: fixed: Tenshigao -# Last Updated 
March 02, 2022 + performerScraper: + common: + $profile: //div[@class="model-profile"] + performer: + Name: + selector: $profile[contains(strong, "Name:")]//text() + postProcess: + - replace: + - regex: .*Name:\s*(.*)\s*$ + with: $1 + - regex: None + with: + Aliases: + selector: $profile[contains(strong, "Japanese name:")]//text() + postProcess: + - replace: + - regex: .*Japanese name:\s*(.*)\s*$ + with: $1 + - regex: None + with: + Birthdate: + selector: $profile[contains(strong, "Birth date:")]//text() + postProcess: + - replace: + - regex: .*Birth date:\s*(.*)\s*$ + with: $1 + - parseDate: January 2, 2006 + - parseDate: 2006-01-02 + Measurements: + selector: $profile[contains(strong, "Body:")]//text() + postProcess: + - replace: + - regex: .*Body:\s*(.*)\s*$ + with: $1 + - regex: B(\d*)-W(\d*)-H(\d*) + with: $1-$2-$3 + - regex: -- + with: + - regex: None + with: + Weight: + selector: $profile[contains(strong, "Weight:")]//text() + postProcess: + - replace: + - regex: .*Weight:\s*(.*)\s*$ + with: $1 + - regex: (\d+)kg + with: $1 + - regex: None + with: + Height: + selector: $profile[contains(strong, "Height:")]//text() + postProcess: + - replace: + - regex: .*Height:\s*(.*)\s*$ + with: $1 + - regex: (\d+)cm.* + with: $1 + - regex: None + with: + Details: concat(string(//div[@class="intro"]//p//text()), string(//div[@class="intro"]//p//span[@class="readmore"]//text())) + Piercings: + selector: $profile[contains(strong, "Piercings:")]//text() + postProcess: + - replace: + - regex: .*Piercings:\s*(.*)\s*$ + with: $1 + - regex: None + with: + - regex: No Piercings + with: + Tattoos: + selector: $profile[contains(strong, "Tattoo:")]//text() + postProcess: + - replace: + - regex: .*Tattoo:\s*(.*)\s*$ + with: $1 + - regex: None + with: + HairColor: + selector: $profile[contains(strong, "Hair color:")]//text() + postProcess: + - replace: + - regex: .*Hair Color:?\s*(.*)\s*$ + with: $1 + - regex: .*Hair color:?\s*(.*)\s*$ + with: $1 + - regex: None + with: + URL: 
//link[@rel="canonical"][1]/@href + Image: + selector: //img[@class="smallroundedthumbs"]/@src + postProcess: + - replace: + - regex: ^// + with: https:// + - regex: 160x160 + with: 500x500 +# Last Updated August 19, 2023 From 2f9bc2604e6a145da45243fc0946c2460e3e3cca Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 20 Aug 2023 21:08:11 +0200 Subject: [PATCH 309/624] Add XPath scraper for tadpolexstudio.com Almost exactly the same layout as ExploitedX --- SCRAPERS-LIST.md | 1 + scrapers/TadpolexStudio.yml | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 scrapers/TadpolexStudio.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index b8ed79ebf..ba691be98 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1269,6 +1269,7 @@ swnude.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay tabooheat.com|Algolia_TabooHeat.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- taboopov.com|taboopov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tacamateurs.com|TACAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tadpolexstudio.com|TadpolexStudio.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- takevan.com|TakeVan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- taliashepard.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- tamedteens.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- diff --git a/scrapers/TadpolexStudio.yml b/scrapers/TadpolexStudio.yml new file mode 100644 index 000000000..42879fdcb --- /dev/null +++ b/scrapers/TadpolexStudio.yml @@ -0,0 +1,42 @@ +name: "TadpolexStudio" +sceneByURL: + - action: scrapeXPath + url: + - tadpolexstudio.com/trailers + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $models: //div[contains(@class, "models-list-thumbs")]//a + scene: + Studio: + Name: + selector: //a[@class="navbar-brand"]/@title + postProcess: + - map: + OFFICIAL Backroom Casting Couch: Backroom Casting Couch + BlackAmbush: Black Ambush + HOTMILFSFUCK: Hot Milfs Fuck + "Exploited 
College Girls: Excogi": Exploited College Girls + + Title: //div[@class="video-player"]//h2[@class="section-title"]/text() + Performers: + Name: $models/span + URL: $models/@href + Date: + selector: //strong[text()="Released:"]/following-sibling::text() + postProcess: + - parseDate: January 2, 2006 + Tags: + Name: //ul[@class="tags"]//a + Image: + selector: //base/@href|//div[@class="player-thumb"]//img[contains(@class, "update_thumb")]/@src0_1x + concat: "|" + postProcess: + - replace: + - regex: "(^[^|]+)\\|([^\\.]+\\.jpg).*" + with: $1$2 + Details: + selector: //div[@class="update-info-block"]/h3[text()="Description:"]/following-sibling::text() + concat: "\n\n" +# Last Updated August 20, 2023 From 13b956a791c81caa0def7ffa86e3a1904e51cfb3 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 21 Aug 2023 13:59:48 +0200 Subject: [PATCH 310/624] Update LittleCapriceDreams for new layout --- scrapers/LittleCapriceDreams.yml | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/scrapers/LittleCapriceDreams.yml b/scrapers/LittleCapriceDreams.yml index 203c6afa1..30a52a9b4 100644 --- a/scrapers/LittleCapriceDreams.yml +++ b/scrapers/LittleCapriceDreams.yml @@ -6,18 +6,36 @@ sceneByURL: scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $project: //div[@id="main-project-content"] scene: Title: //meta[@property="og:title"]/@content - Details: //div[@class="vid_desc"]/p + Details: $project//div[contains(@class, "desc-text")] Image: //meta[@property="og:image"]/@content Studio: - Name: //meta[@property="og:site_name"]/@content + Name: + selector: $project/@class + postProcess: + - replace: + - regex: ".*(buttmuse|caprice-divas|nasstyx|porn-lifestyle|pov-dreams|streetfuck|superprivatex|virtual-reality|wecumtoyou|xpervo).*" + with: $1 + - map: + buttmuse: BUTTMuse + caprice-divas: Caprice Divas + nasstyx: NASSTYx + porn-lifestyle: Pornlifestyle + pov-dreams: POVdreams + streetfuck: STREETFUCK + superprivatex: 
SuperprivateX + virtual-reality: Virtual Reality + wecumtoyou: Wecumtoyou + xpervo: Xpervo Date: - selector: //div[@class="vid_date"] + selector: //meta[@property="article:published_time"]/@content postProcess: - - parseDate: January 2, 2006 + - parseDate: "2006-01-02T15:04:05+00:00" Performers: - Name: //div[@class="vid_infos"]/div[contains(div, "Models")]//a + Name: $project//div[contains(@class, "project-models")]//a Tags: - Name: //div[@class="vid_infos"]/div[contains(div, "Tags")]//a -# Last Updated March 28, 2021 + Name: $project//div[contains(@class, "project-tags")]//a +# Last Updated August 21, 2023 From 6c22cadf1d1064bcbf0e9adfcffe523ceebb563d Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 21 Aug 2023 14:58:24 +0200 Subject: [PATCH 311/624] Add XPath scraper for Cockyboys.com --- SCRAPERS-LIST.md | 1 + scrapers/CockyBoys.yml | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 scrapers/CockyBoys.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index ba691be98..bf79f844d 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -274,6 +274,7 @@ clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:| clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +cockyboys.com|CockyBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- diff --git a/scrapers/CockyBoys.yml b/scrapers/CockyBoys.yml new file mode 100644 index 000000000..b0b69f841 --- /dev/null +++ b/scrapers/CockyBoys.yml @@ -0,0 +1,30 @@ +name: CockyBoys +sceneByURL: + - action: scrapeXPath + url: + - cockyboys.com/scenes/ + scraper: sceneScraper +xPathScrapers: + 
sceneScraper: + scene: + Title: //meta[@property="og:title"]/@content + Date: + selector: //strong[contains(text(),"Released:")]/following-sibling::text() + postProcess: + - parseDate: 01/02/2006 + Tags: + Name: //strong[contains(text(),"Categorized Under:")]/following-sibling::a/text() + Performers: + Name: //strong[contains(text(),"Featuring:")]/following-sibling::a/text() + Details: + selector: //div[@class="movieDesc"]/. + postProcess: + - replace: + - regex: Description\s* + with: + concat: "\n\n" + Image: + selector: //meta[@property="og:image"]/@content + Studio: + Name: //meta[@property="og:site_name"]/@content +# Last Updated August 21, 2023 From 76a295a1b12b522b0558031e624914084da1d448 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 22 Aug 2023 03:33:35 +0200 Subject: [PATCH 312/624] Update Aziani Aziani changed their layout but we can still scrape everything except the date using XPath Gloryholesecrets has been moved from Aziani to an Algolia-based Python scraper --- SCRAPERS-LIST.md | 2 +- scrapers/Algolia_Gloryholesecrets.yml | 39 +++++++++++++++ scrapers/Aziani.yml | 70 ++++++++------------------- 3 files changed, 61 insertions(+), 50 deletions(-) create mode 100644 scrapers/Algolia_Gloryholesecrets.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index bf79f844d..7d46ecdba 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -573,7 +573,7 @@ givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- -gloryholesecrets.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gloryholesecrets.com|Algolia_Gloryholesecrets.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- goddessnudes.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- diff --git a/scrapers/Algolia_Gloryholesecrets.yml b/scrapers/Algolia_Gloryholesecrets.yml new file mode 100644 index 000000000..c69808d6f --- /dev/null +++ b/scrapers/Algolia_Gloryholesecrets.yml @@ -0,0 +1,39 @@ +name: GloryholeSecrets +sceneByURL: + - action: script + url: + - gloryholesecrets.com/en/video + script: + - python + - Algolia.py + - gloryholesecrets +sceneByFragment: + action: script + script: + - python + - Algolia.py + - gloryholesecrets +sceneByName: + action: script + script: + - python + - Algolia.py + - gloryholesecrets + - searchName +sceneByQueryFragment: + action: script + script: + - python + - Algolia.py + - gloryholesecrets + - validName +galleryByURL: + - action: script + url: + - gloryholesecrets.com/en/photo/ + script: + - python + - Algolia.py + - gloryholesecrets + - gallery +# Last Updated August 22, 2023 diff --git a/scrapers/Aziani.yml b/scrapers/Aziani.yml index 97d59ed67..3f5ab7aaf 100644 --- a/scrapers/Aziani.yml +++ b/scrapers/Aziani.yml @@ -1,59 +1,31 @@ name: Aziani sceneByURL: - action: scrapeXPath - url: - - gloryholesecrets.com - scraper: sceneScraper - - action: scrapeXPath - # This site differs from the other two in that it lacks the host part in the url, - # therefore there is a copy of the scraper with only one extra replace. 
- # It uses YAML anchors, so ideally only the first scraper needs to be changed url: - aziani.com - scraper: sceneScraperB + scraper: sceneScraper xPathScrapers: sceneScraper: + common: + $scene: //div[contains(@class, "heading")][1] scene: - Studio: &studio + Studio: Name: - selector: //base/@href - postProcess: - - replace: - - regex: ^.+(?:\.|/)(.+)\..+$ - with: $1 - - map: - gloryholesecrets: Gloryhole Secrets - aziani: Aziani - Performers: &performers - Name: //div[@class="video_details mb mt0"]/h5[i[@class="icon-female"]]/a - Title: &title - selector: //h2[@class="H_underline"]/text() - Details: &details - selector: //div[@class="desc"]/p/text() - concat: "\n\n" - Tags: &tags - Name: //h5[@class="video_categories"]/a - Image: &image - selector: //img[@id="set-target-1_0"]/@src - Date: &date - selector: //comment()[contains(.,"icon-calendar")] - postProcess: - - replace: - - regex: ^.*(\d{2}\/\d{2}\/\d{4}).*$ - with: $1 - - parseDate: 01/02/2006 - sceneScraperB: - scene: - Studio: *studio - Performers: *performers - Title: *title - Details: *details - Tags: *tags - Image: - <<: *image - postProcess: - - replace: - - regex: ^ - with: https://www.aziani.com - Date: *date + fixed: Aziani + Performers: + Name: $scene//a[contains(@href, "/models/")]/text() + Title: //meta[@property="og:title"]/@content + Details: //meta[@property="og:description"]/@content + Tags: + Name: $scene//a[contains(@href, "/tags/")]/text() + Image: //meta[@property="og:image"]/@content + +driver: + cookies: + - CookieURL: "https://aziani.com" + Cookies: + - Name: nats + Value: anythinggoeshere + Domain: .aziani.com + Path: / # Last Updated November 10, 2022 From f351111f9b4b60f3d46ee5daab5ce7ab1d9e2cae Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 22 Aug 2023 09:46:00 +0200 Subject: [PATCH 313/624] Fix r18.dev scraper returning empty images --- SCRAPERS-LIST.md | 4 +- scrapers/R18.dev.yml | 4 +- scrapers/r18.yml | 338 ------------------------------------------- 3 files changed, 3 
insertions(+), 343 deletions(-) delete mode 100644 scrapers/r18.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 7d46ecdba..55ec8f758 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1093,9 +1093,7 @@ purepov.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Pytho puretaboo.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- pussyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored putalocura.com|Putalocura.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -r18.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV -r18.com|r18.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV -r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rachel-steele.com|RachelSteele.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/R18.dev.yml b/scrapers/R18.dev.yml index 7c92c8c40..bc71d243c 100644 --- a/scrapers/R18.dev.yml +++ b/scrapers/R18.dev.yml @@ -393,7 +393,7 @@ jsonScrapers: Details: selector: title postProcess: *ppUncensor - Image: images.jacket_image.large + Image: images.jacket_image.large2 Director: director Date: release_date Tags: @@ -416,7 +416,7 @@ jsonScrapers: Title: selector: title Image: - selector: images.jacket_image.large + selector: images.jacket_image.large2 Date: selector: release_date URL: diff --git a/scrapers/r18.yml b/scrapers/r18.yml deleted file mode 100644 index 60c30d410..000000000 --- a/scrapers/r18.yml +++ /dev/null @@ -1,338 +0,0 @@ -name: "R18 (JAV)" -sceneByURL: - - action: scrapeJson - url: - - r18.com/videos - scraper: sceneScraper - queryURL: "{url}" - queryURLReplace: - url: - - regex: .+id=(.+)/.* - with: https://www.r18.com/api/v4f/contents/$1?lang=en -movieByURL: - - action: scrapeXPath - url: - - r18.com/videos/vod/movies/list - 
scraper: serieScraper -sceneByFragment: - action: scrapeXPath - queryURL: https://www.r18.com/common/search/searchword={filename} - queryURLReplace: - filename: - - regex: (.*[^a-zA-Z0-9])*([a-zA-Z-]+\d+)(.+) - with: $2 - - regex: ([a-zA-Z]+)(\d+) - with: $1-$2 - scraper: sceneScraper -jsonScrapers: - sceneScraper: - scene: - Title: data.dvd_id - URL: data.detail_url - Date: - selector: data.release_date - postProcess: - - replace: - - regex: \s.+ - with: "" - Details: - selector: "[data.title,data.comment]" - concat: "\n\n" - postProcess: &ppUncensor - - replace: - - regex: A\*{3}ed\b - with: "Abused" - - regex: A\*{3}e\b - with: "Abuse" - - regex: A\*{3}es\b - with: "Abuses" - - regex: A\*{4}p\b - with: "Asleep" # ZMEN-037 - - regex: A\*{5}t - with: "Assault" - - regex: A\*{5}ted\b - with: "Assaulted" - - regex: B\*{5}p\b - with: "Bang Up" # Not Sure - - regex: B\*{5}k\b # IPX-374 - with: "Berserk" - - regex: B\*{3}d\b - with: "Blood" - - regex: B\*{3}dy\b - with: "Bloody" # Not Sure - - regex: B\*{6}y\b - with: "Brutally" - - regex: Chai\*{1}saw\b - with: "Chainsaw" - - regex: C\*{3}d\b - with: "Child" - - regex: C\*{3}dcare\b - with: "Childcare" - - regex: C\*{3}dhood\b - with: "Childhood" - - regex: C\*{3}dish\b - with: "Childish" # MMYM-045 - - regex: C\*{3}dren\b - with: "Children" - - regex: C\*{1}ck\b - with: "Cock" - - regex: C\*{1}cks\b - with: "Cocks" - - regex: C\*{1}llegiate\b - with: "Collegiate" - - regex: C\*{5}y\b - with: "Cruelty" - - regex: CrumB\*{2}d\b - with: "Crumbled" - - regex: D\*{1}ck\b - with: "Dick" - - regex: D\*{6}e\b - with: "Disgrace" - - regex: D\*{6}ed\b - with: "Disgraced" - - regex: D\*{6}eful\b - with: "Disgraceful" - - regex: D\*{3}king\b - with: "Drinking" - - regex: D\*{3}ks\b - with: "Drinks" - - regex: D\*{2}g\b - with: "Drug" - - regex: D\*{2}gged\b - with: "Drugged" - - regex: D\*{2}gs\b - with: "Drugs" - - regex: D\*{3}k\b - with: "Drunk" - - regex: D\*{3}ken\b - with: "Drunken" - - regex: D\*{3}kest\b - with: 
"Drunkest" - - regex: EnS\*{3}ed\b - with: "Enslaved" - - regex: F\*{3}e\b - with: "Force" - - regex: F\*{3}eful\b - with: "Forceful" - - regex: F\*{3}efully\b - with: "Forcefully" - - regex: F\*{3}es\b - with: "Forces" # Not Sure - - regex: F\*{3}ed\b - with: "Fucked" - - regex: F\*{5}g\b - with: "Fucking" # SSNI-391 - - regex: G\*{9}d\b - with: "Gang-Banged" - - regex: G\*{6}g\b - with: "Gangbang" # STAR-976 - - regex: G\*{7}g\b - with: "Gangbang" - - regex: G\*{6}ged\b - with: "Gangbanged" # SSNI-242 - - regex: G\*{7}ged\b - with: "Gangbanged" - - regex: G\*{7}gers\b - with: "Gangbangers" - - regex: G\*{6}ging\b - with: "Gangbanging" - - regex: G\*{7}ging\b - with: "Gangbanging" - - regex: G\*{7}gs\b - with: "Gangbangs" - - regex: Half-A\*{4}p\b - with: "Half-Asleep" # ZMEN-037 - - regex: HumB\*{2}d\b - with: "Humbled" - - regex: H\*{9}n\b - with: "Humiliation" - - regex: H\*{2}t\b - with: "Hurt" - - regex: H\*{2}ts\b - with: "Hurts" - - regex: H\*{7}m\b - with: "Hypnotism" - - regex: H\*{7}ed\b - with: "Hypnotized" # PPPD-376 - - regex: I\*{4}t\b - with: "Incest" - - regex: I\*{4}tuous\b - with: "Incestuous" - - regex: I\*{4}ts\b - with: "Insults" - - regex: J\*{1}\b - with: "Jo" # Not Sure - - regex: J\*{1}s\b - with: "Jos" # Not Sure - - regex: K\*{1}d\b - with: "Kid" - - regex: K\*{1}dding\b - with: "Kidding" - - regex: K\*{4}pped\b - with: "Kidnapped" - - regex: K\*{4}pper\b - with: "Kidnapper" - - regex: K\*{4}pping\b - with: "Kidnapping" - - regex: K\*{1}ds\b - with: "Kids" - - regex: K\*{2}l\b - with: "Kill" - - regex: K\*{2}led\b - with: "Killed" # SNIS-036 - - regex: K\*{2}ler\b - with: "Killer" # Not Sure - - regex: K\*{2}ling\b - with: "Killing" - - regex: Lol\*{1}pop\b - with: "Lolipop" - - regex: Lo\*{2}ta\b - with: "Lolita" - - regex: Ma\*{1}ko\b - with: "Maiko" - - regex: M\*{4}t\b - with: "Molest" - - regex: M\*{4}tation\b - with: "Molestation" - - regex: M\*{4}ted\b - with: "Molested" - - regex: M\*{4}ter\b - with: "Molester" - - regex: 
M\*{4}ters\b - with: "Molesters" - - regex: M\*{4}ting\b - with: "Molesting" - - regex: M\*{4}tor\b - with: "Molestor" - - regex: P\*{4}h\b - with: "Punish" - - regex: P\*{4}hed\b - with: "Punished" - - regex: P\*{4}hment\b - with: "Punishment" - - regex: P\*{1}ssy\b - with: "Pussy" - - regex: R\*{2}e\b - with: "Rape" - #- regex: R\*{1}pe\b - # with: "Rape" # Can be Rope ? IPX-311 - - regex: R\*{2}ed\b - with: "Raped" - - regex: R\*{1}ped\b - with: "Raped" - - regex: R\*{2}es\b - with: "Rapes" - - regex: R\*{4}g\b - with: "Raping" - - regex: S\*{9}l\b - with: "School Girl" # Not Sure - - regex: S\*{9}ls\b - with: "School Girls" # SSNI-296 - - regex: S\*{8}l\b - with: "Schoolgirl" - - regex: Sch\*{2}lgirl\b - with: "Schoolgirl" - - regex: S\*{9}ls\b - with: "Schoolgirls" # Not Sure (PPPD-811) - - regex: S\*{8}ls\b - with: "Schoolgirls" - - regex: Sch\*{2}lgirls\b - with: "Schoolgirls" - - regex: SK\*{2}led\b - with: "Skilled" - - regex: SK\*{2}lful\b - with: "Skillful" - - regex: SK\*{2}lfully\b - with: "Skillfully" - - regex: SK\*{2}ls\b - with: "Skills" - - regex: S\*{3}e\b - with: "Slave" - - regex: S\*{3}ery\b - with: "Slavery" - - regex: S\*{3}es\b - with: "Slaves" - - regex: S\*{6}g\b - with: "Sleeping" - - regex: StepB\*{16}r\b - with: "StepBrother And Sister" # Not Sure - - regex: StepK\*{1}ds \b - with: "StepKids" - - regex: StepM\*{12}n\b - with: "StepMother And Son" # GVG-299 - - regex: S\*{5}t\b - with: "Student" - - regex: S\*{5}ts\b - with: "Students" - - regex: S\*{8}n\b - with: "Submission" - - regex: T\*{6}e\b - with: "Tentacle" #MIDD-648 - - regex: T\*{6}es\b - with: "Tentacles" - - regex: T\*{5}e\b - with: "Torture" - - regex: T\*{5}ed\b - with: "Tortured" - - regex: T\*{5}es\b - with: "Tortures" #MIDD-648 - - regex: U\*{9}sly\b - with: "Unconsciously" - - regex: U\*{7}g\b - with: "Unwilling" - - regex: V\*{5}e\b - with: "Violate" - - regex: V\*{1}olated\b - with: "Violated" - - regex: V\*{5}ed\b - with: "Violated" - - regex: V\*{5}es\b - with: 
"Violates" - - regex: V\*{6}e\b - with: "Violence" - - regex: V\*{5}t\b - with: "Violent" - - regex: Y\*{8}l\b - with: "Young Girl" # Not Sure - - regex: Y\*{8}ls\b - with: "Young Girls" # Not Sure - Tags: - Name: data.categories.#.name - Performers: - Name: - selector: data.actresses.#.name - #Uncomment below to convert to Surname Name (JavLibrary compatible) - #postProcess: - # - replace: - # - regex: (.+)(\s)(.+) - # with: $3$2$1 - Studio: - Name: data.maker.name - Image: data.images.jacket_image.large -xPathScrapers: - sceneScraper: - common: - $searchinfo: //li[contains(@class,"item-list")]/a//img[string-length(@alt)=string-length(preceding::div[@class="genre01"]/span/text())] - scene: - Title: $searchinfo/@alt - URL: $searchinfo/ancestor::a/@href - serieScraper: - movie: - Name: - selector: //h1[@class="txt01"]/text() - postProcess: *ppUncensor - FrontImage: - selector: //li[contains(@class,"item-list")][1]//img/@data-original - postProcess: - - replace: - - regex: ps\.jpg - with: "pl.jpg" - BackImage: - selector: //li[contains(@class,"item-list")][2]//img/@data-original - postProcess: - - replace: - - regex: ps\.jpg - with: "pl.jpg" - Studio: - Name: //p[text()="TOP STUDIOS"]/following-sibling::ul//a/span[@class="item01"]/text() -# Last Updated May 22, 2022 From 8d57addd4034aa7024f6815b9243b267992238d5 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 22 Aug 2023 09:47:34 +0200 Subject: [PATCH 314/624] Update date --- scrapers/R18.dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/R18.dev.yml b/scrapers/R18.dev.yml index bc71d243c..b3b3b7dad 100644 --- a/scrapers/R18.dev.yml +++ b/scrapers/R18.dev.yml @@ -425,4 +425,4 @@ jsonScrapers: - replace: - regex: (.+) with: https://r18.dev/videos/vod/movies/detail/-/combined=$1/json -# Last Updated January 19, 2023 +# Last Updated August 22, 2023 From 7546ccda6d7a4a8b3f49c8583ec4e020377ec7eb Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 22 Aug 2023 15:16:10 +0200 Subject: 
[PATCH 315/624] Update ScrapeWithURL for new scene structure Also makes it only use the URL and not needlessly query the GQL API for a scene when it already has everything it needs from the fragment --- scrapers/ScrapeWithURL.py | 95 ++++++++++++++++++++++++++------------- 1 file changed, 64 insertions(+), 31 deletions(-) diff --git a/scrapers/ScrapeWithURL.py b/scrapers/ScrapeWithURL.py index a660cc20d..636c1d07a 100644 --- a/scrapers/ScrapeWithURL.py +++ b/scrapers/ScrapeWithURL.py @@ -5,47 +5,80 @@ import py_common.graphql as graphql import py_common.log as log except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) sys.exit() + def call_graphql(query, variables=None): return graphql.callGraphQL(query, variables) + def scrape_scene(url): - query = """query scrapeSceneURL($url: String!) { - scrapeSceneURL(url: $url) { - title - details - date - image - studio { - name - } - tags { - name - } - performers { - name - } - url - } - }""" - - variables = {'url': url} + query = """ +query scrapeSceneURL($url: String!) 
{ + scrapeSceneURL(url: $url) { + title + details + code + date + image + urls + studio { + name + url + image + parent { + name + url + image + } + } + tags { + name + } + performers { + aliases + birthdate + career_length + country + death_date + details + ethnicity + eye_color + fake_tits + gender + hair_color + height + instagram + images + measurements + name + piercings + tags { + name + } + tattoos + twitter + url + weight + } + } +}""" + + variables = {"url": url} result = call_graphql(query, variables) log.debug(f"result {result}") return result["scrapeSceneURL"] FRAGMENT = json.loads(sys.stdin.read()) -SCENE_ID = FRAGMENT.get("id") - -scene = graphql.getScene(SCENE_ID) -if scene: - scene_url = scene['url'] +url = FRAGMENT.get("url") - if scene_url: - result = scrape_scene(scene_url) - print(json.dumps(result)) - else: - print(json.dumps({})) +if url: + result = scrape_scene(url) + print(json.dumps(result)) +else: + print(json.dumps({})) From 807cf4adacbdd64f30e9c16f0603d252e1326345 Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Tue, 22 Aug 2023 15:23:20 +0100 Subject: [PATCH 316/624] Update Clips4Sale.yml Work-in-progress updating of C4S scraper. Still a lot to be done. 
--- scrapers/Clips4Sale.yml | 93 ++++++++++------------------------------- 1 file changed, 23 insertions(+), 70 deletions(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index d8bce0955..43be7641c 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -5,14 +5,10 @@ sceneByURL: url: - clips4sale.com/studio/ scraper: c4sSceneScraper - - action: scrapeXPath - url: - - clips4sale.com/search/clip/ - scraper: c4sClipsSceneScraper sceneByName: action: scrapeXPath - queryURL: https://www.clips4sale.com/clips/search/{} + queryURL: https://www.clips4sale.com/clips/search/{}/category/0/storesPage/1/clipsPage/1 scraper: c4sSearch sceneByQueryFragment: action: scrapeXPath @@ -26,94 +22,51 @@ xPathScrapers: scene: Title: selector: $card/@aria-label + postProcess: + - replace: + - regex: <\/?em> + with: "" URL: selector: $card//a[contains(@class, "search-clip__title")]/@href - Image: - selector: $card//a[contains(@class, "search-clip__title")]/@href postProcess: - - replace: - - regex: https:\/\/www.clips4sale.com\/studio\/(\d+)\/(\d+)\/.*$ - with: https://imagecdn.clips4sale.com/accounts99/$1/clip_images/previewlg_$2.jpg + - replace: + - regex: ^ + with: https://www.clips4sale.com + Image: + selector: $card//img/@data-src c4sSceneScraper: common: - $studio: //span[contains(text(),"From:")]/following-sibling::a + $scene: //figure[contains(@class, "mediabook-preview")] scene: Title: - selector: //div[@data-clipid]/following-sibling::h3 - postProcess: &ppStripTitle - - replace: - # https://regex101.com/r/CYLeuO/8 - - regex: >- - (?i)[ \t]*((Super )?[SH]D)?[ ,-]*(\b(MP4|OPTIMUM|WMV|MOV|AVI|UHD|[48]K)\b|1080p|720p|480p|(Standard|High) Def(inition)?)+[ \t]* - with: - # https://regex101.com/r/C3TgFG/2 - - regex: >- - [ \t]*[([][])][ \t]*|[ -]+$ - with: + selector: $scene/figcaption/div/p[contains(@class, "lg:text-lg")]/text() Details: - selector: //div[@class="clip"]/div[1]/*|//div[@class="clip"]/div[1]/text() + selector: $scene//div[contains(@class, 
"read-more--text")]/p/text() concat: "\n\n" - postProcess: &ppStripDescription - - replace: - - regex: ^Description:\s* - with: Studio: - Name: $studio - URL: - selector: $studio/@href - postProcess: &ppPrependOrigin - - replace: - - regex: ^ - with: https://www.clips4sale.com + selector: //a[@class="hover:underline text-white font-medium w-full truncate block"]/text() Date: - selector: //span[contains(text(),"Added:")]/span + selector: //div[contains(@class, 'border-b border-white/20 lg:border-0 pb-3 lg:pb-0 mb-3 lg:mb-0')]/span[contains(text(),'/')]/text() postProcess: &ppParseDate - replace: - regex: \s.+ with: "" - parseDate: 1/2/06 Tags: - Name: //div/span[contains(text(),"Category:")]/../..//a + Name: //span[text()='Keywords']/following-sibling::span[position()=1]/a Performers: # Clips4Sale doesn't have an explict performer field, but performers are # often included in the video tags. So we attempt to find matches there. - Name: //div/span[contains(text(),"Keywords:")]/..//a + Name: //span[text()='Keywords']/following-sibling::span[position()=1]/a Image: - selector: //div[contains(@class, "clipImage")]/div/video/@data-poster - postProcess: &ppPrependScheme - - replace: - - regex: ^// - with: https:// + selector: $scene/div[@class="w-full lg:w-7/12 flex-grow flex-shrink-0"]//img/@data-src + postProcess: + - replace: + - regex: ^ + with: "https:" URL: //meta[@property="og:url"]/@content - c4sClipsSceneScraper: - scene: - Title: - selector: //div[@class="clip-info__title"]/h3/text() - postProcess: *ppStripTitle - Details: - selector: //div[@class="modal_clip_description"]//p - concat: "\n\n" - postProcess: *ppStripDescription - Studio: - Name: //span[contains(text(),"From:")]/following-sibling::a[2]/text() - URL: - selector: //span[contains(text(),"From:")]/following-sibling::a[2]/@href - postProcess: *ppPrependOrigin - Date: - selector: //strong[contains(text(),"Added:")]/following-sibling::span[contains(@class,"hidden-lg")]/text() - postProcess: *ppParseDate - 
Image: - selector: //button[@id="single-add-to-cart-0"]/@data-src - postProcess: *ppPrependScheme - Tags: - Name: //span[contains(text(),"Related Categories:")]/following-sibling::a|//span[contains(text(),"Keywords:")]/following-sibling::a - Performers: - # Clips4Sale doesn't have an explict performer field, but performers are - # often included in the video tags. So we attempt to find matches there. - Name: //span[contains(text(),"Keywords:")]/following-sibling::a - driver: cookies: - CookieURL: "https://clips4sale.com" @@ -127,4 +80,4 @@ driver: Value: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0) -# Last Updated July 24, 2023 +# Last Updated August 22, 2023 From eecc1c2ec70faa5aace95e6548efaf46b96b2e46 Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Tue, 22 Aug 2023 15:32:02 +0100 Subject: [PATCH 317/624] Update Clips4Sale.yml Fixed studio name (thanks to Maista on the Discord) --- scrapers/Clips4Sale.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index 43be7641c..88ee4da86 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -38,6 +38,7 @@ xPathScrapers: c4sSceneScraper: common: $scene: //figure[contains(@class, "mediabook-preview")] + $studio: //a[@class="hover:underline text-white font-medium w-full truncate block"]/text() scene: Title: selector: $scene/figcaption/div/p[contains(@class, "lg:text-lg")]/text() @@ -45,7 +46,7 @@ xPathScrapers: selector: $scene//div[contains(@class, "read-more--text")]/p/text() concat: "\n\n" Studio: - selector: //a[@class="hover:underline text-white font-medium w-full truncate block"]/text() + Name: $studio Date: selector: //div[contains(@class, 'border-b border-white/20 lg:border-0 pb-3 lg:pb-0 mb-3 lg:mb-0')]/span[contains(text(),'/')]/text() postProcess: &ppParseDate From 70aa95974bb9f5dddfe02aac6aefc479a8bfe838 Mon Sep 17 00:00:00 2001 From: ZzazzCDTS 
<112476145+zzazzcdts@users.noreply.github.com> Date: Tue, 22 Aug 2023 16:09:34 +0100 Subject: [PATCH 318/624] Update Clips4Sale.yml Added support for tags/performers. Should now work same as the old one did. --- scrapers/Clips4Sale.yml | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index 88ee4da86..b29f5135a 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -38,15 +38,31 @@ xPathScrapers: c4sSceneScraper: common: $scene: //figure[contains(@class, "mediabook-preview")] - $studio: //a[@class="hover:underline text-white font-medium w-full truncate block"]/text() + $studio: //a[@class="hover:underline text-white font-medium w-full truncate block"] scene: Title: selector: $scene/figcaption/div/p[contains(@class, "lg:text-lg")]/text() + postProcess: &ppStripTitle + - replace: + # https://regex101.com/r/CYLeuO/8 + - regex: >- + (?i)[ \t]*((Super )?[SH]D)?[ ,-]*(\b(MP4|OPTIMUM|WMV|MOV|AVI|UHD|[48]K)\b|1080p|720p|480p|(Standard|High) Def(inition)?)+[ \t]* + with: + # https://regex101.com/r/C3TgFG/2 + - regex: >- + [ \t]*[([][])][ \t]*|[ -]+$ + with: Details: selector: $scene//div[contains(@class, "read-more--text")]/p/text() concat: "\n\n" Studio: - Name: $studio + Name: $studio/text() + URL: + selector: $studio/@href + postProcess: &ppPrependOrigin + - replace: + - regex: ^ + with: https://www.clips4sale.com Date: selector: //div[contains(@class, 'border-b border-white/20 lg:border-0 pb-3 lg:pb-0 mb-3 lg:mb-0')]/span[contains(text(),'/')]/text() postProcess: &ppParseDate @@ -55,11 +71,25 @@ xPathScrapers: with: "" - parseDate: 1/2/06 Tags: - Name: //span[text()='Keywords']/following-sibling::span[position()=1]/a + Name: + selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text() + concat: "," + postProcess: + - replace: + - regex: ',\s+' + with: "," + 
split: "," Performers: # Clips4Sale doesn't have an explict performer field, but performers are # often included in the video tags. So we attempt to find matches there. - Name: //span[text()='Keywords']/following-sibling::span[position()=1]/a + Name: + selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text() + concat: "," + postProcess: + - replace: + - regex: ',\s+' + with: "," + split: "," Image: selector: $scene/div[@class="w-full lg:w-7/12 flex-grow flex-shrink-0"]//img/@data-src postProcess: From dd95daec74fe6de24bcf9a52323e4716054591a7 Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Tue, 22 Aug 2023 16:18:11 +0100 Subject: [PATCH 319/624] Update Clips4Sale.yml Fixed bug where some descriptions weren't coming through. --- scrapers/Clips4Sale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index b29f5135a..8035443e8 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -53,7 +53,7 @@ xPathScrapers: [ \t]*[([][])][ \t]*|[ -]+$ with: Details: - selector: $scene//div[contains(@class, "read-more--text")]/p/text() + selector: $scene//div[contains(@class, "read-more--text")]//text() concat: "\n\n" Studio: Name: $studio/text() From dc20f145dea63355da29b826116db22d689cbd4d Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Tue, 22 Aug 2023 18:02:08 +0100 Subject: [PATCH 320/624] Update Clips4Sale.yml Added studio result to performer results - sometimes, the studio name is just the performer's name, so it's worth adding to the capture I'd say. 
--- scrapers/Clips4Sale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Clips4Sale.yml b/scrapers/Clips4Sale.yml index 8035443e8..a6946bcb9 100644 --- a/scrapers/Clips4Sale.yml +++ b/scrapers/Clips4Sale.yml @@ -83,7 +83,7 @@ xPathScrapers: # Clips4Sale doesn't have an explict performer field, but performers are # often included in the video tags. So we attempt to find matches there. Name: - selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text() + selector: //span[contains(text(), 'Keywords')]/following-sibling::span/a/text() | //span[contains(text(), 'Category')]/following-sibling::span/a/text() | $studio/text() concat: "," postProcess: - replace: From 906c9063a9b3e02b8e40114a04ebdf4eb6327ed7 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Thu, 24 Aug 2023 08:42:57 +0300 Subject: [PATCH 321/624] Add XPath scraper for BrokenLatinaWhores.com --- scrapers/BrokenLatinaWhores.yml | 37 +++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 scrapers/BrokenLatinaWhores.yml diff --git a/scrapers/BrokenLatinaWhores.yml b/scrapers/BrokenLatinaWhores.yml new file mode 100644 index 000000000..032d9364c --- /dev/null +++ b/scrapers/BrokenLatinaWhores.yml @@ -0,0 +1,37 @@ +name: "Broken Latina Whores" +sceneByURL: + - action: scrapeXPath + url: + - brokenlatinawhores.com + scraper: sceneScraper + +xPathScrapers: + sceneScraper: + scene: + Studio: + Name: + fixed: Broken Latina Whores + Title: //span[@class="update_title"] + Details: //span[@class="latest_update_description"] + URL: + selector: //div[@class='update_image']/a/@href + Date: + selector: //span[@class="availdate"] + postProcess: + - replace: + - regex: (\d{2}\/\d{2}\/\d{4}) + with: $1 + - parseDate: 01/02/2006 + Image: + selector: 
//div[@class="update_image"]/a/img/@src0_3x|//div[@class="update_image"]/a/img/@src0_2x|//div[@class="update_image"]/a/img/@src0_1x + postProcess: + - replace: + - regex: ^(.*)$ + with: "https://brokenlatinawhores.com/$1" + Tags: + Name: + selector: //span[@class='update_tags']/a + Performers: + Name: + selector: //span[@class='tour_update_models']a +# Last Updated August 24, 2023 From 733f72c530b135be50435cd97f7e856e71011cf7 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Thu, 24 Aug 2023 08:47:11 +0300 Subject: [PATCH 322/624] Update SCRAPERS-LIST.md Add brokenlatinawhores.com --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 55ec8f758..07b232d7c 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -235,6 +235,7 @@ braziltgirls.xxx|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tr brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokenlatinawhores.com|BrokenLatinaWhores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brookelynnebriar.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- bruceandmorgan.net|bruceandmorgan.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Amateur Fetish brutalinvasion.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 47a7abaca4b96ec599f4e5693de98e917692cf74 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Thu, 24 Aug 2023 08:51:00 +0300 Subject: [PATCH 323/624] Add XPath scraper for BrokenLatinaWhores.yml --- scrapers/BrokenLatinaWhores.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/scrapers/BrokenLatinaWhores.yml b/scrapers/BrokenLatinaWhores.yml index 032d9364c..a2d163296 100644 --- a/scrapers/BrokenLatinaWhores.yml +++ 
b/scrapers/BrokenLatinaWhores.yml @@ -18,9 +18,6 @@ xPathScrapers: Date: selector: //span[@class="availdate"] postProcess: - - replace: - - regex: (\d{2}\/\d{2}\/\d{4}) - with: $1 - parseDate: 01/02/2006 Image: selector: //div[@class="update_image"]/a/img/@src0_3x|//div[@class="update_image"]/a/img/@src0_2x|//div[@class="update_image"]/a/img/@src0_1x From c67bf156b632a971a00b336fb05b60026aa5f1f2 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Thu, 24 Aug 2023 11:37:08 +0300 Subject: [PATCH 324/624] Minor edits --- scrapers/BrokenLatinaWhores.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scrapers/BrokenLatinaWhores.yml b/scrapers/BrokenLatinaWhores.yml index a2d163296..d8b96c2c0 100644 --- a/scrapers/BrokenLatinaWhores.yml +++ b/scrapers/BrokenLatinaWhores.yml @@ -4,7 +4,6 @@ sceneByURL: url: - brokenlatinawhores.com scraper: sceneScraper - xPathScrapers: sceneScraper: scene: @@ -20,7 +19,7 @@ xPathScrapers: postProcess: - parseDate: 01/02/2006 Image: - selector: //div[@class="update_image"]/a/img/@src0_3x|//div[@class="update_image"]/a/img/@src0_2x|//div[@class="update_image"]/a/img/@src0_1x + selector: //div[@class="update_image"]/a/img/@src0_4x|//div[@class="update_image"]/a/img/@src0_3x|//div[@class="update_image"]/a/img/@src0_2x|//div[@class="update_image"]/a/img/@src0_1x postProcess: - replace: - regex: ^(.*)$ @@ -30,5 +29,5 @@ xPathScrapers: selector: //span[@class='update_tags']/a Performers: Name: - selector: //span[@class='tour_update_models']a + selector: //span[@class='tour_update_models']/a # Last Updated August 24, 2023 From dcfce6ca88aa8c795f03959c499282f4b335ae8f Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 25 Aug 2023 01:27:36 +0200 Subject: [PATCH 325/624] Add eporner to Scrapers list --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 
07b232d7c..a17935292 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -425,6 +425,7 @@ eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay ebonytugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes +www.eporner.com|Eporner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV From 8cf44b33dce72bd48445635163853c6d41605130 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 25 Aug 2023 01:38:59 +0200 Subject: [PATCH 326/624] Fix error in scrapers list --- SCRAPERS-LIST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a17935292..7f0d5bde0 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -425,7 +425,7 @@ eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay ebonytugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes -www.eporner.com|Eporner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +eporner.com|Eporner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV From 8911e699fcbec27dcc9ae9f74b6c87403dd34f64 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 25 Aug 2023 03:40:17 +0200 Subject: [PATCH 327/624] Add HijavMylfs to PaperStreetMedia XPath scraper --- SCRAPERS-LIST.md | 1 + 
scrapers/PaperStreetMedia.yml | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 07b232d7c..7c51a84ba 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -618,6 +618,7 @@ heymilf.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored heyoutdoor.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored heyzo.com|Heyzo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored hijabhookup.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hijabmylfs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- himeros.tv|HimerosTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay hitzefrei.com|Hitzefrei.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- hmvmania.com|HmvMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/PaperStreetMedia.yml b/scrapers/PaperStreetMedia.yml index 8e977eb82..61f8bd41b 100755 --- a/scrapers/PaperStreetMedia.yml +++ b/scrapers/PaperStreetMedia.yml @@ -16,6 +16,7 @@ sceneByURL: - freeusefantasy.com/movies/ - freeusemilf.com/movies/ - hijabhookup.com/movies/ + - hijabmylfs.com/movies/ - innocenthigh.com/movies/ - littleasians.com/movies/ - momswap.com/movies/ @@ -69,6 +70,7 @@ xPathScrapers: freeusefantasy: Freeuse Fantasy freeusemilf: Freeuse Milf hijabhookup: Hijab Hookup + hijabmylfs: Hijab Mylfs innocenthigh: Innocent High littleasians: Little Asians momswap: Mom Swap @@ -91,4 +93,4 @@ xPathScrapers: teensloveblackcocks: Teens Love Black Cocks thickumz: Thickumz tinysis: Tiny Sis -# Last Updated May 10, 2023 +# Last Updated August 24, 2023 From 51d43a5fe232547e25478313620b61c5d716e342 Mon Sep 17 00:00:00 2001 From: plz12345 <132735020+plz12345@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:38:14 -0400 Subject: [PATCH 328/624] Update APOVStory.yml added update date --- scrapers/APOVStory.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapers/APOVStory.yml b/scrapers/APOVStory.yml index 680d157db..b0acfab87 100644 --- 
a/scrapers/APOVStory.yml +++ b/scrapers/APOVStory.yml @@ -36,4 +36,5 @@ xPathScrapers: - replace: - regex: \s+ with: "" - - parseDate: "January2,2006" \ No newline at end of file + - parseDate: "January2,2006" +#last updated August 24, 2023 From 4ab2b1044819972bea01715c179ac80221be39f6 Mon Sep 17 00:00:00 2001 From: plz12345 <132735020+plz12345@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:39:48 -0400 Subject: [PATCH 329/624] Update APOVStory.yml From de3aaa11bf9f89ccda41766e676696fa78434aab Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 25 Aug 2023 06:07:15 +0200 Subject: [PATCH 330/624] Add XPath scraper for OnlyTarts.com --- SCRAPERS-LIST.md | 1 + scrapers/OnlyTarts.yml | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 scrapers/OnlyTarts.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 045c45c60..85ce86f13 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -981,6 +981,7 @@ only3xvr.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyblowjob.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlygolddigger.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyprince.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +onlytarts.com|OnlyTarts.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/OnlyTarts.yml b/scrapers/OnlyTarts.yml new file mode 100644 index 000000000..12572a52e --- /dev/null +++ b/scrapers/OnlyTarts.yml @@ -0,0 +1,33 @@ +name: OnlyTarts +sceneByURL: + - action: scrapeXPath + url: + - onlytarts.com/video + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class="video-detail__title"] + Details: //div[@data-id="description" and @class="hidden"] + Date: + selector: 
//div[@class="video-info__time"] + postProcess: + - replace: + - regex: .*•\s+(.*) + with: $1 + - parseDate: 2 January, 2006 + Image: + selector: //div[@class="xp-poster"]/@style + postProcess: + - replace: + - regex: .*url\("(.*)"\) + with: $1 + Studio: + Name: + fixed: Only Tarts + Tags: + Name: //a[contains(@class, "tags__item")] + Performers: + Name: //div[contains(@class, "video-info")]//a[contains(@href, "/model")]/text() + URL: //div[contains(@class, "video-info")]//a[contains(@href, "/model")]/@href +# Last Updated August 25, 2023 From 232f1ff0de0326af717efff7ba86262e17aa0f10 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 25 Aug 2023 08:20:57 +0200 Subject: [PATCH 331/624] Change HussieModels.yml to preserve descriptions with emojis --- scrapers/hussiemodels.yml | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/scrapers/hussiemodels.yml b/scrapers/hussiemodels.yml index 2115ed8ab..c3a86b21e 100644 --- a/scrapers/hussiemodels.yml +++ b/scrapers/hussiemodels.yml @@ -8,7 +8,6 @@ sceneByURL: - seehimfuck.com/trailers - seehimsolo.com/trailers - povpornstars.com - # Removes all emoticons from Title, Description, Performers and Tags. 
scraper: sceneScraper xPathScrapers: sceneScraper: @@ -22,24 +21,16 @@ xPathScrapers: Performers: Name: selector: //li[@class="update_models"]//a - postProcess: + postProcess: &stripEmojis - replace: - regex: "[^\x00-\x7F]" with: - Details: - selector: //meta[@property="og:description"]/@content - postProcess: - - replace: - - regex: "[^\x00-\x7F]" - with: + Details: //meta[@property="og:description"]/@content Image: //meta[@property="og:image"]/@content Tags: Name: selector: //li[@class="label" and contains(text(), "Tags:")]/following-sibling::text() - postProcess: - - replace: - - regex: "[^\x00-\x7F]" - with: + <<: *stripEmojis Studio: Name: selector: //head/base/@href From 5f05eebd5f0ecf6186e2aae4c9da537d5ae4f368 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 25 Aug 2023 08:24:29 +0200 Subject: [PATCH 332/624] Fix a YAML fuckup in HussieModels.yml --- scrapers/hussiemodels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/hussiemodels.yml b/scrapers/hussiemodels.yml index c3a86b21e..cb0118581 100644 --- a/scrapers/hussiemodels.yml +++ b/scrapers/hussiemodels.yml @@ -30,7 +30,7 @@ xPathScrapers: Tags: Name: selector: //li[@class="label" and contains(text(), "Tags:")]/following-sibling::text() - <<: *stripEmojis + postProcess: *stripEmojis Studio: Name: selector: //head/base/@href @@ -45,4 +45,4 @@ xPathScrapers: seehimfuck: See Him Fuck seehimsolo: See Him Solo povpornstars: POV Pornstars -# Last Updated July 13, 2022 +# Last Updated August 25, 2023 From 2faf3cfe6dd46f7c391e783b4a3cc4c80df93b2b Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 25 Aug 2023 09:04:18 +0200 Subject: [PATCH 333/624] Update PornCZ The scraper now fetches performers, tags and images again! 
It will also grab the canonical scene URL and the studio code --- scrapers/Porncz.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/scrapers/Porncz.yml b/scrapers/Porncz.yml index a370cbddf..466e3b4da 100644 --- a/scrapers/Porncz.yml +++ b/scrapers/Porncz.yml @@ -9,8 +9,7 @@ sceneByURL: - czechanalsex.com - czechbiporn.com - czechboobs.com - - czechdeviant.com - - czechescortgirls + - czechescortgirls.com - czechexecutor.com - czechgaycity.com - czechgypsies.com @@ -43,20 +42,27 @@ xPathScrapers: Title: selector: //div[@class="heading-detail"]/h1 Performers: - Name: //div[@class="video-info-item color_05-text" and contains(text(), "Actors:")]//a + Name: //div[contains(@class, "video-info-item") and contains(text(), "Actors:")]//a Details: //meta[@name="description"]/@content Tags: - Name: //div[@class="video-info-item color_05-text" and contains(text(), "Genres:")]//a + Name: //div[contains(@class, "video-info-item") and contains(text(), "Genres:")]//a Image: - selector: //meta[@property="og:image"]/@content + selector: //div[@id="video-poster"]/@data-poster postProcess: - replace: - - regex: "^/" - with: "https://www.porncz.com/" + - regex: ^ + with: https://www.porncz.com Date: selector: //meta[@property="video:release_date"]/@content postProcess: - parseDate: 2.01.2006 + URL: &url //link[@rel="canonical"]/@href + Code: + selector: *url + postProcess: + - replace: + - regex: .*/(\d*)$ + with: $1 Studio: Name: selector: //a[@class="video-detail-logo"]//@alt From ec10b0ea436b43420ba58c9b1ed97e8bd1da8a58 Mon Sep 17 00:00:00 2001 From: plz12345 <132735020+plz12345@users.noreply.github.com> Date: Fri, 25 Aug 2023 18:34:54 -0400 Subject: [PATCH 334/624] Update APOVStory.yml --- scrapers/APOVStory.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/APOVStory.yml b/scrapers/APOVStory.yml index b0acfab87..15600605e 100644 --- a/scrapers/APOVStory.yml +++ b/scrapers/APOVStory.yml @@ -37,4 +37,4 @@ 
xPathScrapers: - regex: \s+ with: "" - parseDate: "January2,2006" -#last updated August 24, 2023 +# last updated August 24, 2023 From e37b45d4096bc5094847dff0e4c5d06fa12ca2c2 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 26 Aug 2023 05:25:12 +0200 Subject: [PATCH 335/624] Consolidate Spizoo sites Removed thestripperexperience.com as the site now redirects to spizoo.com Merge rawattack.com, realsensual.com and vlogxxx.com into Spizoo scraper Add Studio name to the scraper --- SCRAPERS-LIST.md | 7 +++-- scrapers/RawAttack.yml | 28 -------------------- scrapers/RealSensual.yml | 29 --------------------- scrapers/Spizoo.yml | 55 +++++++++++++++++++++++++++------------- scrapers/VlogXXX.yml | 27 -------------------- 5 files changed, 41 insertions(+), 105 deletions(-) delete mode 100644 scrapers/RawAttack.yml delete mode 100644 scrapers/RealSensual.yml delete mode 100644 scrapers/VlogXXX.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 045c45c60..4cee4257b 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1103,14 +1103,14 @@ rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ragingstallion.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -rawattack.com|RawAttack.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawattack.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR 
-realsensual.com|RealSensual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realsensual.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR @@ -1340,7 +1340,6 @@ theflourishxxx.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- thelesbianexperience.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian thelifeerotic.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- thenude.com|TheNude.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- -thestripperexperience.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- thetabutales.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- theyeslist.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- thicc18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -1447,7 +1446,7 @@ vivid.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vividclassic.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vivthomas.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- vixen.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- -vlogxxx.com|VlogXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vlogxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vogov.com|vogov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vrbangers.com|VRBangers.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR vrbgay.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR diff --git a/scrapers/RawAttack.yml b/scrapers/RawAttack.yml deleted file mode 100644 index 274dd7449..000000000 --- a/scrapers/RawAttack.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: RawAttack -sceneByURL: - - action: scrapeXPath - url: - - rawattack.com/updates/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //title - 
postProcess: - - replace: - - regex: \s*-\s*$ - with: "" - Date: - selector: //h3[text()="Release Date:"]/following-sibling::p[@class="date"]/text() - Details: //h2[text()="Description:"]/following-sibling::p/text() - Performers: - Name: //h3[text()="playmates:"]/following-sibling::a[contains(@href,'/models')]/@title - Studio: - Name: - fixed: RawAttack - Image: //img[@class="update_thumb thumbs img-unlock"]/@src - Tags: - Name: //h3[text()="Categories:"]/following-sibling::a[contains(@href,'/categories/')]/@title - -# Last Updated December 17, 2020 diff --git a/scrapers/RealSensual.yml b/scrapers/RealSensual.yml deleted file mode 100644 index 2b715def1..000000000 --- a/scrapers/RealSensual.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: "RealSensual" -sceneByURL: - - action: scrapeXPath - url: - - realsensual.com/updates/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - common: - $info: //div[@class="col-12 col-md-6"] - scene: - Title: //h2[@class="titular col-12"]/text() - Date: - selector: $info//p[@class="date"]/text() - postProcess: - - parseDate: 2006-01-02 - Details: //p[@class="description-scene"]/text() - Tags: - Name: $info/div[2]//a/text() - Performers: - Name: - selector: $info/div[1]/div[1]/a/text() - postProcess: - - replace: - - regex: \.$ - with: - Image: //video[@class="ampVideo"]/@poster - -# Last Updated November 08, 2020 diff --git a/scrapers/Spizoo.yml b/scrapers/Spizoo.yml index e62d9aebc..7762fb043 100644 --- a/scrapers/Spizoo.yml +++ b/scrapers/Spizoo.yml @@ -2,34 +2,55 @@ name: Spizoo sceneByURL: - action: scrapeXPath url: + - creamher.com/updates/ - firstclasspov.com/updates/ - - intimatelesbians.com/updates/ - - jessicajaymesxxx.com/updates/ - mrluckypov.com/updates/ - - porngoespro.com/updates/ - - pornstartease.com/updates/ + - mrluckyraw.com/updates/ + - mrluckyvip.com/updates/ + - rawattack.com/updates/ + - realsensual.com/updates/ - spizoo.com/updates/ - - thestripperexperience.com/updates/ + - vlogxxx.com/updates/ scraper: 
sceneScraper xPathScrapers: sceneScraper: + common: + $scene_info: //section[@id="trailer-data" or @id="sceneInfo" or @id="scene-info" or @id="des-scene"] + $video_section: (//section[@id="trailer-video" or @id="scene" or @id="scene-video"] | //div[contains(@class, "videoHolder")]) scene: - Title: //h1 + Title: //h1|//h2 Date: - selector: //p[@class="date"] + selector: $scene_info//p[@class="date"] postProcess: - parseDate: 2006-01-02 - Details: //p[@class="description"] + Details: $scene_info//p Performers: + Name: $scene_info//a[contains(@href,"/model")]/@title + Tags: + Name: $scene_info//a[contains(@href,"/categories")] | //div[contains(@class, "categories-holder")]/a + Studio: Name: - selector: //section[@id="sceneInfo"]//a[contains(@href,"/model")]/@title + # Scenes on Spizoo can have an element with the studio name, others we get from the base URL + selector: //i[@id="site"]/@value | //base/@href postProcess: - replace: - - regex: \s\(.+ - with: - Tags: - Name: //section[@id="sceneInfo"]//a[contains(@href,"/categorie")]/text() | //div[@class="categories-holder"]/a - Image: //video[@id="trailervideo"]/@poster - - -# Last Updated June 28, 2021 + - regex: https?://(?:www\.)?(.*)(?:\.com)(?:/.*)? + with: $1 + - map: + creamher: Cream Her + firstclasspov: First Class POV + mrluckypov: Mr. LuckyPOV + mrluckyraw: Mr. LuckyRaw + mrluckyvip: Mr. 
LuckyVIP + rawattack: RawAttack + realsensual: Real Sensual + spizoo: Spizoo + vlogxxx: Vlog XXX + Image: + selector: $video_section//video/@poster | //img[contains(@class, "update_thumb") or contains(@class, "trailer-thumb")]/@src + postProcess: + - replace: + # Remove any resizing parameters for the image, we want the original + - regex: "[?&]img(?:q|w|h)=[^&]+" + with: +# Last Updated August 26, 2023 diff --git a/scrapers/VlogXXX.yml b/scrapers/VlogXXX.yml deleted file mode 100644 index 3810b0860..000000000 --- a/scrapers/VlogXXX.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: VlogXXX -sceneByURL: - - action: scrapeXPath - url: - - vlogxxx.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="row"]/h1 - Performers: - Name: //h3[contains(.,"pornstars")]/following-sibling::a - Date: - selector: //p[@class="date"] - postProcess: - - parseDate: 2006-01-02 - Tags: - Name: //h3[contains(.,"Categories")]/following-sibling::a - Details: - selector: //div[h3[text()="Description"]]/p - Image: - selector: //div[@id="noMore"]/img/@src - Studio: - Name: - fixed: VlogXXX -# Last Updated May 21, 2021 From ea22b3bccd300e59d589338ba144fb7131f574a2 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 26 Aug 2023 06:01:03 +0200 Subject: [PATCH 336/624] Add search to Spizoo scraper The Spizoo search kind of sucks though: it works best if you just search for performer names --- scrapers/Spizoo.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scrapers/Spizoo.yml b/scrapers/Spizoo.yml index 7762fb043..8a170f490 100644 --- a/scrapers/Spizoo.yml +++ b/scrapers/Spizoo.yml @@ -12,7 +12,24 @@ sceneByURL: - spizoo.com/updates/ - vlogxxx.com/updates/ scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.spizoo.com/search.php?query={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: + sceneSearch: + common: + $row: 
//div[@class="model-update row"] + scene: + Title: $row//h3 + URL: $row//h3/../@href + Image: $row//img/@src + Details: $row//p[contains(@class, "model-update-description")] + sceneScraper: common: $scene_info: //section[@id="trailer-data" or @id="sceneInfo" or @id="scene-info" or @id="des-scene"] From 729a760140132da06363e38d21e7c2182518106b Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sat, 26 Aug 2023 17:01:05 +0200 Subject: [PATCH 337/624] Fix YAML validation errors in APOVStory --- scrapers/APOVStory.yml | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/scrapers/APOVStory.yml b/scrapers/APOVStory.yml index 15600605e..d78b57853 100644 --- a/scrapers/APOVStory.yml +++ b/scrapers/APOVStory.yml @@ -9,14 +9,14 @@ xPathScrapers: sceneScraper: scene: Studio: - Name: - fixed: 'A POV Story' + Name: + fixed: "A POV Story" Title: selector: //div[@class = 'trailerArea centerwrap']/h3 Details: selector: //div[@class = 'trailerContent']//*//text() concat: "\n\n" - postProcess: &ppStripDescription + postProcess: - replace: - regex: ^Description:\s* with: @@ -24,17 +24,14 @@ xPathScrapers: Name: //li/span[contains(text(),'CATEGORIES')]/parent::li//a//text() Performers: Name: //li/span[contains(text(),'FEATURING')]/parent::li//a//text() - Image: + Image: selector: //div[@class="player-thumb"]/img/@src0_3x postProcess: - replace: - - regex: ^ - with: 'https://apovstory.com' + - regex: ^ + with: "https://apovstory.com" Date: selector: //span[contains(text(),'RELEASE DATE')]/parent::li/text() - postProcess: &ppParseDate - - replace: - - regex: \s+ - with: "" - - parseDate: "January2,2006" -# last updated August 24, 2023 + postProcess: + - parseDate: "January 2, 2006" +# Last Updated August 24, 2023 From 0f02f68b8eb2231e3c6ce77ea63ff1080f33da8a Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Sat, 26 Aug 2023 23:06:32 +0300 Subject: [PATCH 338/624] Update ThirdRockEnt.yml Add sites 
analamateur.com, brokensluts.net, cumdumpsterteens.com, daughterjoi.com, downtofucking.com, fullpornnetwork.com, lesbiananalsluts.com, mommyjoi.com, mugfucked.com, realfuckingdating.com, shefucksonthefirstdate.com, slutsbts.com, slutspov.com. sluttybbws.com, teasingandpleasing.com, teenagetryouts.com & wifespov.com --- scrapers/ThirdRockEnt.yml | 40 +++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/scrapers/ThirdRockEnt.yml b/scrapers/ThirdRockEnt.yml index e92974edb..962ec3401 100644 --- a/scrapers/ThirdRockEnt.yml +++ b/scrapers/ThirdRockEnt.yml @@ -2,29 +2,46 @@ name: ThirdRockEnt sceneByURL: - action: scrapeXPath url: - - abbiemaley.com/trailers + - abbiemaley.com/trailers/ + - analamateur.com/trailers/ - analbbc.com/trailers/ - analized.com/trailers/ - analviolation.com/trailers/ - baddaddypov.com/trailers/ - badfamilypov.com/trailers/ - badmommypov.com/trailers/ + - brokensluts.net/trailers/ + - cumdumpsterteens.com/trailers/ + - daughterjoi.com/trailers/ + - downtofuckdating.com/trailers/ - downtofuckdating.com/trailers/ - dtfsluts.com/trailers/ + - fullpornnetwork.com/trailers/ - girlfaction.com/trailers/ - hergape.com/trailers/ - homemadeanalwhores.com/trailers/ - jamesdeen.com/trailers/ + - lesbiananalsluts.com/trailers/ + - mommyjoi.com/trailers/ + - mugfucked.com/trailers/ - onlyprince.com/trailers/ - pervertgallery.com/trailers/ - porkvendors.com/trailers/ - pornforce.com/trailers/ - povperverts.net/trailers/ - publicsexdate.com/trailers/ + - realfuckingdating.com/trailers/ + - shefucksonthefirstdate.com/trailers/ - slutinspection.com/trailers/ + - slutsbts.com/trailers/ + - slutspov.com/trailers/ + - sluttybbws.com/trailers/ + - teasingandpleasing.com/trailers/ - teenageanalsluts.com/trailers/ - teenagecorruption.com/trailers/ + - teenagetryouts.com/trailers/ - twistedvisual.com/trailers/ + - wifespov.com/trailers/ - yourmomdoesanal.com/trailers/ - yourmomdoesporn.com/trailers/ scraper: sceneScraper 
@@ -49,29 +66,44 @@ xPathScrapers: postProcess: - map: https://AbbieMaley.com: Abbie Maley + https://AnalAmateur.com: Anal Amateur https://AnalBBC.com: AnalBBC https://ANALIZED.com: Analized https://analviolation.com: Anal Violation https://BadDaddyPOV.com: Bad Daddy POV - https://BadFamilyPov.com: Bad Family POV - https://BadMommyPov.com: Bad Mommy POV + https://BadFamilyPOV.com: Bad Family POV https://BadMommyPOV.com: Bad Mommy POV + https://BrokenSluts.net: Broken Sluts + https://CumDumpsterTeens.com: Cum Dumpster Teens + https://DaughterJOI.com: Daughter JOI https://DownToFuckDating.com: Down To Fuck Dating https://DTFsluts.com: DTF Sluts + https://FullPornNetwork.com: Full Porn Network https://Girlfaction.com: Girlfaction https://HerGape.com: Her Gape https://HomemadeAnalWhores.com: Homemade Anal Whores https://JamesDeen.com: James Deen + https://LesbianAnalSluts.com: Lesbian Anal Sluts + https://MommyJOI.com: Mommy JOI + https://mugfucked.com: Mug Fucked https://OnlyPrince.com: Only Prince https://PervertGallery.com: Pervert Gallery https://PorkVendors.com: Pork Vendors https://PornForce.com: Porn Force https://POVPerverts.net: POV Perverts https://publicsexdate.com: Public Sex Date + https://RealFuckingDating.com: Real Fucking Dating + https://SheFucksOnTheFirstDate.com: She Fucks On The First Date https://SlutInspection.com: Slut Inspection + https://slutsbts.com: Sluts BTS + https://slutspov.com: Sluts POV + https://sluttybbws.com: Slutty BBWs + https://TeasingandPleasing.com: Teasing And Pleasing https://teenageanalsluts.com: Teenage Anal Sluts https://TeenageCorruption.com: Teenage Corruption + https://teenagetryouts.com: Teenage Tryouts https://TwistedVisual.com: Twisted Visual + https://wifespov.com: Wifes POV https://yourmomdoesanal.com: Your Mom Does Anal https://YourMomDoesPorn.com: Your Mom Does Porn -# Last Updated December 20, 2022 +# Last Updated August 26, 2023 From 6c68c33d9ed31c7623ab6df9c2305063723491f9 Mon Sep 17 00:00:00 2001 From: 
StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Sat, 26 Aug 2023 23:19:56 +0300 Subject: [PATCH 339/624] Update SCRAPERS-LIST.md Added analamateur.com, brokensluts.net, cumdumpsterteens.com, daughterjoi.com, downtofucking.com, fullpornnetwork.com, lesbiananalsluts.com, mommyjoi.com, mugfucked.com, realfuckingdating.com, shefucksonthefirstdate.com, slutsbts.com, slutspov.com. sluttybbws.com, teasingandpleasing.com, teenagetryouts.com & wifespov.com --- SCRAPERS-LIST.md | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index c6319accd..f084f418c 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -75,6 +75,7 @@ anal-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- anal-beauty.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- anal4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- analacrobats.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analamateur.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- analbbc.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- analcheckups.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- analhookers.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- @@ -117,9 +118,9 @@ atkexotics.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- -auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- auntjudys.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -237,6 +238,7 @@ brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brokenlatinawhores.com|BrokenLatinaWhores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokensluts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brookelynnebriar.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- bruceandmorgan.net|bruceandmorgan.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Amateur Fetish brutalinvasion.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -301,6 +303,7 @@ cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- cumblastcity.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumdumpsterteens.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -368,6 +371,7 @@ darkroomvr.com|DarkRoomVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- darksodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- darkx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- data18.com|data18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daughterjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ddfbusty.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- deauxmalive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -386,8 +390,8 @@ devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- dfbnetwork.com|DFB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|python|- dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -537,9 +541,10 @@ fuckingparties.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fuckingstreet.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fuckstudies.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- fuckthegeek.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fullpornnetwork.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- funbags.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -futanarica.com|Futanarica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- futanari.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +futanarica.com|Futanarica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gag-n-gape.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- gangav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored gangbangcreampie.com|Algolia_GangBangCreampie.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- @@ -766,6 +771,7 @@ latinoguysporn.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay leannecrow.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- legsex.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- lesbea.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +lesbiananalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
lesbianass.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian lesbianfactor.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian lesbiantribe.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -844,10 +850,10 @@ metartnetwork.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_ metartx.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milfsodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- -milfsodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milfvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -870,6 +876,7 @@ momcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momisamilf.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momlover.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- mommyblowsbest.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mommyjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mommysboy.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- mommysboy.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mommysgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- @@ -897,6 +904,7 @@ mrluckypov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mrpov.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- muchaslatinas.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- mugfucked.com|MugFucked.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+mugfucked.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- muses.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- my-slavegirl.com|my-slavegirl.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mybabysittersclub.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1099,8 +1107,8 @@ puretaboo.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x: pussyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored putalocura.com|Putalocura.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV -rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rachel-steele.com|RachelSteele.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ragingstallion.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -1108,6 +1116,7 @@ ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawattack.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +realfuckingdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- @@ -1175,6 +1184,7 @@ shagmag.com|shagmag.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Magazines shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- share.myfreecams.com|MFC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+shefucksonthefirstdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shegotsix.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shelovesblack.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- shesbrandnew.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -1202,6 +1212,9 @@ sissypov.com|SissyPov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- slayed.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- slroriginals.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR slutinspection.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +slutsbts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +slutspov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sluttybbws.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- smashed.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- smashpictures.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- smokingmina.com|SmokingMina.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1291,8 +1304,10 @@ teacherfucksteens.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:| teachmyass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teamskeet.com|Teamskeet.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- teasepov.com|TeasePOV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teasingandpleasing.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenageanalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenagecorruption.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenagetryouts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teenanalcasting.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- teencorezine.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1493,6 +1508,7 @@ whiteteensblackcocks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
whorecraftvr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fantasy VR wicked.com (/movies)|WickedMovies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wicked.com|Algolia_Wicked.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +wifespov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- wildoncam.com|trafficpimps.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- williamhiggins.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay willtilexxx.com|WillTileXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1542,6 +1558,7 @@ zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR zishy.com|Zishy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- + ## Non url scrapers The following scrapers do not support xxxByURL scraping and are not included to the above list. To keep the below list tidy please add scrapers keeping the list in alphabetical order by the .yml filename. From 2638a2ad77b5dd8af17aa1087ad492e37a52b630 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Sat, 26 Aug 2023 23:28:07 +0300 Subject: [PATCH 340/624] Update ThirdRockEnt.yml Add sites analamateur.com, brokensluts.net, cumdumpsterteens.com, daughterjoi.com, downtofucking.com, fullpornnetwork.com, lesbiananalsluts.com, mommyjoi.com, mugfucked.com, realfuckingdating.com, shefucksonthefirstdate.com, slutsbts.com, slutspov.com. 
sluttybbws.com, teasingandpleasing.com, teenagetryouts.com & wifespov.com --- scrapers/ThirdRockEnt.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/ThirdRockEnt.yml b/scrapers/ThirdRockEnt.yml index 962ec3401..b41106f71 100644 --- a/scrapers/ThirdRockEnt.yml +++ b/scrapers/ThirdRockEnt.yml @@ -80,7 +80,7 @@ xPathScrapers: https://DTFsluts.com: DTF Sluts https://FullPornNetwork.com: Full Porn Network https://Girlfaction.com: Girlfaction - https://HerGape.com: Her Gape + https://HerGape.com: HerGape https://HomemadeAnalWhores.com: Homemade Anal Whores https://JamesDeen.com: James Deen https://LesbianAnalSluts.com: Lesbian Anal Sluts From 6404dfc1192a50385921b9bcdf76ad1613b97ad5 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Sat, 26 Aug 2023 23:31:18 +0300 Subject: [PATCH 341/624] Update ThirdRockEnt.yml Add network sites analamateur.com, brokensluts.net, cumdumpsterteens.com, daughterjoi.com, downtofucking.com, fullpornnetwork.com, lesbiananalsluts.com, mommyjoi.com, mugfucked.com, realfuckingdating.com, shefucksonthefirstdate.com, slutsbts.com, slutspov.com. 
sluttybbws.com, teasingandpleasing.com, teenagetryouts.com & wifespov.com --- scrapers/ThirdRockEnt.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/ThirdRockEnt.yml b/scrapers/ThirdRockEnt.yml index b41106f71..962ec3401 100644 --- a/scrapers/ThirdRockEnt.yml +++ b/scrapers/ThirdRockEnt.yml @@ -80,7 +80,7 @@ xPathScrapers: https://DTFsluts.com: DTF Sluts https://FullPornNetwork.com: Full Porn Network https://Girlfaction.com: Girlfaction - https://HerGape.com: HerGape + https://HerGape.com: Her Gape https://HomemadeAnalWhores.com: Homemade Anal Whores https://JamesDeen.com: James Deen https://LesbianAnalSluts.com: Lesbian Anal Sluts From d701405d097ad7269ba4983dfc35f6f8af4d8148 Mon Sep 17 00:00:00 2001 From: StashMaitikeisi <143062645+StashMaitikeisi@users.noreply.github.com> Date: Sat, 26 Aug 2023 23:35:49 +0300 Subject: [PATCH 342/624] Update ThirdRockEnt.yml Add network sites analamateur.com, brokensluts.net, cumdumpsterteens.com, daughterjoi.com, downtofucking.com, fullpornnetwork.com, lesbiananalsluts.com, mommyjoi.com, mugfucked.com, realfuckingdating.com, shefucksonthefirstdate.com, slutsbts.com, slutspov.com. 
sluttybbws.com, teasingandpleasing.com, teenagetryouts.com & wifespov.com --- scrapers/ThirdRockEnt.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/scrapers/ThirdRockEnt.yml b/scrapers/ThirdRockEnt.yml index 962ec3401..f027b4390 100644 --- a/scrapers/ThirdRockEnt.yml +++ b/scrapers/ThirdRockEnt.yml @@ -14,7 +14,6 @@ sceneByURL: - cumdumpsterteens.com/trailers/ - daughterjoi.com/trailers/ - downtofuckdating.com/trailers/ - - downtofuckdating.com/trailers/ - dtfsluts.com/trailers/ - fullpornnetwork.com/trailers/ - girlfaction.com/trailers/ From 7dbc2d7d640f71c2cb286e4975bfeee251065dab Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 28 Aug 2023 08:23:33 +0200 Subject: [PATCH 343/624] Fix for titles at RawAttack --- scrapers/Spizoo.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scrapers/Spizoo.yml b/scrapers/Spizoo.yml index 8a170f490..882562bcc 100644 --- a/scrapers/Spizoo.yml +++ b/scrapers/Spizoo.yml @@ -35,7 +35,13 @@ xPathScrapers: $scene_info: //section[@id="trailer-data" or @id="sceneInfo" or @id="scene-info" or @id="des-scene"] $video_section: (//section[@id="trailer-video" or @id="scene" or @id="scene-video"] | //div[contains(@class, "videoHolder")]) scene: - Title: //h1|//h2 + Title: + selector: //div[@class="title" or @class="row"]//h1 | //h2[contains(@class, "titular")] | //title + postProcess: + # RawAttack titles have a trailing dash and space + - replace: + - regex: \s\-\s*$ + with: Date: selector: $scene_info//p[@class="date"] postProcess: From cd81e764d1f1c5fe973693de8e7270a2284dee1b Mon Sep 17 00:00:00 2001 From: ZzazzCDTS <112476145+zzazzcdts@users.noreply.github.com> Date: Mon, 28 Aug 2023 14:06:06 +0100 Subject: [PATCH 344/624] Update Assylum.yml Fixed an issue where images were not being scraped. 
--- scrapers/Assylum.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Assylum.yml b/scrapers/Assylum.yml index d98450a1c..14fadaa0a 100644 --- a/scrapers/Assylum.yml +++ b/scrapers/Assylum.yml @@ -25,7 +25,7 @@ xPathScrapers: selector: //div[@class='mainpic']/comment() postProcess: - replace: - - regex: .*src="(.*?)".* + - regex: Works without token - selector: //base/@href|//div[@class="player-thumb"]//img[contains(@class, "update_thumb")]/@src0_1x - concat: "|" + Image: &image //div[@class="player-thumb"]//img[contains(@class, "update_thumb")]/@src0_1x + Details: &details + selector: //p[contains(@class, "descriptionFull")]//text() + concat: "\n\n" postProcess: - replace: - - regex: "[|].+?\\.hwcdn.net/(.+?)\\?.+$" + - regex: (.*?)\s*Read Less with: $1 - Details: - selector: //div[@class="update-info-block"]/h3[text()="Description:"]/following-sibling::text() - concat: "\n\n" + + newSiteScraper: + scene: + Title: //h1 + Date: *date + Image: *image + Tags: *tags + Studio: *studio + Details: *details + Performers: + Name: //div[@class="card txt-lg-left"]//*[@class="model-name"] + URL: //div[@class="card txt-lg-left"]//a/@href performerScraper: common: From 9fefe95354acb568dfbca10f00dd1c10ee0615e3 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 8 Nov 2023 09:14:18 +0100 Subject: [PATCH 517/624] Add new ExploitedX site to SCRAPERS-LIST --- SCRAPERS-LIST.md | 1 + 1 file changed, 1 insertion(+) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index ae8de9291..dc7764473 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -496,6 +496,7 @@ evolvedfightslez.com|evolvedfightslez.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- exotic4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- explicite-art.com|ExpliciteArt.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- exploitedcollegegirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+excogigirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- extrapackage.com|ExtraPackage.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- extremepickups.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- exxxtrasmall.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- From 3417bc3ef99cc62e301f12e51205a7cd693419b1 Mon Sep 17 00:00:00 2001 From: pops64 Date: Wed, 15 Nov 2023 18:48:49 -0500 Subject: [PATCH 518/624] Add files via upload --- scrapers/IFeelMyself.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/scrapers/IFeelMyself.py b/scrapers/IFeelMyself.py index fe9d13059..961684184 100644 --- a/scrapers/IFeelMyself.py +++ b/scrapers/IFeelMyself.py @@ -41,7 +41,7 @@ def extract_SceneInfo(table,cover_url=None): debugPrint(f"performer:{performer}") date = datetime.strptime(date, '%d %b %Y').date().strftime('%Y-%m-%d') #Convert date to ISO format if cover_url == None: - cover_url=str(table.find("img")['src']) + cover_url=str(table.find("video")['poster']) title = table.find(class_= ["entryHeadingFlash","entryHeading"]).find('a').get_text().replace("\x92","'") media_id = re.search(r"\/(\d{3,5})\/",cover_url,re.I).group(1) artist_id = re.search(r"\/(f\d{4,5})",cover_url,re.I).group(1) @@ -96,13 +96,14 @@ def scrapeScene(filename,date,url): debugPrint(artist_id+"-"+video_id) tables = response.find_all(class_= ["blog_wide_news_tbl entry ppss-scene","entry ppss-scene"]) for table in tables: - img=str(table.find("img")['src']) - debugPrint(f"Image:{str(img)}") - if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): - debugPrint("Found a single match video!") - # Extract data from this single result - ret = extract_SceneInfo(table) - break + if table.find('video'): + img=str(table.find("video")['poster']) + debugPrint(f"Image:{str(img)}") + if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): + debugPrint("Found a single match 
video!") + # Extract data from this single result + ret = extract_SceneInfo(table) + break else: sys.stderr.write("0 matches found! Checking offset") pages=int(response.find_all("a", class_="pagging_nonsel")[-1].get_text()) @@ -113,12 +114,13 @@ def scrapeScene(filename,date,url): response = browser.page tables = response.find_all(class_= ["blog_wide_news_tbl entry ppss-scene","entry ppss-scene"]) for table in tables: - img=str(table.find("img")["src"]) - debugPrint(f"Image:{img}") - if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): - sys.stderr.write("FOUND") - ret = extract_SceneInfo(table) - break + if table.find('video'): + img=str(table.find("video")["poster"]) + debugPrint(f"Image:{img}") + if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): + sys.stderr.write("FOUND") + ret = extract_SceneInfo(table) + break else: sys.stderr.write("0 matches found!, check your filename") From 4714e19f455037da279d120fa5380c149b1d3f54 Mon Sep 17 00:00:00 2001 From: pops64 Date: Wed, 15 Nov 2023 19:15:59 -0500 Subject: [PATCH 519/624] Some additonal tweaks If we already had url that would have an img instead of a video tag. 
Add a check to see if the img tag is present and go with that if not fall back to the video tag --- scrapers/IFeelMyself.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scrapers/IFeelMyself.py b/scrapers/IFeelMyself.py index 961684184..69dbb8105 100644 --- a/scrapers/IFeelMyself.py +++ b/scrapers/IFeelMyself.py @@ -41,7 +41,10 @@ def extract_SceneInfo(table,cover_url=None): debugPrint(f"performer:{performer}") date = datetime.strptime(date, '%d %b %Y').date().strftime('%Y-%m-%d') #Convert date to ISO format if cover_url == None: - cover_url=str(table.find("video")['poster']) + if table.find("img"): + cover_url=str(table.find("img")['src']) + else: + cover_url=str(table.find("video")['poster']) title = table.find(class_= ["entryHeadingFlash","entryHeading"]).find('a').get_text().replace("\x92","'") media_id = re.search(r"\/(\d{3,5})\/",cover_url,re.I).group(1) artist_id = re.search(r"\/(f\d{4,5})",cover_url,re.I).group(1) @@ -99,7 +102,7 @@ def scrapeScene(filename,date,url): if table.find('video'): img=str(table.find("video")['poster']) debugPrint(f"Image:{str(img)}") - if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): + if (f"/{artist_id}-{video_id}vg.jpg" in img) or (f"/{artist_id}-{video_id}hs.jpg" in img): debugPrint("Found a single match video!") # Extract data from this single result ret = extract_SceneInfo(table) @@ -117,7 +120,7 @@ def scrapeScene(filename,date,url): if table.find('video'): img=str(table.find("video")["poster"]) debugPrint(f"Image:{img}") - if (f"/{artist_id}-{video_id}" in img) and img.endswith(("vg.jpg","hs.jpg")): + if (f"/{artist_id}-{video_id}vg.jpg" in img) or (f"/{artist_id}-{video_id}hs.jpg" in img): sys.stderr.write("FOUND") ret = extract_SceneInfo(table) break From cd0d39c19fcb609cf36adb50306372d8178e52d8 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 17 Nov 2023 06:07:36 +0100 Subject: [PATCH 520/624] Make URL patterns for ChristianXXX more specific Fixes false 
positive matches with URLs from itspov.com --- scrapers/ChristianXXX.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scrapers/ChristianXXX.yml b/scrapers/ChristianXXX.yml index 427362448..048814eb7 100644 --- a/scrapers/ChristianXXX.yml +++ b/scrapers/ChristianXXX.yml @@ -2,9 +2,9 @@ name: "ChristianXXX" sceneByURL: - action: scrapeXPath url: - - becomingfemme.com - - pure-ts.com - - tspov.com + - becomingfemme.com/tour + - pure-ts.com/tour + - tspov.com/tour scraper: sceneScraper xPathScrapers: sceneScraper: @@ -35,4 +35,4 @@ xPathScrapers: with: $1$2 - regex: ^\/\/ with: "https://" -# Last Updated June 26, 2022 +# Last Updated November 17, 2023 From abd2d3724d4a6a3f1050f8d73396770880d1fb98 Mon Sep 17 00:00:00 2001 From: l1xander <125724412+l1xander@users.noreply.github.com> Date: Sat, 18 Nov 2023 15:37:58 +0530 Subject: [PATCH 521/624] Delete scrapers/Pure-BBW.yml --- scrapers/Pure-BBW.yml | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 scrapers/Pure-BBW.yml diff --git a/scrapers/Pure-BBW.yml b/scrapers/Pure-BBW.yml deleted file mode 100644 index 9e659b32b..000000000 --- a/scrapers/Pure-BBW.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: "Pure-BBW" -sceneByURL: - - action: scrapeXPath - url: - - pure-bbw.com/ - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="video_membership"]/div[@class="titlebox clear"]/h3/text() - Details: //div[@class="aboutvideo"]/p/text() - Performers: - Name: //div[@class="video_description"]/ul[@class="featuredModels"]/li/a/span/text() - Image: - selector: //div[@class="videohere"]/img[contains(@class,"stdimage")]/@src | //script[contains(.,'jwplayer("jwbox").setup')]/text() - postProcess: - - replace: - - regex: (.+image:\s+")(.+jpg)(.+) - with: $2 - - regex: ^ - with: "https://pure-bbw.com/" - Studio: - Name: - fixed: Pure-BBW - Date: - selector: //div[@class="video_description"]/h4[1]/text() - postProcess: - - replace: - 
- regex: .*(\d{4})-(\d{2})-(\d{2}).* - with: $1-$2-$3 - - parseDate: 2006-01-02 -# Last Updated March 11, 2021 From 393e9e120a500b732d590f460c7f0d6f0df02cbb Mon Sep 17 00:00:00 2001 From: l1xander <125724412+l1xander@users.noreply.github.com> Date: Sat, 18 Nov 2023 15:39:15 +0530 Subject: [PATCH 522/624] Added Pure XXX Scraper. Supported sites: - pure-xxx.com - pure-bbw.com - pure-ts.com --- scrapers/Pure-XXX.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scrapers/Pure-XXX.yml diff --git a/scrapers/Pure-XXX.yml b/scrapers/Pure-XXX.yml new file mode 100644 index 000000000..9953ad932 --- /dev/null +++ b/scrapers/Pure-XXX.yml @@ -0,0 +1,40 @@ +name: "Pure-XXX" +sceneByURL: + - action: scrapeXPath + url: + - pure-xxx.com + - pure-ts.com + - pure-bbw.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@class="video_membership"]/div[@class="titlebox clear"]/h3/text() + Details: //div[@class="aboutvideo"]/p/text() + Performers: + Name: //div[@class="video_description"]/ul[@class="featuredModels"]/li/a/span/text() + Image: + selector: (//div[@id="logo"]//a/@href | //div[@class="flexslider"]//a/@href) | (//div[@class="videohere"]/img[contains(@class,"stdimage")]/@src | //script[contains(.,'jwplayer("jwbox").setup')]/text()) + concat: '@~@' + postProcess: + - replace: + - regex: (.+@~@)(.+image:\s+")(.+jpg)(.+) + with: $1$3 + - regex: \/\/(.*).com\/.*@~@(.*) + with: https://$1.com$2 + Studio: + Name: + selector: //div[@id="logo"]//img/@alt | //div[@class="flexslider"]//a/@title + postProcess: + - replace: + - regex: (.*)\-(.*) + - with: $1 $2 + Date: + selector: //div[@class="video_description"]/h4[1]/text() + postProcess: + - replace: + - regex: .*(\d{4})-(\d{2})-(\d{2}).* + with: $1-$2-$3 + - parseDate: 2006-01-02 +# Last Updated November 18, 2023 From 2b08c78ca3029a23e78e138f3578de595b6e83dc Mon Sep 17 00:00:00 2001 From: l1xander 
<125724412+l1xander@users.noreply.github.com> Date: Sat, 18 Nov 2023 19:36:30 +0530 Subject: [PATCH 523/624] Updated TheScoreGroup scraper * Fixed: Details not getting scrapped from gallery pages * Added the following supported sites - scorevideos.com - milftugs.com --- scrapers/TheScoreGroup.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup.yml index 39c096752..96cbf7cb5 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup.yml @@ -13,6 +13,8 @@ sceneByURL: - scoreland.com/ - scoreland2.com/ - xlgirls.com/ + - scorevideos.com/ + - milftugs.com/ scraper: sceneScraper galleryByURL: - action: scrapeXPath @@ -78,9 +80,9 @@ xPathScrapers: Studio: *studioAttr Date: *dateAttr Details: - selector: $photopage//div[@class="p-desc"]/text() + selector: $photopage//div[contains(@class, 'p-desc')]/text() concat: "\n" Tags: Name: $photopage//div[@class='mb-3']/a/text() Performers: *performersAttr -# Last Updated September 24, 2023 +# Last Updated November 18, 2023 From f962b73787c1775a85f55b0f3f6acfc4bccd7d89 Mon Sep 17 00:00:00 2001 From: l1xander <125724412+l1xander@users.noreply.github.com> Date: Sun, 19 Nov 2023 10:14:43 +0530 Subject: [PATCH 524/624] Fixed and simplified Studio postProcess --- scrapers/Pure-XXX.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/Pure-XXX.yml b/scrapers/Pure-XXX.yml index 9953ad932..98186f748 100644 --- a/scrapers/Pure-XXX.yml +++ b/scrapers/Pure-XXX.yml @@ -28,8 +28,8 @@ xPathScrapers: selector: //div[@id="logo"]//img/@alt | //div[@class="flexslider"]//a/@title postProcess: - replace: - - regex: (.*)\-(.*) - - with: $1 $2 + - regex: \- + with: ' ' Date: selector: //div[@class="video_description"]/h4[1]/text() postProcess: @@ -37,4 +37,4 @@ xPathScrapers: - regex: .*(\d{4})-(\d{2})-(\d{2}).* with: $1-$2-$3 - parseDate: 2006-01-02 -# Last Updated November 18, 2023 +# Last Updated November 19, 2023 From 
228ef5292b6ae8177a9bdc8c86f0a832f2a7f873 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 20 Nov 2023 18:15:16 +0100 Subject: [PATCH 525/624] Add XPath scraper for Antonio Suleiman --- SCRAPERS-LIST.md | 1 + scrapers/AntonioSuleiman.yml | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 scrapers/AntonioSuleiman.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index dc7764473..a34852ba7 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -99,6 +99,7 @@ angelinacastrolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- anidb.net|AniDB.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Hentai Database anilos.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- animecharactersdatabase.com|AnimeCharactersDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|Database +antoniosuleiman.com|AntonioSuleiman.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- apovstory.com|APOVStory.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- archangelvideo.com|ArchAngelVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ariellynn.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/AntonioSuleiman.yml b/scrapers/AntonioSuleiman.yml new file mode 100644 index 000000000..a9bea72dc --- /dev/null +++ b/scrapers/AntonioSuleiman.yml @@ -0,0 +1,50 @@ +name: AntonioSuleiman.com +sceneByURL: + - action: scrapeXPath + url: + - antoniosuleiman.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + common: + $scene: //div[contains(@class, "updatesBlock")] + $image: (//div[contains(@class, "updatesBlock")]//img)[1] + scene: + Title: $scene//h3 + Date: + selector: ($scene//div[contains(@class, "updateDetails")]//p)[1] + postProcess: + - parseDate: 2006-01-02 + Details: $scene/div[@class="wrapper"]/*[last()] + Tags: + Name: + # The worst way to do tags but it's all they have + selector: //meta[@name="keywords"]/@content + split: "," + Performers: + Name: $scene//*[contains(@class,"tour_update_models")]//a + Image: >- + 
$image/@src0_4x | + $image/@src0_3x | + $image/@src0_2x | + $image/@src0_1x + Studio: + Name: + fixed: Antonio Suleiman +debug: + printHTML: true +driver: + cookies: + - CookieURL: "https://antoniosuleiman.com" + Cookies: + - Name: "PHPSESSID" + Domain: ".antoniosuleiman.com" + # Unsure about the duration of this cookie + Value: ovejq7d8cfhoc99q1jrn265af8 + Path: "/" + - Name: "lang" + Domain: ".antoniosuleiman.com" + # 0 is English, 1 is Arabic + Value: "0" + Path: "/" +# Last Updated November 20, 2023 From d59b99fd9ffc97b8df4dccddd9b6d787be2a4f45 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 20 Nov 2023 18:16:42 +0100 Subject: [PATCH 526/624] Update SCRAPERS-LIST for Pure-XXX network --- SCRAPERS-LIST.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a34852ba7..21385bed2 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1185,8 +1185,9 @@ publicsexadventures.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicsexdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- puffynetwork.com|Puffynetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pumaswedexxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -pure-bbw.com|Pure-BBW.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -pure-ts.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-bbw.com|Pure-XXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-ts.com|Pure-XXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-xxx.com|Pure-XXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- purebj.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- puremature.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- purepov.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- From 2dab51f839ba0ca536de47f0dd5cabb5d3d523bd Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Tue, 21 Nov 2023 12:47:10 +0100 Subject: [PATCH 527/624] PMVHaven scraper works, will do a bit more testing --- 
SCRAPERS-LIST.md | 1 + scrapers/PMVHaven.py | 118 ++++++++++++++++++++++++++++++++++++++++++ scrapers/PMVHaven.yml | 9 ++++ 3 files changed, 128 insertions(+) create mode 100644 scrapers/PMVHaven.py create mode 100644 scrapers/PMVHaven.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 21385bed2..9241d6189 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1133,6 +1133,7 @@ playdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- playwithrae.com|PlayWithRae.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- plumperpass.com|PlumperPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- plushies.tv|Plushies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pmvhaven.com|PMVHeaven.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|PMVs porkvendors.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pornbox.com|Pornbox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- porncornvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR diff --git a/scrapers/PMVHaven.py b/scrapers/PMVHaven.py new file mode 100644 index 000000000..5d8b28842 --- /dev/null +++ b/scrapers/PMVHaven.py @@ -0,0 +1,118 @@ +import os +import json +import sys +import requests +import random +import time +from urllib.parse import urlparse +# extra modules below need to be installed +try: + import cloudscraper +except ModuleNotFoundError: + print("You need to install the cloudscraper module. (https://pypi.org/project/cloudscraper/)", file=sys.stderr) + print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install cloudscraper", file=sys.stderr) + sys.exit() + +try: + from lxml import html +except ModuleNotFoundError: + print("You need to install the lxml module. 
(https://lxml.de/installation.html#installation)", file=sys.stderr) + print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr) + sys.exit() + +try: + import py_common.log as log +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() + +#bugfix for socks5 proxies, due to pySocks implementation incompatibility with Stash +proxy = os.environ['HTTPS_PROXY'] +if proxy != "" and proxy.startswith("socks5://"): + proxy = proxy.replace("socks5://", "socks5h://") + os.environ['HTTPS_PROXY'] = proxy + os.environ['HTTP_PROXY'] = proxy + +URL_XPATH = '//meta[@property="og:video:url"]/@content' +IMAGE_XPATH = '//meta[@property="og:image"]/@content' + +def getHTML(url, retries=0): + scraper = cloudscraper.create_scraper() + + try: + scraped = scraper.get(url) + except requests.exceptions.Timeout as exc_time: + log.debug(f"Timeout: {exc_time}") + return getHTML(url, retries + 1) + except Exception as e: + log.error(f"scrape error {e}") + sys.exit(1) + if scraped.status_code >= 400: + if retries < 10: + wait_time = random.randint(1, 4) + log.debug(f"HTTP Error: {scraped.status_code}, waiting {wait_time} seconds") + time.sleep(wait_time) + return getHTML(url, retries + 1) + log.error(f"HTTP Error: {scraped.status_code}, giving up") + sys.exit(1) + + return html.fromstring(scraped.text) + +def getXPATH(pageTree, XPATH): + res = pageTree.xpath(XPATH) + if res: + return res[0] + return "" + +def getData(sceneId): + try: + req = requests.post("https://pmvhaven.com/api/v2/videoInput", json={ + "video": sceneId, + "mode": "InitVideo", + "view": True + }) + except Exception as e: + log.error(f"scrape error {e}") + sys.exit(1) + return req.json() + +def getURL(pageTree): + return getXPATH(pageTree, URL_XPATH) + +def getIMG(pageTree): + return getXPATH(pageTree, IMAGE_XPATH) + +def 
main(): + params = json.loads(sys.stdin.read()) + if not params['url']: + log.error('No URL entered.') + sys.exit(1) + + tree = getHTML(params['url']) + data = getData(getURL(tree).split('_')[-1])['video'][0] + + tags = data['tags'] + data['categories'] + + ret = { + 'title': data['title'], + 'image': getIMG(tree), + 'date': data['isoDate'].split('T')[0], + 'details': data['description'], + 'studio': { + 'Name': data['creator'] + }, + 'tags':[ + { + 'name': x.strip() + } for x in tags + ], + 'performers': [ + { + 'name': x.strip() + } for x in data['stars'] + ] + } + print(json.dumps(ret)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scrapers/PMVHaven.yml b/scrapers/PMVHaven.yml new file mode 100644 index 000000000..b56cb6de3 --- /dev/null +++ b/scrapers/PMVHaven.yml @@ -0,0 +1,9 @@ +name: PMVHaven +sceneByURL: + - url: + - pmvhaven.com/video/ + action: script + script: + - python + - PMVHaven.py +# Last Updated November 21, 2023 From 7841730bb25f7566d83836f15cc31b793b618b55 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 21 Nov 2023 17:51:54 +0100 Subject: [PATCH 528/624] Update Caribbean scraper to get the displayed scene title --- scrapers/Carib.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scrapers/Carib.yml b/scrapers/Carib.yml index 3f73324f9..84fb6ab15 100644 --- a/scrapers/Carib.yml +++ b/scrapers/Carib.yml @@ -24,8 +24,7 @@ xPathScrapers: common: $movieinfo: //div[@class="movie-info section divider"] scene: - Title: /html/head/title/text() - # //div[contains(@class,"heading")]/h1/text() + Title: //div[contains(@class,"heading")]/h1/text() Details: $movieinfo/p URL: selector: //link[@hreflang="ja-JP"]/@href From 25a4146c8542f3519eecb1d8eda802511c60a1ee Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 21 Nov 2023 20:54:02 +0100 Subject: [PATCH 529/624] Add scene search to AntonioSuleiman scraper --- scrapers/AntonioSuleiman.yml | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 
deletions(-) diff --git a/scrapers/AntonioSuleiman.yml b/scrapers/AntonioSuleiman.yml index a9bea72dc..a4273ec2a 100644 --- a/scrapers/AntonioSuleiman.yml +++ b/scrapers/AntonioSuleiman.yml @@ -4,6 +4,14 @@ sceneByURL: url: - antoniosuleiman.com scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: https://antoniosuleiman.com/search.php?query={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: common: @@ -31,8 +39,16 @@ xPathScrapers: Studio: Name: fixed: Antonio Suleiman -debug: - printHTML: true + sceneSearch: + common: + $scene: //div[@data-url] + scene: + Title: $scene//p[@class="left-first-paragraph"] + URL: $scene/@data-url + # Search doesn't return any description but we can show the performers instead + Details: //div[@data-url]//p[@class="left-second-paragraph"] + Image: $scene//img/@src0_1x + Date: $scene//p[@class="right-paragraph"][1] driver: cookies: - CookieURL: "https://antoniosuleiman.com" From bbf0a2669d39f240c5142ff82ca73e4e4251f66f Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 10:10:40 +1100 Subject: [PATCH 530/624] Build package index --- .github/workflows/deploy.yml | 48 +++++++++++++++++++++++++++++ .gitignore | 2 ++ build_site.sh | 60 ++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 .github/workflows/deploy.yml create mode 100755 build_site.sh diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 000000000..4f216947f --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,48 @@ +name: Deploy repository to Github Pages + +on: + push: + branches: [ master, stable ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages +permissions: + contents: read + pages: write + 
id-token: write + +jobs: + build: + runs-on: ubuntu-20.04 + steps: + - name: Checkout main + uses: actions/checkout@v2 + with: + path: master + ref: master + - run: | + cd master + ./build_site.sh ../_site/develop + - name: Checkout Stable + uses: actions/checkout@v2 + with: + path: stable + ref: stable + - run: | + cd stable + ../master/build_site.sh ../_site/stable + - uses: actions/upload-pages-artifact@v1 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-20.04 + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v1 + diff --git a/.gitignore b/.gitignore index 9377605f1..393112e62 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ yarn-error.log # Scraper-generated files /scrapers/*.ini **/__pycache__/ + +/_site \ No newline at end of file diff --git a/build_site.sh b/build_site.sh new file mode 100755 index 000000000..1c1c7cf92 --- /dev/null +++ b/build_site.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +# builds a repository of scrapers +# outputs to _site with the following structure: +# index.yml +# .zip +# Each zip file contains the scraper.yml file and any other files in the same directory + +outdir="$1" +if [ -z "$outdir" ]; then + outdir="_site" +fi + +rm -rf "$outdir" +mkdir -p "$outdir" + +buildScraper() +{ + f=$1 + # get the scraper id from the filename + scraper_id=$(basename "$f" .yml) + + echo "Processing $scraper_id" + + # create a directory for the version + version=$(git log -n 1 --pretty=format:%h -- "$f") + updated=$(git log -n 1 --date="format:%F %T %z" --pretty=format:%ad -- "$f") + + # create the zip file + # copy other files + zipfile=$(realpath "$outdir/$scraper_id.zip") + + pushd $(dirname "$f") > /dev/null + if [ $(dirname "$f") != "./scrapers" ]; then + zip -r "$zipfile" . 
> /dev/null + else + zip "$zipfile" "$scraper_id.yml" > /dev/null + fi + popd > /dev/null + + name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') + + # write to spec index + echo "- id: $scraper_id + name: $name + version: $version + date: $updated + path: $scraper_id.zip + sha256: $(sha256sum "$zipfile" | cut -d' ' -f1) +" >> "$outdir"/index.yml +} + +# find all yml files in ./scrapers - these are packages individually +for f in ./scrapers/*.yml; do + buildScraper "$f" +done + +for f in ./scrapers/*/*.yml; do + buildScraper "$f" +done From 67a250d438911cfe5589cf0c74714cceac901339 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 22 Nov 2023 00:41:38 +0100 Subject: [PATCH 531/624] Fix scene search for AntonioSuleiman --- scrapers/AntonioSuleiman.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapers/AntonioSuleiman.yml b/scrapers/AntonioSuleiman.yml index a4273ec2a..47a5d61af 100644 --- a/scrapers/AntonioSuleiman.yml +++ b/scrapers/AntonioSuleiman.yml @@ -24,6 +24,7 @@ xPathScrapers: postProcess: - parseDate: 2006-01-02 Details: $scene/div[@class="wrapper"]/*[last()] + URL: //link[@rel="canonical"]/@href Tags: Name: # The worst way to do tags but it's all they have @@ -48,7 +49,7 @@ xPathScrapers: # Search doesn't return any description but we can show the performers instead Details: //div[@data-url]//p[@class="left-second-paragraph"] Image: $scene//img/@src0_1x - Date: $scene//p[@class="right-paragraph"][1] + Date: $scene//p[@class="right-paragraph" and not(span)] driver: cookies: - CookieURL: "https://antoniosuleiman.com" From 2f08a172734bb1bad3a105b1847e3aa1d30c7269 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:03:51 +1100 Subject: [PATCH 532/624] Move algolia scrapers --- scrapers/{Algolia_21Naturals.yml => 21Naturals/21Naturals.yml} | 0 scrapers/{Algolia_21Sextreme.yml => 21Sextreme/21Sextreme.yml} | 0 
scrapers/{Algolia_21Sextury.yml => 21Sextury/21Sextury.yml} | 0 scrapers/{Algolia_ActiveDuty.yml => ActiveDuty/ActiveDuty.yml} | 0 .../AddictedToGirls.yml} | 0 scrapers/{Algolia_Adultime.yml => Adultime/Adultime.yml} | 0 scrapers/{ => Algolia}/Algolia.py | 0 .../AllGirlMassage.yml} | 0 scrapers/{Algolia_Biphoria.yml => Biphoria/Biphoria.yml} | 0 scrapers/{Algolia_Blowpass.yml => Blowpass/Blowpass.yml} | 0 .../{Algolia_BurningAngel.yml => BurningAngel/BurningAngel.yml} | 0 scrapers/{Algolia_ChaosMen.yml => ChaosMen/ChaosMen.yml} | 0 .../ClubInfernoDungeon.yml} | 0 .../DogfartNetwork.yml} | 0 scrapers/{Algolia_EvilAngel.yml => EvilAngel/EvilAngel.yml} | 0 .../FalconStudios.yml} | 0 .../FantasyMassage.yml} | 0 .../FistingInferno.yml} | 0 .../GangBangCreampie.yml} | 0 .../{Algolia_GenderXFilms.yml => GenderXFilms/GenderXFilms.yml} | 0 .../Girlfriendsfilms.yml} | 0 scrapers/{Algolia_Girlsway.yml => Girlsway/Girlsway.yml} | 0 .../Gloryholesecrets.yml} | 0 scrapers/{Algolia_Johnnyrapid.yml => Johnnyrapid/Johnnyrapid.yml} | 0 scrapers/{Algolia_MenOver30.yml => MenOver30/MenOver30.yml} | 0 .../NextDoorStudios.yml} | 0 .../RagingStallion.yml} | 0 .../RoccoSiffredi.yml} | 0 scrapers/{Algolia_TabooHeat.yml => TabooHeat/TabooHeat.yml} | 0 scrapers/{Algolia_Wicked.yml => Wicked/Wicked.yml} | 0 .../disruptivefilms.yml} | 0 scrapers/{Algolia_filthykings.yml => filthykings/filthykings.yml} | 0 .../mypervyfamily.yml} | 0 scrapers/{Algolia_touchmywife.yml => touchmywife/touchmywife.yml} | 0 scrapers/{Algolia_xEmpire.yml => xEmpire/xEmpire.yml} | 0 .../zerotolerancefilms.yml} | 0 36 files changed, 0 insertions(+), 0 deletions(-) rename scrapers/{Algolia_21Naturals.yml => 21Naturals/21Naturals.yml} (100%) rename scrapers/{Algolia_21Sextreme.yml => 21Sextreme/21Sextreme.yml} (100%) rename scrapers/{Algolia_21Sextury.yml => 21Sextury/21Sextury.yml} (100%) rename scrapers/{Algolia_ActiveDuty.yml => ActiveDuty/ActiveDuty.yml} (100%) rename scrapers/{Algolia_AddictedToGirls.yml => 
AddictedToGirls/AddictedToGirls.yml} (100%) rename scrapers/{Algolia_Adultime.yml => Adultime/Adultime.yml} (100%) rename scrapers/{ => Algolia}/Algolia.py (100%) rename scrapers/{Algolia_AllGirlMassage.yml => AllGirlMassage/AllGirlMassage.yml} (100%) rename scrapers/{Algolia_Biphoria.yml => Biphoria/Biphoria.yml} (100%) rename scrapers/{Algolia_Blowpass.yml => Blowpass/Blowpass.yml} (100%) rename scrapers/{Algolia_BurningAngel.yml => BurningAngel/BurningAngel.yml} (100%) rename scrapers/{Algolia_ChaosMen.yml => ChaosMen/ChaosMen.yml} (100%) rename scrapers/{Algolia_ClubInfernoDungeon.yml => ClubInfernoDungeon/ClubInfernoDungeon.yml} (100%) rename scrapers/{Algolia_DogfartNetwork.yml => DogfartNetwork/DogfartNetwork.yml} (100%) rename scrapers/{Algolia_EvilAngel.yml => EvilAngel/EvilAngel.yml} (100%) rename scrapers/{Algolia_FalconStudios.yml => FalconStudios/FalconStudios.yml} (100%) rename scrapers/{Algolia_FantasyMassage.yml => FantasyMassage/FantasyMassage.yml} (100%) rename scrapers/{Algolia_FistingInferno.yml => FistingInferno/FistingInferno.yml} (100%) rename scrapers/{Algolia_GangBangCreampie.yml => GangBangCreampie/GangBangCreampie.yml} (100%) rename scrapers/{Algolia_GenderXFilms.yml => GenderXFilms/GenderXFilms.yml} (100%) rename scrapers/{Algolia_Girlfriendsfilms.yml => Girlfriendsfilms/Girlfriendsfilms.yml} (100%) rename scrapers/{Algolia_Girlsway.yml => Girlsway/Girlsway.yml} (100%) rename scrapers/{Algolia_Gloryholesecrets.yml => Gloryholesecrets/Gloryholesecrets.yml} (100%) rename scrapers/{Algolia_Johnnyrapid.yml => Johnnyrapid/Johnnyrapid.yml} (100%) rename scrapers/{Algolia_MenOver30.yml => MenOver30/MenOver30.yml} (100%) rename scrapers/{Algolia_NextDoorStudios.yml => NextDoorStudios/NextDoorStudios.yml} (100%) rename scrapers/{Algolia_RagingStallion.yml => RagingStallion/RagingStallion.yml} (100%) rename scrapers/{Algolia_RoccoSiffredi.yml => RoccoSiffredi/RoccoSiffredi.yml} (100%) rename scrapers/{Algolia_TabooHeat.yml => 
TabooHeat/TabooHeat.yml} (100%) rename scrapers/{Algolia_Wicked.yml => Wicked/Wicked.yml} (100%) rename scrapers/{Algolia_disruptivefilms.yml => disruptivefilms/disruptivefilms.yml} (100%) rename scrapers/{Algolia_filthykings.yml => filthykings/filthykings.yml} (100%) rename scrapers/{Algolia_mypervyfamily.yml => mypervyfamily/mypervyfamily.yml} (100%) rename scrapers/{Algolia_touchmywife.yml => touchmywife/touchmywife.yml} (100%) rename scrapers/{Algolia_xEmpire.yml => xEmpire/xEmpire.yml} (100%) rename scrapers/{Algolia_zerotolerancefilms.yml => zerotolerancefilms/zerotolerancefilms.yml} (100%) diff --git a/scrapers/Algolia_21Naturals.yml b/scrapers/21Naturals/21Naturals.yml similarity index 100% rename from scrapers/Algolia_21Naturals.yml rename to scrapers/21Naturals/21Naturals.yml diff --git a/scrapers/Algolia_21Sextreme.yml b/scrapers/21Sextreme/21Sextreme.yml similarity index 100% rename from scrapers/Algolia_21Sextreme.yml rename to scrapers/21Sextreme/21Sextreme.yml diff --git a/scrapers/Algolia_21Sextury.yml b/scrapers/21Sextury/21Sextury.yml similarity index 100% rename from scrapers/Algolia_21Sextury.yml rename to scrapers/21Sextury/21Sextury.yml diff --git a/scrapers/Algolia_ActiveDuty.yml b/scrapers/ActiveDuty/ActiveDuty.yml similarity index 100% rename from scrapers/Algolia_ActiveDuty.yml rename to scrapers/ActiveDuty/ActiveDuty.yml diff --git a/scrapers/Algolia_AddictedToGirls.yml b/scrapers/AddictedToGirls/AddictedToGirls.yml similarity index 100% rename from scrapers/Algolia_AddictedToGirls.yml rename to scrapers/AddictedToGirls/AddictedToGirls.yml diff --git a/scrapers/Algolia_Adultime.yml b/scrapers/Adultime/Adultime.yml similarity index 100% rename from scrapers/Algolia_Adultime.yml rename to scrapers/Adultime/Adultime.yml diff --git a/scrapers/Algolia.py b/scrapers/Algolia/Algolia.py similarity index 100% rename from scrapers/Algolia.py rename to scrapers/Algolia/Algolia.py diff --git a/scrapers/Algolia_AllGirlMassage.yml 
b/scrapers/AllGirlMassage/AllGirlMassage.yml similarity index 100% rename from scrapers/Algolia_AllGirlMassage.yml rename to scrapers/AllGirlMassage/AllGirlMassage.yml diff --git a/scrapers/Algolia_Biphoria.yml b/scrapers/Biphoria/Biphoria.yml similarity index 100% rename from scrapers/Algolia_Biphoria.yml rename to scrapers/Biphoria/Biphoria.yml diff --git a/scrapers/Algolia_Blowpass.yml b/scrapers/Blowpass/Blowpass.yml similarity index 100% rename from scrapers/Algolia_Blowpass.yml rename to scrapers/Blowpass/Blowpass.yml diff --git a/scrapers/Algolia_BurningAngel.yml b/scrapers/BurningAngel/BurningAngel.yml similarity index 100% rename from scrapers/Algolia_BurningAngel.yml rename to scrapers/BurningAngel/BurningAngel.yml diff --git a/scrapers/Algolia_ChaosMen.yml b/scrapers/ChaosMen/ChaosMen.yml similarity index 100% rename from scrapers/Algolia_ChaosMen.yml rename to scrapers/ChaosMen/ChaosMen.yml diff --git a/scrapers/Algolia_ClubInfernoDungeon.yml b/scrapers/ClubInfernoDungeon/ClubInfernoDungeon.yml similarity index 100% rename from scrapers/Algolia_ClubInfernoDungeon.yml rename to scrapers/ClubInfernoDungeon/ClubInfernoDungeon.yml diff --git a/scrapers/Algolia_DogfartNetwork.yml b/scrapers/DogfartNetwork/DogfartNetwork.yml similarity index 100% rename from scrapers/Algolia_DogfartNetwork.yml rename to scrapers/DogfartNetwork/DogfartNetwork.yml diff --git a/scrapers/Algolia_EvilAngel.yml b/scrapers/EvilAngel/EvilAngel.yml similarity index 100% rename from scrapers/Algolia_EvilAngel.yml rename to scrapers/EvilAngel/EvilAngel.yml diff --git a/scrapers/Algolia_FalconStudios.yml b/scrapers/FalconStudios/FalconStudios.yml similarity index 100% rename from scrapers/Algolia_FalconStudios.yml rename to scrapers/FalconStudios/FalconStudios.yml diff --git a/scrapers/Algolia_FantasyMassage.yml b/scrapers/FantasyMassage/FantasyMassage.yml similarity index 100% rename from scrapers/Algolia_FantasyMassage.yml rename to scrapers/FantasyMassage/FantasyMassage.yml diff --git 
a/scrapers/Algolia_FistingInferno.yml b/scrapers/FistingInferno/FistingInferno.yml similarity index 100% rename from scrapers/Algolia_FistingInferno.yml rename to scrapers/FistingInferno/FistingInferno.yml diff --git a/scrapers/Algolia_GangBangCreampie.yml b/scrapers/GangBangCreampie/GangBangCreampie.yml similarity index 100% rename from scrapers/Algolia_GangBangCreampie.yml rename to scrapers/GangBangCreampie/GangBangCreampie.yml diff --git a/scrapers/Algolia_GenderXFilms.yml b/scrapers/GenderXFilms/GenderXFilms.yml similarity index 100% rename from scrapers/Algolia_GenderXFilms.yml rename to scrapers/GenderXFilms/GenderXFilms.yml diff --git a/scrapers/Algolia_Girlfriendsfilms.yml b/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml similarity index 100% rename from scrapers/Algolia_Girlfriendsfilms.yml rename to scrapers/Girlfriendsfilms/Girlfriendsfilms.yml diff --git a/scrapers/Algolia_Girlsway.yml b/scrapers/Girlsway/Girlsway.yml similarity index 100% rename from scrapers/Algolia_Girlsway.yml rename to scrapers/Girlsway/Girlsway.yml diff --git a/scrapers/Algolia_Gloryholesecrets.yml b/scrapers/Gloryholesecrets/Gloryholesecrets.yml similarity index 100% rename from scrapers/Algolia_Gloryholesecrets.yml rename to scrapers/Gloryholesecrets/Gloryholesecrets.yml diff --git a/scrapers/Algolia_Johnnyrapid.yml b/scrapers/Johnnyrapid/Johnnyrapid.yml similarity index 100% rename from scrapers/Algolia_Johnnyrapid.yml rename to scrapers/Johnnyrapid/Johnnyrapid.yml diff --git a/scrapers/Algolia_MenOver30.yml b/scrapers/MenOver30/MenOver30.yml similarity index 100% rename from scrapers/Algolia_MenOver30.yml rename to scrapers/MenOver30/MenOver30.yml diff --git a/scrapers/Algolia_NextDoorStudios.yml b/scrapers/NextDoorStudios/NextDoorStudios.yml similarity index 100% rename from scrapers/Algolia_NextDoorStudios.yml rename to scrapers/NextDoorStudios/NextDoorStudios.yml diff --git a/scrapers/Algolia_RagingStallion.yml b/scrapers/RagingStallion/RagingStallion.yml similarity index 
100% rename from scrapers/Algolia_RagingStallion.yml rename to scrapers/RagingStallion/RagingStallion.yml diff --git a/scrapers/Algolia_RoccoSiffredi.yml b/scrapers/RoccoSiffredi/RoccoSiffredi.yml similarity index 100% rename from scrapers/Algolia_RoccoSiffredi.yml rename to scrapers/RoccoSiffredi/RoccoSiffredi.yml diff --git a/scrapers/Algolia_TabooHeat.yml b/scrapers/TabooHeat/TabooHeat.yml similarity index 100% rename from scrapers/Algolia_TabooHeat.yml rename to scrapers/TabooHeat/TabooHeat.yml diff --git a/scrapers/Algolia_Wicked.yml b/scrapers/Wicked/Wicked.yml similarity index 100% rename from scrapers/Algolia_Wicked.yml rename to scrapers/Wicked/Wicked.yml diff --git a/scrapers/Algolia_disruptivefilms.yml b/scrapers/disruptivefilms/disruptivefilms.yml similarity index 100% rename from scrapers/Algolia_disruptivefilms.yml rename to scrapers/disruptivefilms/disruptivefilms.yml diff --git a/scrapers/Algolia_filthykings.yml b/scrapers/filthykings/filthykings.yml similarity index 100% rename from scrapers/Algolia_filthykings.yml rename to scrapers/filthykings/filthykings.yml diff --git a/scrapers/Algolia_mypervyfamily.yml b/scrapers/mypervyfamily/mypervyfamily.yml similarity index 100% rename from scrapers/Algolia_mypervyfamily.yml rename to scrapers/mypervyfamily/mypervyfamily.yml diff --git a/scrapers/Algolia_touchmywife.yml b/scrapers/touchmywife/touchmywife.yml similarity index 100% rename from scrapers/Algolia_touchmywife.yml rename to scrapers/touchmywife/touchmywife.yml diff --git a/scrapers/Algolia_xEmpire.yml b/scrapers/xEmpire/xEmpire.yml similarity index 100% rename from scrapers/Algolia_xEmpire.yml rename to scrapers/xEmpire/xEmpire.yml diff --git a/scrapers/Algolia_zerotolerancefilms.yml b/scrapers/zerotolerancefilms/zerotolerancefilms.yml similarity index 100% rename from scrapers/Algolia_zerotolerancefilms.yml rename to scrapers/zerotolerancefilms/zerotolerancefilms.yml From 39ff3d526f5be387276652984d53d348fff36219 Mon Sep 17 00:00:00 2001 From: 
WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:08:53 +1100 Subject: [PATCH 533/624] Add dependency-only packages --- build_site.sh | 23 ++++++++++++++++++----- scrapers/Algolia/package | 2 ++ scrapers/py_common/package | 2 ++ scrapers/rb_common/package | 2 ++ 4 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 scrapers/Algolia/package create mode 100644 scrapers/py_common/package create mode 100644 scrapers/rb_common/package diff --git a/build_site.sh b/build_site.sh index 1c1c7cf92..b3dd42247 100755 --- a/build_site.sh +++ b/build_site.sh @@ -17,8 +17,12 @@ mkdir -p "$outdir" buildScraper() { f=$1 + # get the scraper id from the filename scraper_id=$(basename "$f" .yml) + if [ "$scraper_id" == "package" ]; then + scraper_id=$(basename $(dirname "$f")) + fi echo "Processing $scraper_id" @@ -29,17 +33,21 @@ buildScraper() # create the zip file # copy other files zipfile=$(realpath "$outdir/$scraper_id.zip") - + + name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') + ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//') + + # always ignore package file + ignore="-x $ignore package" + pushd $(dirname "$f") > /dev/null if [ $(dirname "$f") != "./scrapers" ]; then - zip -r "$zipfile" . > /dev/null + zip -r "$zipfile" . 
${ignore} > /dev/null else zip "$zipfile" "$scraper_id.yml" > /dev/null fi popd > /dev/null - name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') - # write to spec index echo "- id: $scraper_id name: $name @@ -55,6 +63,11 @@ for f in ./scrapers/*.yml; do buildScraper "$f" done -for f in ./scrapers/*/*.yml; do +find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do + buildScraper "$f" +done + +# handle dependency packages +find ./scrapers/ -mindepth 2 -name package -print0 | while read -d $'\0' f; do buildScraper "$f" done diff --git a/scrapers/Algolia/package b/scrapers/Algolia/package new file mode 100644 index 000000000..2f852be20 --- /dev/null +++ b/scrapers/Algolia/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: Algolia diff --git a/scrapers/py_common/package b/scrapers/py_common/package new file mode 100644 index 000000000..6c171ab07 --- /dev/null +++ b/scrapers/py_common/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: py_common diff --git a/scrapers/rb_common/package b/scrapers/rb_common/package new file mode 100644 index 000000000..363e2f4ff --- /dev/null +++ b/scrapers/rb_common/package @@ -0,0 +1,2 @@ +# script used as a dependency only +name: rb_common From a955b8eeb320207cb8563e7632e85b8683c45e38 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:15:25 +1100 Subject: [PATCH 534/624] Correct date and version --- build_site.sh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/build_site.sh b/build_site.sh index b3dd42247..84b52fb02 100755 --- a/build_site.sh +++ b/build_site.sh @@ -17,18 +17,24 @@ mkdir -p "$outdir" buildScraper() { f=$1 + dir=$(dirname "$f") # get the scraper id from the filename scraper_id=$(basename "$f" .yml) + versionFile=$f if [ "$scraper_id" == "package" ]; then - scraper_id=$(basename $(dirname "$f")) + scraper_id=$(basename "$dir") + fi + + 
if [ "$dir" != "./scrapers" ]; then + versionFile="$dir" fi echo "Processing $scraper_id" # create a directory for the version - version=$(git log -n 1 --pretty=format:%h -- "$f") - updated=$(git log -n 1 --date="format:%F %T %z" --pretty=format:%ad -- "$f") + version=$(git log -n 1 --pretty=format:%h -- "$versionFile") + updated=$(git log -n 1 --date="format:%F %T %z" --pretty=format:%ad -- "$versionFile") # create the zip file # copy other files @@ -40,8 +46,8 @@ buildScraper() # always ignore package file ignore="-x $ignore package" - pushd $(dirname "$f") > /dev/null - if [ $(dirname "$f") != "./scrapers" ]; then + pushd "$dir" > /dev/null + if [ "$dir" != "./scrapers" ]; then zip -r "$zipfile" . ${ignore} > /dev/null else zip "$zipfile" "$scraper_id.yml" > /dev/null From fbd81c51de7312dcc29b37060292ecc49a6a82ff Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:31:17 +1100 Subject: [PATCH 535/624] Move grouped files into own directories --- .../{ => ATKGirlfriends}/ATKGirlfriends.py | 0 .../{ => ATKGirlfriends}/ATKGirlfriends.yml | 0 scrapers/{ => AnalVids}/AnalVids.py | 0 scrapers/{ => AnalVids}/AnalVids.yml | 0 .../AnimeCharactersDatabase.py | 0 .../AnimeCharactersDatabase.yml | 0 scrapers/{ => Arx}/Arx.py | 0 scrapers/{ => Arx}/Arx.yml | 0 scrapers/{ => AuntJudys}/AuntJudys.py | 0 scrapers/{ => AuntJudys}/AuntJudys.yml | 0 scrapers/{ => ComicInfoXML}/ComicInfoXML.py | 0 scrapers/{ => ComicInfoXML}/ComicInfoXML.yml | 0 scrapers/{ => CopyFromScene}/CopyFromScene.py | 0 .../{ => CopyFromScene}/CopyFromScene.yml | 0 scrapers/{ => CopyToGallery}/CopyToGallery.py | 246 +++++++++--------- .../{ => CopyToGallery}/CopyToGallery.yml | 0 scrapers/{ => Filename}/Filename.py | 0 scrapers/{ => Filename}/Filename.yml | 0 scrapers/{ => Fit18}/Fit18.py | 0 scrapers/{ => Fit18}/Fit18.yml | 0 scrapers/{ => FratX}/FratX.py | 0 scrapers/{ => FratX}/FratX.yml | 0 scrapers/{ => IAFD}/IAFD.py | 0 scrapers/{ => 
IAFD}/IAFD.yml | 0 scrapers/{ => IFeelMyself}/IFeelMyself.py | 0 scrapers/{ => IFeelMyself}/IFeelMyself.yml | 0 .../JacquieEtMichelTV.py | 0 .../JacquieEtMichelTV.yml | 0 .../JavLibrary_python.py | 0 .../JavLibrary_python.yml | 0 scrapers/{ => KBProductions}/KBProductions.py | 0 .../{ => KBProductions}/KBProductions.yml | 0 scrapers/{ => MindGeekAPI}/MindGeekAPI.py | 0 scrapers/{ => MindGeekAPI}/MindGeekAPI.yml | 0 scrapers/{ => MissaX}/MissaX.py | 0 scrapers/{ => MissaX}/MissaX.yml | 0 scrapers/{ => PerfectGonzo}/PerfectGonzo.py | 0 scrapers/{ => PerfectGonzo}/PerfectGonzo.yml | 0 .../{ => PremiumBukkake}/PremiumBukkake.py | 0 .../{ => PremiumBukkake}/PremiumBukkake.yml | 0 scrapers/{ => RealityLovers}/RealityLovers.py | 0 .../{ => RealityLovers}/RealityLovers.yml | 0 scrapers/{ => Redgifs}/Redgifs.py | 0 scrapers/{ => Redgifs}/Redgifs.yml | 0 scrapers/{ => SARJ-LLC}/SARJ-LLC.py | 0 scrapers/{ => SARJ-LLC}/SARJ-LLC.yml | 0 scrapers/{ => ScrapeWithURL}/ScrapeWithURL.py | 162 ++++++------ .../{ => ScrapeWithURL}/ScrapeWithURL.yml | 16 +- scrapers/{ => Teamskeet}/Teamskeet.yml | 0 scrapers/{ => Teamskeet}/TeamskeetAPI.py | 0 scrapers/{ => Tokyohot}/Tokyohot.py | 0 scrapers/{ => Tokyohot}/Tokyohot.yml | 0 scrapers/{ => TopWebModels}/TopWebModels.py | 0 scrapers/{ => TopWebModels}/TopWebModels.yml | 0 scrapers/{ => Traxxx}/Traxxx.py | 0 scrapers/{ => Traxxx}/Traxxx.yml | 0 scrapers/{ => Traxxx}/traxxx_interface.py | 0 scrapers/{ => WAPdB}/WAPdB.py | 0 scrapers/{ => WAPdB}/WAPdB.yml | 0 scrapers/{ => dc-onlyfans}/dc-onlyfans.py | 0 scrapers/{ => dc-onlyfans}/dc-onlyfans.yml | 0 scrapers/{ => jellyfin}/jellyfin.py | 0 scrapers/{ => jellyfin}/jellyfin.yml | 0 scrapers/{ => multiscrape}/multiscrape.py | 0 scrapers/{ => multiscrape}/multiscrape.yml | 0 .../performer-image-by-scene.py | 0 .../performer-image-by-scene.yml | 0 .../performer-image-dir.py | 0 .../performer-image-dir.yml | 0 scrapers/{ => stash-sqlite}/stash-sqlite.py | 0 scrapers/{ => 
stash-sqlite}/stash-sqlite.yml | 0 scrapers/{ => torrent}/torrent.py | 0 scrapers/{ => torrent}/torrent.yml | 0 scrapers/{ => vixenNetwork}/vixenNetwork.py | 0 scrapers/{ => vixenNetwork}/vixenNetwork.yml | 0 scrapers/{ => xbvrdb}/xbvrdb.py | 0 scrapers/{ => xbvrdb}/xbvrdb.yml | 0 77 files changed, 212 insertions(+), 212 deletions(-) rename scrapers/{ => ATKGirlfriends}/ATKGirlfriends.py (100%) rename scrapers/{ => ATKGirlfriends}/ATKGirlfriends.yml (100%) rename scrapers/{ => AnalVids}/AnalVids.py (100%) rename scrapers/{ => AnalVids}/AnalVids.yml (100%) rename scrapers/{ => AnimeCharactersDatabase}/AnimeCharactersDatabase.py (100%) rename scrapers/{ => AnimeCharactersDatabase}/AnimeCharactersDatabase.yml (100%) rename scrapers/{ => Arx}/Arx.py (100%) rename scrapers/{ => Arx}/Arx.yml (100%) rename scrapers/{ => AuntJudys}/AuntJudys.py (100%) rename scrapers/{ => AuntJudys}/AuntJudys.yml (100%) rename scrapers/{ => ComicInfoXML}/ComicInfoXML.py (100%) rename scrapers/{ => ComicInfoXML}/ComicInfoXML.yml (100%) rename scrapers/{ => CopyFromScene}/CopyFromScene.py (100%) rename scrapers/{ => CopyFromScene}/CopyFromScene.yml (100%) rename scrapers/{ => CopyToGallery}/CopyToGallery.py (97%) rename scrapers/{ => CopyToGallery}/CopyToGallery.yml (100%) rename scrapers/{ => Filename}/Filename.py (100%) rename scrapers/{ => Filename}/Filename.yml (100%) rename scrapers/{ => Fit18}/Fit18.py (100%) rename scrapers/{ => Fit18}/Fit18.yml (100%) rename scrapers/{ => FratX}/FratX.py (100%) rename scrapers/{ => FratX}/FratX.yml (100%) rename scrapers/{ => IAFD}/IAFD.py (100%) rename scrapers/{ => IAFD}/IAFD.yml (100%) rename scrapers/{ => IFeelMyself}/IFeelMyself.py (100%) rename scrapers/{ => IFeelMyself}/IFeelMyself.yml (100%) rename scrapers/{ => JacquieEtMichelTV}/JacquieEtMichelTV.py (100%) rename scrapers/{ => JacquieEtMichelTV}/JacquieEtMichelTV.yml (100%) rename scrapers/{ => JavLibrary_python}/JavLibrary_python.py (100%) rename scrapers/{ => 
JavLibrary_python}/JavLibrary_python.yml (100%) rename scrapers/{ => KBProductions}/KBProductions.py (100%) rename scrapers/{ => KBProductions}/KBProductions.yml (100%) rename scrapers/{ => MindGeekAPI}/MindGeekAPI.py (100%) rename scrapers/{ => MindGeekAPI}/MindGeekAPI.yml (100%) rename scrapers/{ => MissaX}/MissaX.py (100%) rename scrapers/{ => MissaX}/MissaX.yml (100%) rename scrapers/{ => PerfectGonzo}/PerfectGonzo.py (100%) rename scrapers/{ => PerfectGonzo}/PerfectGonzo.yml (100%) rename scrapers/{ => PremiumBukkake}/PremiumBukkake.py (100%) rename scrapers/{ => PremiumBukkake}/PremiumBukkake.yml (100%) rename scrapers/{ => RealityLovers}/RealityLovers.py (100%) rename scrapers/{ => RealityLovers}/RealityLovers.yml (100%) rename scrapers/{ => Redgifs}/Redgifs.py (100%) rename scrapers/{ => Redgifs}/Redgifs.yml (100%) rename scrapers/{ => SARJ-LLC}/SARJ-LLC.py (100%) rename scrapers/{ => SARJ-LLC}/SARJ-LLC.yml (100%) rename scrapers/{ => ScrapeWithURL}/ScrapeWithURL.py (95%) rename scrapers/{ => ScrapeWithURL}/ScrapeWithURL.yml (94%) rename scrapers/{ => Teamskeet}/Teamskeet.yml (100%) rename scrapers/{ => Teamskeet}/TeamskeetAPI.py (100%) rename scrapers/{ => Tokyohot}/Tokyohot.py (100%) rename scrapers/{ => Tokyohot}/Tokyohot.yml (100%) rename scrapers/{ => TopWebModels}/TopWebModels.py (100%) rename scrapers/{ => TopWebModels}/TopWebModels.yml (100%) rename scrapers/{ => Traxxx}/Traxxx.py (100%) rename scrapers/{ => Traxxx}/Traxxx.yml (100%) rename scrapers/{ => Traxxx}/traxxx_interface.py (100%) rename scrapers/{ => WAPdB}/WAPdB.py (100%) rename scrapers/{ => WAPdB}/WAPdB.yml (100%) rename scrapers/{ => dc-onlyfans}/dc-onlyfans.py (100%) rename scrapers/{ => dc-onlyfans}/dc-onlyfans.yml (100%) rename scrapers/{ => jellyfin}/jellyfin.py (100%) rename scrapers/{ => jellyfin}/jellyfin.yml (100%) rename scrapers/{ => multiscrape}/multiscrape.py (100%) rename scrapers/{ => multiscrape}/multiscrape.yml (100%) rename scrapers/{ => 
performer-image-by-scene}/performer-image-by-scene.py (100%) rename scrapers/{ => performer-image-by-scene}/performer-image-by-scene.yml (100%) rename scrapers/{ => performer-image-dir}/performer-image-dir.py (100%) rename scrapers/{ => performer-image-dir}/performer-image-dir.yml (100%) rename scrapers/{ => stash-sqlite}/stash-sqlite.py (100%) rename scrapers/{ => stash-sqlite}/stash-sqlite.yml (100%) rename scrapers/{ => torrent}/torrent.py (100%) rename scrapers/{ => torrent}/torrent.yml (100%) rename scrapers/{ => vixenNetwork}/vixenNetwork.py (100%) rename scrapers/{ => vixenNetwork}/vixenNetwork.yml (100%) rename scrapers/{ => xbvrdb}/xbvrdb.py (100%) rename scrapers/{ => xbvrdb}/xbvrdb.yml (100%) diff --git a/scrapers/ATKGirlfriends.py b/scrapers/ATKGirlfriends/ATKGirlfriends.py similarity index 100% rename from scrapers/ATKGirlfriends.py rename to scrapers/ATKGirlfriends/ATKGirlfriends.py diff --git a/scrapers/ATKGirlfriends.yml b/scrapers/ATKGirlfriends/ATKGirlfriends.yml similarity index 100% rename from scrapers/ATKGirlfriends.yml rename to scrapers/ATKGirlfriends/ATKGirlfriends.yml diff --git a/scrapers/AnalVids.py b/scrapers/AnalVids/AnalVids.py similarity index 100% rename from scrapers/AnalVids.py rename to scrapers/AnalVids/AnalVids.py diff --git a/scrapers/AnalVids.yml b/scrapers/AnalVids/AnalVids.yml similarity index 100% rename from scrapers/AnalVids.yml rename to scrapers/AnalVids/AnalVids.yml diff --git a/scrapers/AnimeCharactersDatabase.py b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py similarity index 100% rename from scrapers/AnimeCharactersDatabase.py rename to scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py diff --git a/scrapers/AnimeCharactersDatabase.yml b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml similarity index 100% rename from scrapers/AnimeCharactersDatabase.yml rename to scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml diff --git a/scrapers/Arx.py b/scrapers/Arx/Arx.py 
similarity index 100% rename from scrapers/Arx.py rename to scrapers/Arx/Arx.py diff --git a/scrapers/Arx.yml b/scrapers/Arx/Arx.yml similarity index 100% rename from scrapers/Arx.yml rename to scrapers/Arx/Arx.yml diff --git a/scrapers/AuntJudys.py b/scrapers/AuntJudys/AuntJudys.py similarity index 100% rename from scrapers/AuntJudys.py rename to scrapers/AuntJudys/AuntJudys.py diff --git a/scrapers/AuntJudys.yml b/scrapers/AuntJudys/AuntJudys.yml similarity index 100% rename from scrapers/AuntJudys.yml rename to scrapers/AuntJudys/AuntJudys.yml diff --git a/scrapers/ComicInfoXML.py b/scrapers/ComicInfoXML/ComicInfoXML.py similarity index 100% rename from scrapers/ComicInfoXML.py rename to scrapers/ComicInfoXML/ComicInfoXML.py diff --git a/scrapers/ComicInfoXML.yml b/scrapers/ComicInfoXML/ComicInfoXML.yml similarity index 100% rename from scrapers/ComicInfoXML.yml rename to scrapers/ComicInfoXML/ComicInfoXML.yml diff --git a/scrapers/CopyFromScene.py b/scrapers/CopyFromScene/CopyFromScene.py similarity index 100% rename from scrapers/CopyFromScene.py rename to scrapers/CopyFromScene/CopyFromScene.py diff --git a/scrapers/CopyFromScene.yml b/scrapers/CopyFromScene/CopyFromScene.yml similarity index 100% rename from scrapers/CopyFromScene.yml rename to scrapers/CopyFromScene/CopyFromScene.yml diff --git a/scrapers/CopyToGallery.py b/scrapers/CopyToGallery/CopyToGallery.py similarity index 97% rename from scrapers/CopyToGallery.py rename to scrapers/CopyToGallery/CopyToGallery.py index b3d16af46..90bd41095 100644 --- a/scrapers/CopyToGallery.py +++ b/scrapers/CopyToGallery/CopyToGallery.py @@ -1,123 +1,123 @@ -import json -import sys -import os - -try: - import py_common.graphql as graphql - import py_common.log as log -except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) - sys.exit() - -find_gallery = False - -def call_graphql(query, variables=None): - return graphql.callGraphQL(query, variables) - -def get_gallery_id_by_path(gallery_path): - log.debug("get_gallery_by_path gallery_path " + str(gallery_path)) - query = """ - query FindGalleries($galleries_filter: GalleryFilterType) { - findGalleries(gallery_filter: $galleries_filter filter: {per_page: -1}) { - count - galleries { - id - } - } - } - """ - variables = {"galleries_filter": {"path": {'value': gallery_path, "modifier": "EQUALS"}}} - result = call_graphql(query, variables) - log.debug("get_gallery_by_path callGraphQL result " + str(result)) - return result['findGalleries']['galleries'][0]['id'] - -def update_gallery(input): - log.debug("gallery input " + str(input)) - query = """ - mutation GalleryUpdate($input : GalleryUpdateInput!) { - galleryUpdate(input: $input) { - id - title - } - } - """ - variables = { - "input": input - } - result = call_graphql(query, variables) - if result: - g_id = result['galleryUpdate'].get('id') - g_title = result['galleryUpdate'].get('title') - log.info(f"updated Gallery ({g_id}): {g_title}") - return result - -def get_id(obj): - ids = [] - for item in obj: - ids.append(item['id']) - return ids - -def find_galleries(scene_id, scene_path): - ids = [] - directory_path = os.path.dirname(scene_path) - for (cur, dirs, files) in os.walk(directory_path): - - for file in files: - if file.endswith('.zip'): - gallery_path = os.path.join(cur, file) - id = get_gallery_id_by_path(gallery_path) - updateScene_with_gallery(scene_id, id) - ids.append(id) - break - log.debug("find_galleries ids' found " + str(ids)) - return ids - -def updateScene_with_gallery(scene_id, gallery_id): - data = {'id': scene_id, 'gallery_ids': [gallery_id]} - log.debug("data " + str(data)) - query = """ - mutation SceneUpdate($input : SceneUpdateInput!) 
{ - sceneUpdate(input: $input) { - id - title - } - } - """ - variables = { - "input": data - } - result = call_graphql(query, variables) - log.debug("graphql_updateGallery callGraphQL result " + str(result)) - -FRAGMENT = json.loads(sys.stdin.read()) -SCENE_ID = FRAGMENT.get("id") - -scene = graphql.getScene(SCENE_ID) -if scene: - scene_galleries = scene['galleries'] - log.debug("scene_galleries " + str(scene_galleries)) - gallery_ids = [] - if len(scene_galleries) > 0: - for gallery_obj in scene_galleries: - gallery_ids.append(gallery_obj['id']) - elif find_gallery: - # if no galleries are associated see if any gallery zips exist in directory - gallery_ids = find_galleries(SCENE_ID, scene["path"]) - log.debug("gallery_ids " + str(gallery_ids)) - - for gallery_id in gallery_ids: - studio = None - if scene['studio']: - studio = scene['studio']['id'] - gallery_input = {'id': gallery_id, - 'urls': scene['urls'], - 'title': scene['title'], - 'date': scene["date"], - 'details': scene['details'], - 'studio_id': studio, - 'tag_ids': get_id(scene['tags']), - 'performer_ids': get_id(scene['performers'])} - update_gallery(gallery_input) - - print(json.dumps({})) - +import json +import sys +import os + +try: + import py_common.graphql as graphql + import py_common.log as log +except ModuleNotFoundError: + print("You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) + sys.exit() + +find_gallery = False + +def call_graphql(query, variables=None): + return graphql.callGraphQL(query, variables) + +def get_gallery_id_by_path(gallery_path): + log.debug("get_gallery_by_path gallery_path " + str(gallery_path)) + query = """ + query FindGalleries($galleries_filter: GalleryFilterType) { + findGalleries(gallery_filter: $galleries_filter filter: {per_page: -1}) { + count + galleries { + id + } + } + } + """ + variables = {"galleries_filter": {"path": {'value': gallery_path, "modifier": "EQUALS"}}} + result = call_graphql(query, variables) + log.debug("get_gallery_by_path callGraphQL result " + str(result)) + return result['findGalleries']['galleries'][0]['id'] + +def update_gallery(input): + log.debug("gallery input " + str(input)) + query = """ + mutation GalleryUpdate($input : GalleryUpdateInput!) { + galleryUpdate(input: $input) { + id + title + } + } + """ + variables = { + "input": input + } + result = call_graphql(query, variables) + if result: + g_id = result['galleryUpdate'].get('id') + g_title = result['galleryUpdate'].get('title') + log.info(f"updated Gallery ({g_id}): {g_title}") + return result + +def get_id(obj): + ids = [] + for item in obj: + ids.append(item['id']) + return ids + +def find_galleries(scene_id, scene_path): + ids = [] + directory_path = os.path.dirname(scene_path) + for (cur, dirs, files) in os.walk(directory_path): + + for file in files: + if file.endswith('.zip'): + gallery_path = os.path.join(cur, file) + id = get_gallery_id_by_path(gallery_path) + updateScene_with_gallery(scene_id, id) + ids.append(id) + break + log.debug("find_galleries ids' found " + str(ids)) + return ids + +def updateScene_with_gallery(scene_id, gallery_id): + data = {'id': scene_id, 'gallery_ids': [gallery_id]} + log.debug("data " + str(data)) + query = """ + mutation SceneUpdate($input : SceneUpdateInput!) 
{ + sceneUpdate(input: $input) { + id + title + } + } + """ + variables = { + "input": data + } + result = call_graphql(query, variables) + log.debug("graphql_updateGallery callGraphQL result " + str(result)) + +FRAGMENT = json.loads(sys.stdin.read()) +SCENE_ID = FRAGMENT.get("id") + +scene = graphql.getScene(SCENE_ID) +if scene: + scene_galleries = scene['galleries'] + log.debug("scene_galleries " + str(scene_galleries)) + gallery_ids = [] + if len(scene_galleries) > 0: + for gallery_obj in scene_galleries: + gallery_ids.append(gallery_obj['id']) + elif find_gallery: + # if no galleries are associated see if any gallery zips exist in directory + gallery_ids = find_galleries(SCENE_ID, scene["path"]) + log.debug("gallery_ids " + str(gallery_ids)) + + for gallery_id in gallery_ids: + studio = None + if scene['studio']: + studio = scene['studio']['id'] + gallery_input = {'id': gallery_id, + 'urls': scene['urls'], + 'title': scene['title'], + 'date': scene["date"], + 'details': scene['details'], + 'studio_id': studio, + 'tag_ids': get_id(scene['tags']), + 'performer_ids': get_id(scene['performers'])} + update_gallery(gallery_input) + + print(json.dumps({})) + diff --git a/scrapers/CopyToGallery.yml b/scrapers/CopyToGallery/CopyToGallery.yml similarity index 100% rename from scrapers/CopyToGallery.yml rename to scrapers/CopyToGallery/CopyToGallery.yml diff --git a/scrapers/Filename.py b/scrapers/Filename/Filename.py similarity index 100% rename from scrapers/Filename.py rename to scrapers/Filename/Filename.py diff --git a/scrapers/Filename.yml b/scrapers/Filename/Filename.yml similarity index 100% rename from scrapers/Filename.yml rename to scrapers/Filename/Filename.yml diff --git a/scrapers/Fit18.py b/scrapers/Fit18/Fit18.py similarity index 100% rename from scrapers/Fit18.py rename to scrapers/Fit18/Fit18.py diff --git a/scrapers/Fit18.yml b/scrapers/Fit18/Fit18.yml similarity index 100% rename from scrapers/Fit18.yml rename to scrapers/Fit18/Fit18.yml diff --git 
a/scrapers/FratX.py b/scrapers/FratX/FratX.py similarity index 100% rename from scrapers/FratX.py rename to scrapers/FratX/FratX.py diff --git a/scrapers/FratX.yml b/scrapers/FratX/FratX.yml similarity index 100% rename from scrapers/FratX.yml rename to scrapers/FratX/FratX.yml diff --git a/scrapers/IAFD.py b/scrapers/IAFD/IAFD.py similarity index 100% rename from scrapers/IAFD.py rename to scrapers/IAFD/IAFD.py diff --git a/scrapers/IAFD.yml b/scrapers/IAFD/IAFD.yml similarity index 100% rename from scrapers/IAFD.yml rename to scrapers/IAFD/IAFD.yml diff --git a/scrapers/IFeelMyself.py b/scrapers/IFeelMyself/IFeelMyself.py similarity index 100% rename from scrapers/IFeelMyself.py rename to scrapers/IFeelMyself/IFeelMyself.py diff --git a/scrapers/IFeelMyself.yml b/scrapers/IFeelMyself/IFeelMyself.yml similarity index 100% rename from scrapers/IFeelMyself.yml rename to scrapers/IFeelMyself/IFeelMyself.yml diff --git a/scrapers/JacquieEtMichelTV.py b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.py similarity index 100% rename from scrapers/JacquieEtMichelTV.py rename to scrapers/JacquieEtMichelTV/JacquieEtMichelTV.py diff --git a/scrapers/JacquieEtMichelTV.yml b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml similarity index 100% rename from scrapers/JacquieEtMichelTV.yml rename to scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml diff --git a/scrapers/JavLibrary_python.py b/scrapers/JavLibrary_python/JavLibrary_python.py similarity index 100% rename from scrapers/JavLibrary_python.py rename to scrapers/JavLibrary_python/JavLibrary_python.py diff --git a/scrapers/JavLibrary_python.yml b/scrapers/JavLibrary_python/JavLibrary_python.yml similarity index 100% rename from scrapers/JavLibrary_python.yml rename to scrapers/JavLibrary_python/JavLibrary_python.yml diff --git a/scrapers/KBProductions.py b/scrapers/KBProductions/KBProductions.py similarity index 100% rename from scrapers/KBProductions.py rename to scrapers/KBProductions/KBProductions.py diff --git 
a/scrapers/KBProductions.yml b/scrapers/KBProductions/KBProductions.yml similarity index 100% rename from scrapers/KBProductions.yml rename to scrapers/KBProductions/KBProductions.yml diff --git a/scrapers/MindGeekAPI.py b/scrapers/MindGeekAPI/MindGeekAPI.py similarity index 100% rename from scrapers/MindGeekAPI.py rename to scrapers/MindGeekAPI/MindGeekAPI.py diff --git a/scrapers/MindGeekAPI.yml b/scrapers/MindGeekAPI/MindGeekAPI.yml similarity index 100% rename from scrapers/MindGeekAPI.yml rename to scrapers/MindGeekAPI/MindGeekAPI.yml diff --git a/scrapers/MissaX.py b/scrapers/MissaX/MissaX.py similarity index 100% rename from scrapers/MissaX.py rename to scrapers/MissaX/MissaX.py diff --git a/scrapers/MissaX.yml b/scrapers/MissaX/MissaX.yml similarity index 100% rename from scrapers/MissaX.yml rename to scrapers/MissaX/MissaX.yml diff --git a/scrapers/PerfectGonzo.py b/scrapers/PerfectGonzo/PerfectGonzo.py similarity index 100% rename from scrapers/PerfectGonzo.py rename to scrapers/PerfectGonzo/PerfectGonzo.py diff --git a/scrapers/PerfectGonzo.yml b/scrapers/PerfectGonzo/PerfectGonzo.yml similarity index 100% rename from scrapers/PerfectGonzo.yml rename to scrapers/PerfectGonzo/PerfectGonzo.yml diff --git a/scrapers/PremiumBukkake.py b/scrapers/PremiumBukkake/PremiumBukkake.py similarity index 100% rename from scrapers/PremiumBukkake.py rename to scrapers/PremiumBukkake/PremiumBukkake.py diff --git a/scrapers/PremiumBukkake.yml b/scrapers/PremiumBukkake/PremiumBukkake.yml similarity index 100% rename from scrapers/PremiumBukkake.yml rename to scrapers/PremiumBukkake/PremiumBukkake.yml diff --git a/scrapers/RealityLovers.py b/scrapers/RealityLovers/RealityLovers.py similarity index 100% rename from scrapers/RealityLovers.py rename to scrapers/RealityLovers/RealityLovers.py diff --git a/scrapers/RealityLovers.yml b/scrapers/RealityLovers/RealityLovers.yml similarity index 100% rename from scrapers/RealityLovers.yml rename to 
scrapers/RealityLovers/RealityLovers.yml diff --git a/scrapers/Redgifs.py b/scrapers/Redgifs/Redgifs.py similarity index 100% rename from scrapers/Redgifs.py rename to scrapers/Redgifs/Redgifs.py diff --git a/scrapers/Redgifs.yml b/scrapers/Redgifs/Redgifs.yml similarity index 100% rename from scrapers/Redgifs.yml rename to scrapers/Redgifs/Redgifs.yml diff --git a/scrapers/SARJ-LLC.py b/scrapers/SARJ-LLC/SARJ-LLC.py similarity index 100% rename from scrapers/SARJ-LLC.py rename to scrapers/SARJ-LLC/SARJ-LLC.py diff --git a/scrapers/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml similarity index 100% rename from scrapers/SARJ-LLC.yml rename to scrapers/SARJ-LLC/SARJ-LLC.yml diff --git a/scrapers/ScrapeWithURL.py b/scrapers/ScrapeWithURL/ScrapeWithURL.py similarity index 95% rename from scrapers/ScrapeWithURL.py rename to scrapers/ScrapeWithURL/ScrapeWithURL.py index 25312d9e9..1498ded76 100644 --- a/scrapers/ScrapeWithURL.py +++ b/scrapers/ScrapeWithURL/ScrapeWithURL.py @@ -1,81 +1,81 @@ -import json -import sys - -try: - import py_common.graphql as graphql - import py_common.log as log -except ModuleNotFoundError: - print( - "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", - file=sys.stderr, - ) - sys.exit() - - -def scrape_scene(url): - query = """ -query scrapeSceneURL($url: String!) 
{ - scrapeSceneURL(url: $url) { - title - details - code - date - image - urls - studio { - name - url - image - parent { - name - url - image - } - } - tags { - name - } - performers { - aliases - birthdate - career_length - country - death_date - details - ethnicity - eye_color - fake_tits - gender - hair_color - height - instagram - images - measurements - name - piercings - tags { - name - } - tattoos - twitter - url - weight - } - } -}""" - - variables = {"url": url} - result = graphql.callGraphQL(query, variables) - log.debug(f"result {result}") - if result: - return result["scrapeSceneURL"] - - -FRAGMENT = json.loads(sys.stdin.read()) -url = FRAGMENT.get("url") - -if url: - result = scrape_scene(url) - print(json.dumps(result)) -else: - print("null") +import json +import sys + +try: + import py_common.graphql as graphql + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + + +def scrape_scene(url): + query = """ +query scrapeSceneURL($url: String!) 
{ + scrapeSceneURL(url: $url) { + title + details + code + date + image + urls + studio { + name + url + image + parent { + name + url + image + } + } + tags { + name + } + performers { + aliases + birthdate + career_length + country + death_date + details + ethnicity + eye_color + fake_tits + gender + hair_color + height + instagram + images + measurements + name + piercings + tags { + name + } + tattoos + twitter + url + weight + } + } +}""" + + variables = {"url": url} + result = graphql.callGraphQL(query, variables) + log.debug(f"result {result}") + if result: + return result["scrapeSceneURL"] + + +FRAGMENT = json.loads(sys.stdin.read()) +url = FRAGMENT.get("url") + +if url: + result = scrape_scene(url) + print(json.dumps(result)) +else: + print("null") diff --git a/scrapers/ScrapeWithURL.yml b/scrapers/ScrapeWithURL/ScrapeWithURL.yml similarity index 94% rename from scrapers/ScrapeWithURL.yml rename to scrapers/ScrapeWithURL/ScrapeWithURL.yml index dfc7541dc..a68a76b95 100644 --- a/scrapers/ScrapeWithURL.yml +++ b/scrapers/ScrapeWithURL/ScrapeWithURL.yml @@ -1,8 +1,8 @@ -name: Scrape with URL -sceneByFragment: - action: script - script: - - python - - ScrapeWithURL.py - -# Last Updated April 16, 2022 +name: Scrape with URL +sceneByFragment: + action: script + script: + - python + - ScrapeWithURL.py + +# Last Updated April 16, 2022 diff --git a/scrapers/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml similarity index 100% rename from scrapers/Teamskeet.yml rename to scrapers/Teamskeet/Teamskeet.yml diff --git a/scrapers/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py similarity index 100% rename from scrapers/TeamskeetAPI.py rename to scrapers/Teamskeet/TeamskeetAPI.py diff --git a/scrapers/Tokyohot.py b/scrapers/Tokyohot/Tokyohot.py similarity index 100% rename from scrapers/Tokyohot.py rename to scrapers/Tokyohot/Tokyohot.py diff --git a/scrapers/Tokyohot.yml b/scrapers/Tokyohot/Tokyohot.yml similarity index 100% rename from scrapers/Tokyohot.yml rename 
to scrapers/Tokyohot/Tokyohot.yml diff --git a/scrapers/TopWebModels.py b/scrapers/TopWebModels/TopWebModels.py similarity index 100% rename from scrapers/TopWebModels.py rename to scrapers/TopWebModels/TopWebModels.py diff --git a/scrapers/TopWebModels.yml b/scrapers/TopWebModels/TopWebModels.yml similarity index 100% rename from scrapers/TopWebModels.yml rename to scrapers/TopWebModels/TopWebModels.yml diff --git a/scrapers/Traxxx.py b/scrapers/Traxxx/Traxxx.py similarity index 100% rename from scrapers/Traxxx.py rename to scrapers/Traxxx/Traxxx.py diff --git a/scrapers/Traxxx.yml b/scrapers/Traxxx/Traxxx.yml similarity index 100% rename from scrapers/Traxxx.yml rename to scrapers/Traxxx/Traxxx.yml diff --git a/scrapers/traxxx_interface.py b/scrapers/Traxxx/traxxx_interface.py similarity index 100% rename from scrapers/traxxx_interface.py rename to scrapers/Traxxx/traxxx_interface.py diff --git a/scrapers/WAPdB.py b/scrapers/WAPdB/WAPdB.py similarity index 100% rename from scrapers/WAPdB.py rename to scrapers/WAPdB/WAPdB.py diff --git a/scrapers/WAPdB.yml b/scrapers/WAPdB/WAPdB.yml similarity index 100% rename from scrapers/WAPdB.yml rename to scrapers/WAPdB/WAPdB.yml diff --git a/scrapers/dc-onlyfans.py b/scrapers/dc-onlyfans/dc-onlyfans.py similarity index 100% rename from scrapers/dc-onlyfans.py rename to scrapers/dc-onlyfans/dc-onlyfans.py diff --git a/scrapers/dc-onlyfans.yml b/scrapers/dc-onlyfans/dc-onlyfans.yml similarity index 100% rename from scrapers/dc-onlyfans.yml rename to scrapers/dc-onlyfans/dc-onlyfans.yml diff --git a/scrapers/jellyfin.py b/scrapers/jellyfin/jellyfin.py similarity index 100% rename from scrapers/jellyfin.py rename to scrapers/jellyfin/jellyfin.py diff --git a/scrapers/jellyfin.yml b/scrapers/jellyfin/jellyfin.yml similarity index 100% rename from scrapers/jellyfin.yml rename to scrapers/jellyfin/jellyfin.yml diff --git a/scrapers/multiscrape.py b/scrapers/multiscrape/multiscrape.py similarity index 100% rename from 
scrapers/multiscrape.py rename to scrapers/multiscrape/multiscrape.py diff --git a/scrapers/multiscrape.yml b/scrapers/multiscrape/multiscrape.yml similarity index 100% rename from scrapers/multiscrape.yml rename to scrapers/multiscrape/multiscrape.yml diff --git a/scrapers/performer-image-by-scene.py b/scrapers/performer-image-by-scene/performer-image-by-scene.py similarity index 100% rename from scrapers/performer-image-by-scene.py rename to scrapers/performer-image-by-scene/performer-image-by-scene.py diff --git a/scrapers/performer-image-by-scene.yml b/scrapers/performer-image-by-scene/performer-image-by-scene.yml similarity index 100% rename from scrapers/performer-image-by-scene.yml rename to scrapers/performer-image-by-scene/performer-image-by-scene.yml diff --git a/scrapers/performer-image-dir.py b/scrapers/performer-image-dir/performer-image-dir.py similarity index 100% rename from scrapers/performer-image-dir.py rename to scrapers/performer-image-dir/performer-image-dir.py diff --git a/scrapers/performer-image-dir.yml b/scrapers/performer-image-dir/performer-image-dir.yml similarity index 100% rename from scrapers/performer-image-dir.yml rename to scrapers/performer-image-dir/performer-image-dir.yml diff --git a/scrapers/stash-sqlite.py b/scrapers/stash-sqlite/stash-sqlite.py similarity index 100% rename from scrapers/stash-sqlite.py rename to scrapers/stash-sqlite/stash-sqlite.py diff --git a/scrapers/stash-sqlite.yml b/scrapers/stash-sqlite/stash-sqlite.yml similarity index 100% rename from scrapers/stash-sqlite.yml rename to scrapers/stash-sqlite/stash-sqlite.yml diff --git a/scrapers/torrent.py b/scrapers/torrent/torrent.py similarity index 100% rename from scrapers/torrent.py rename to scrapers/torrent/torrent.py diff --git a/scrapers/torrent.yml b/scrapers/torrent/torrent.yml similarity index 100% rename from scrapers/torrent.yml rename to scrapers/torrent/torrent.yml diff --git a/scrapers/vixenNetwork.py b/scrapers/vixenNetwork/vixenNetwork.py 
similarity index 100% rename from scrapers/vixenNetwork.py rename to scrapers/vixenNetwork/vixenNetwork.py diff --git a/scrapers/vixenNetwork.yml b/scrapers/vixenNetwork/vixenNetwork.yml similarity index 100% rename from scrapers/vixenNetwork.yml rename to scrapers/vixenNetwork/vixenNetwork.yml diff --git a/scrapers/xbvrdb.py b/scrapers/xbvrdb/xbvrdb.py similarity index 100% rename from scrapers/xbvrdb.py rename to scrapers/xbvrdb/xbvrdb.py diff --git a/scrapers/xbvrdb.yml b/scrapers/xbvrdb/xbvrdb.yml similarity index 100% rename from scrapers/xbvrdb.yml rename to scrapers/xbvrdb/xbvrdb.yml From 97bec7148d2e6c84196a3f6e80566bfb4116bf8d Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:53:44 +1100 Subject: [PATCH 536/624] Add algolia dependency requirement --- scrapers/21Naturals/21Naturals.yml | 9 +++++---- scrapers/21Sextreme/21Sextreme.yml | 9 +++++---- scrapers/21Sextury/21Sextury.yml | 11 ++++++----- scrapers/ActiveDuty/ActiveDuty.yml | 9 +++++---- scrapers/AddictedToGirls/AddictedToGirls.yml | 11 ++++++----- scrapers/Adultime/Adultime.yml | 15 ++++++++------- scrapers/Algolia/Algolia.py | 1 + scrapers/Algolia/package | 1 + scrapers/AllGirlMassage/AllGirlMassage.yml | 11 ++++++----- scrapers/Biphoria/Biphoria.yml | 11 ++++++----- scrapers/Blowpass/Blowpass.yml | 9 +++++---- scrapers/BurningAngel/BurningAngel.yml | 9 +++++---- scrapers/ChaosMen/ChaosMen.yml | 9 +++++---- .../ClubInfernoDungeon/ClubInfernoDungeon.yml | 11 ++++++----- scrapers/DogfartNetwork/DogfartNetwork.yml | 11 ++++++----- scrapers/EvilAngel/EvilAngel.yml | 13 +++++++------ scrapers/FalconStudios/FalconStudios.yml | 9 +++++---- scrapers/FantasyMassage/FantasyMassage.yml | 11 ++++++----- scrapers/FistingInferno/FistingInferno.yml | 11 ++++++----- scrapers/GangBangCreampie/GangBangCreampie.yml | 11 ++++++----- scrapers/GenderXFilms/GenderXFilms.yml | 13 +++++++------ scrapers/Girlfriendsfilms/Girlfriendsfilms.yml | 13 
+++++++------ scrapers/Girlsway/Girlsway.yml | 11 ++++++----- scrapers/Gloryholesecrets/Gloryholesecrets.yml | 11 ++++++----- scrapers/Johnnyrapid/Johnnyrapid.yml | 11 ++++++----- scrapers/MenOver30/MenOver30.yml | 11 ++++++----- scrapers/NextDoorStudios/NextDoorStudios.yml | 9 +++++---- scrapers/RagingStallion/RagingStallion.yml | 9 +++++---- scrapers/RoccoSiffredi/RoccoSiffredi.yml | 11 ++++++----- scrapers/TabooHeat/TabooHeat.yml | 11 ++++++----- scrapers/Wicked/Wicked.yml | 13 +++++++------ scrapers/disruptivefilms/disruptivefilms.yml | 11 ++++++----- scrapers/filthykings/filthykings.yml | 11 ++++++----- scrapers/mypervyfamily/mypervyfamily.yml | 11 ++++++----- scrapers/touchmywife/touchmywife.yml | 11 ++++++----- scrapers/xEmpire/xEmpire.yml | 11 ++++++----- .../zerotolerancefilms/zerotolerancefilms.yml | 13 +++++++------ 37 files changed, 210 insertions(+), 173 deletions(-) diff --git a/scrapers/21Naturals/21Naturals.yml b/scrapers/21Naturals/21Naturals.yml index 131b7bf6b..8f0dceda1 100644 --- a/scrapers/21Naturals/21Naturals.yml +++ b/scrapers/21Naturals/21Naturals.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "21Naturals" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 21naturals.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21naturals - validName # Last Updated March 23, 2022 diff --git a/scrapers/21Sextreme/21Sextreme.yml b/scrapers/21Sextreme/21Sextreme.yml index 1dfaa360a..34fda954e 100644 --- a/scrapers/21Sextreme/21Sextreme.yml +++ b/scrapers/21Sextreme/21Sextreme.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "21Sextreme" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 
21sextreme.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextreme - validName # Last Updated March 23, 2022 diff --git a/scrapers/21Sextury/21Sextury.yml b/scrapers/21Sextury/21Sextury.yml index 62d1df2e5..afc0b5317 100644 --- a/scrapers/21Sextury/21Sextury.yml +++ b/scrapers/21Sextury/21Sextury.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "21Sextury" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 21sextury.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - 21sextury.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - 21sextury - gallery # Last Updated December 22, 2022 diff --git a/scrapers/ActiveDuty/ActiveDuty.yml b/scrapers/ActiveDuty/ActiveDuty.yml index 68833fcf7..895fea7aa 100644 --- a/scrapers/ActiveDuty/ActiveDuty.yml +++ b/scrapers/ActiveDuty/ActiveDuty.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Active Duty" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - activeduty.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - activeduty sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - activeduty sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - activeduty - 
searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - activeduty - validName # Last Updated September 26, 2023 diff --git a/scrapers/AddictedToGirls/AddictedToGirls.yml b/scrapers/AddictedToGirls/AddictedToGirls.yml index 296ac220d..8eb060de4 100644 --- a/scrapers/AddictedToGirls/AddictedToGirls.yml +++ b/scrapers/AddictedToGirls/AddictedToGirls.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "addicted 2 Girls" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - addicted2girls.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - addicted2girls.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - addicted2girls - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Adultime/Adultime.yml b/scrapers/Adultime/Adultime.yml index 028706f35..4a01a9fc9 100644 --- a/scrapers/Adultime/Adultime.yml +++ b/scrapers/Adultime/Adultime.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Adultime" sceneByURL: - action: script @@ -56,33 +57,33 @@ sceneByURL: - wolfwagner.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - validName galleryByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 
puretaboo - gallery galleryByURL: @@ -100,7 +101,7 @@ galleryByURL: - webyoung.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - puretaboo - gallery movieByURL: @@ -112,7 +113,7 @@ movieByURL: - outofthefamily.com/en/dvd/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - puretaboo - movie # Last Updated October 12, 2023 diff --git a/scrapers/Algolia/Algolia.py b/scrapers/Algolia/Algolia.py index e0fd6da6b..bbda0b13b 100644 --- a/scrapers/Algolia/Algolia.py +++ b/scrapers/Algolia/Algolia.py @@ -17,6 +17,7 @@ "You need to install the following modules 'requests', 'bs4', 'lxml'.", file=sys.stderr) sys.exit() +sys.path.append('../') try: from py_common import graphql from py_common import log diff --git a/scrapers/Algolia/package b/scrapers/Algolia/package index 2f852be20..acffc82b4 100644 --- a/scrapers/Algolia/package +++ b/scrapers/Algolia/package @@ -1,2 +1,3 @@ # script used as a dependency only +# requires: py_common name: Algolia diff --git a/scrapers/AllGirlMassage/AllGirlMassage.yml b/scrapers/AllGirlMassage/AllGirlMassage.yml index e648981e7..45c23adb0 100644 --- a/scrapers/AllGirlMassage/AllGirlMassage.yml +++ b/scrapers/AllGirlMassage/AllGirlMassage.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "AllGirlMassage" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - allgirlmassage.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - allgirlmassage.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - allgirlmassage - gallery # Last Updated December 22, 2022 diff --git 
a/scrapers/Biphoria/Biphoria.yml b/scrapers/Biphoria/Biphoria.yml index 94d6441c2..8a8c72303 100644 --- a/scrapers/Biphoria/Biphoria.yml +++ b/scrapers/Biphoria/Biphoria.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Biphoria" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - biphoria.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - biphoria sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - biphoria sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - biphoria - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - biphoria - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - biphoria.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - biphoria - gallery # Last Updated February 07, 2023 diff --git a/scrapers/Blowpass/Blowpass.yml b/scrapers/Blowpass/Blowpass.yml index 76ada89b7..077459c21 100644 --- a/scrapers/Blowpass/Blowpass.yml +++ b/scrapers/Blowpass/Blowpass.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: Blowpass sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - blowpass.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - blowpass sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - blowpass sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - blowpass - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - blowpass - validName # Last Updated October 04, 2023 diff --git a/scrapers/BurningAngel/BurningAngel.yml b/scrapers/BurningAngel/BurningAngel.yml index 2b0f93cf6..b3e861a05 100644 --- a/scrapers/BurningAngel/BurningAngel.yml +++ b/scrapers/BurningAngel/BurningAngel.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Burning Angel" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - 
burningangel.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - burningangel sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - burningangel sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - burningangel - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - burningangel - validName # Last Updated March 23, 2022 diff --git a/scrapers/ChaosMen/ChaosMen.yml b/scrapers/ChaosMen/ChaosMen.yml index 156521147..ee8f9b1d6 100644 --- a/scrapers/ChaosMen/ChaosMen.yml +++ b/scrapers/ChaosMen/ChaosMen.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "ChaosMen" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - chaosmen.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - chaosmen sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - chaosmen sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - chaosmen - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - chaosmen - validName # Last Updated January 10, 2023 diff --git a/scrapers/ClubInfernoDungeon/ClubInfernoDungeon.yml b/scrapers/ClubInfernoDungeon/ClubInfernoDungeon.yml index 60d1ed6d5..d02a370a7 100644 --- a/scrapers/ClubInfernoDungeon/ClubInfernoDungeon.yml +++ b/scrapers/ClubInfernoDungeon/ClubInfernoDungeon.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "ClubInfernoDungeon" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - clubinfernodungeon.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - clubinfernodungeon sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - clubinfernodungeon sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - clubinfernodungeon - searchName sceneByQueryFragment: action: script script: 
- python - - Algolia.py + - ../Algolia/Algolia.py - clubinfernodungeon - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - clubinfernodungeon.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - clubinfernodungeon - gallery # Last Updated December 22, 2022 diff --git a/scrapers/DogfartNetwork/DogfartNetwork.yml b/scrapers/DogfartNetwork/DogfartNetwork.yml index 064968446..4bf3304d9 100644 --- a/scrapers/DogfartNetwork/DogfartNetwork.yml +++ b/scrapers/DogfartNetwork/DogfartNetwork.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "DogfartNetwork" sceneByURL: - action: script @@ -19,26 +20,26 @@ sceneByURL: - zebragirls.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - dogfartnetwork sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - dogfartnetwork sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - dogfartnetwork - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - dogfartnetwork - validName galleryByURL: @@ -61,7 +62,7 @@ galleryByURL: - zebragirls.com/en/photo script: - python - - Algolia.py + - ../Algolia/Algolia.py - dogfartnetwork - gallery # Last Updated July 27, 2023 diff --git a/scrapers/EvilAngel/EvilAngel.yml b/scrapers/EvilAngel/EvilAngel.yml index c13bbdb3e..c9bb7d235 100644 --- a/scrapers/EvilAngel/EvilAngel.yml +++ b/scrapers/EvilAngel/EvilAngel.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "EvilAngel" sceneByURL: - action: script @@ -9,7 +10,7 @@ sceneByURL: - tsfactor.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - evilangel movieByURL: - action: script @@ -21,27 +22,27 @@ movieByURL: - tsfactor.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - evilangel - movie sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - evilangel sceneByName: action: script script: - python - - 
Algolia.py + - ../Algolia/Algolia.py - evilangel - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - evilangel - validName galleryByURL: @@ -54,7 +55,7 @@ galleryByURL: - tsfactor.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - evilangel - gallery # Last Updated December 26, 2022 diff --git a/scrapers/FalconStudios/FalconStudios.yml b/scrapers/FalconStudios/FalconStudios.yml index 10ac8b052..61bc61961 100644 --- a/scrapers/FalconStudios/FalconStudios.yml +++ b/scrapers/FalconStudios/FalconStudios.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: Falcon Studios sceneByURL: - action: script @@ -6,26 +7,26 @@ sceneByURL: - hothouse.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - falconstudios sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - falconstudios sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - falconstudios - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - falconstudios - validName # Last Updated September 09, 2023 diff --git a/scrapers/FantasyMassage/FantasyMassage.yml b/scrapers/FantasyMassage/FantasyMassage.yml index 22e387367..f5bdf03e3 100644 --- a/scrapers/FantasyMassage/FantasyMassage.yml +++ b/scrapers/FantasyMassage/FantasyMassage.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "FantasyMassage" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - fantasymassage.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage - validName 
galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - fantasymassage.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - fantasymassage - gallery # Last Updated December 22, 2022 diff --git a/scrapers/FistingInferno/FistingInferno.yml b/scrapers/FistingInferno/FistingInferno.yml index 591d3868b..f4a900755 100644 --- a/scrapers/FistingInferno/FistingInferno.yml +++ b/scrapers/FistingInferno/FistingInferno.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "FistingInferno" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - fistinginferno.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - fistinginferno sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fistinginferno sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fistinginferno - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - fistinginferno - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - fistinginferno.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - fistinginferno - gallery # Last Updated September 24, 2023 diff --git a/scrapers/GangBangCreampie/GangBangCreampie.yml b/scrapers/GangBangCreampie/GangBangCreampie.yml index 1eae61aa1..f49d827e5 100644 --- a/scrapers/GangBangCreampie/GangBangCreampie.yml +++ b/scrapers/GangBangCreampie/GangBangCreampie.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "GangbangCreampie" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - gangbangcreampie.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 
gangbangcreampie - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - gangbangcreampie.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - gangbangcreampie - gallery # Last Updated December 22, 2022 diff --git a/scrapers/GenderXFilms/GenderXFilms.yml b/scrapers/GenderXFilms/GenderXFilms.yml index fd567c4c6..aad91c963 100644 --- a/scrapers/GenderXFilms/GenderXFilms.yml +++ b/scrapers/GenderXFilms/GenderXFilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: GenderX Films sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - genderxfilms.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - genderxfilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - gallery movieByURL: @@ -42,7 +43,7 @@ movieByURL: - genderxfilms.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - genderxfilms - movie # Last Updated December 22, 2022 diff --git a/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml b/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml index 8e02c4bce..933251fa0 100644 --- a/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml +++ b/scrapers/Girlfriendsfilms/Girlfriendsfilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Girlfriends Films" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - girlfriendsfilms.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms sceneByName: action: script script: - python - - Algolia.py + - 
../Algolia/Algolia.py - girlfriendsfilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - validName movieByURL: @@ -33,7 +34,7 @@ movieByURL: - girlfriendsfilms.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - movie galleryByURL: @@ -42,7 +43,7 @@ galleryByURL: - girlfriendsfilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlfriendsfilms - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Girlsway/Girlsway.yml b/scrapers/Girlsway/Girlsway.yml index d81ad7dbd..4b37bb8b3 100644 --- a/scrapers/Girlsway/Girlsway.yml +++ b/scrapers/Girlsway/Girlsway.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "GirlsWay" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - girlsway.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - girlsway.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - girlsway - gallery # Last Updated December 22, 2022 diff --git a/scrapers/Gloryholesecrets/Gloryholesecrets.yml b/scrapers/Gloryholesecrets/Gloryholesecrets.yml index c69808d6f..4fbc18858 100644 --- a/scrapers/Gloryholesecrets/Gloryholesecrets.yml +++ b/scrapers/Gloryholesecrets/Gloryholesecrets.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: GloryholeSecrets sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - gloryholesecrets.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - gloryholesecrets sceneByFragment: action: script script: - python - - Algolia.py + - 
../Algolia/Algolia.py - gloryholesecrets sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gloryholesecrets - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - gloryholesecrets - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - gloryholesecrets.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - gloryholesecrets - gallery # Last Updated August 22, 2023 diff --git a/scrapers/Johnnyrapid/Johnnyrapid.yml b/scrapers/Johnnyrapid/Johnnyrapid.yml index cb6d91c04..008401646 100644 --- a/scrapers/Johnnyrapid/Johnnyrapid.yml +++ b/scrapers/Johnnyrapid/Johnnyrapid.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Johnny Rapid" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - johnnyrapid.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - johnnyrapid.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - johnnyrapid - gallery # Last Updated December 22, 2022 diff --git a/scrapers/MenOver30/MenOver30.yml b/scrapers/MenOver30/MenOver30.yml index f0a362915..8342bd998 100644 --- a/scrapers/MenOver30/MenOver30.yml +++ b/scrapers/MenOver30/MenOver30.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "MenOver30" sceneByURL: - action: script @@ -6,26 +7,26 @@ sceneByURL: - pridestudios.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 sceneByName: action: script script: - python - - 
Algolia.py + - ../Algolia/Algolia.py - menover30 - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 - validName galleryByURL: @@ -35,7 +36,7 @@ galleryByURL: - pridestudios.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - menover30 - gallery # Last Updated December 22, 2022 diff --git a/scrapers/NextDoorStudios/NextDoorStudios.yml b/scrapers/NextDoorStudios/NextDoorStudios.yml index 07099fcd3..61263df9a 100644 --- a/scrapers/NextDoorStudios/NextDoorStudios.yml +++ b/scrapers/NextDoorStudios/NextDoorStudios.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: Next Door Studios sceneByURL: - action: script @@ -23,26 +24,26 @@ sceneByURL: - trystanbull.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - nextdoorstudios sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - nextdoorstudios sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - nextdoorstudios - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - nextdoorstudios - validName # Last Updated August 18, 2023 diff --git a/scrapers/RagingStallion/RagingStallion.yml b/scrapers/RagingStallion/RagingStallion.yml index e6a94864f..373a88b31 100644 --- a/scrapers/RagingStallion/RagingStallion.yml +++ b/scrapers/RagingStallion/RagingStallion.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: Raging Stallion sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - ragingstallion.com/en/scene script: - python - - Algolia.py + - ../Algolia/Algolia.py - ragingstallion sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - ragingstallion sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - ragingstallion - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - 
ragingstallion - validName # Last Updated September 09, 2023 diff --git a/scrapers/RoccoSiffredi/RoccoSiffredi.yml b/scrapers/RoccoSiffredi/RoccoSiffredi.yml index 6c22a531f..1c41ad740 100644 --- a/scrapers/RoccoSiffredi/RoccoSiffredi.yml +++ b/scrapers/RoccoSiffredi/RoccoSiffredi.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "RoccoSiffredi" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - roccosiffredi.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - roccosiffredi.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - roccosiffredi - gallery # Last Updated December 22, 2022 diff --git a/scrapers/TabooHeat/TabooHeat.yml b/scrapers/TabooHeat/TabooHeat.yml index e695f5895..455d42d3a 100644 --- a/scrapers/TabooHeat/TabooHeat.yml +++ b/scrapers/TabooHeat/TabooHeat.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "TabooHeat" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - tabooheat.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - tabooheat.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - tabooheat - gallery # Last Updated December 22, 2022 diff --git 
a/scrapers/Wicked/Wicked.yml b/scrapers/Wicked/Wicked.yml index 236c3bf70..390020a40 100644 --- a/scrapers/Wicked/Wicked.yml +++ b/scrapers/Wicked/Wicked.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "Wicked" sceneByURL: - action: script @@ -5,7 +6,7 @@ sceneByURL: - wicked.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked movieByURL: - action: script @@ -13,27 +14,27 @@ movieByURL: - wicked.com/en/movie/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - movie sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - validName galleryByURL: @@ -42,7 +43,7 @@ galleryByURL: - wicked.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - wicked - gallery # Last Updated December 22, 2022 diff --git a/scrapers/disruptivefilms/disruptivefilms.yml b/scrapers/disruptivefilms/disruptivefilms.yml index f857fb8e5..47373a0d5 100644 --- a/scrapers/disruptivefilms/disruptivefilms.yml +++ b/scrapers/disruptivefilms/disruptivefilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "DisruptiveFilms" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - disruptivefilms.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - disruptivefilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - disruptivefilms - gallery 
# Last Updated December 22, 2022 diff --git a/scrapers/filthykings/filthykings.yml b/scrapers/filthykings/filthykings.yml index 55dee88d3..8c0941b7a 100644 --- a/scrapers/filthykings/filthykings.yml +++ b/scrapers/filthykings/filthykings.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "FilthyKings" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - filthykings.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - filthykings.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - filthykings - gallery # Last Updated December 22, 2022 diff --git a/scrapers/mypervyfamily/mypervyfamily.yml b/scrapers/mypervyfamily/mypervyfamily.yml index 2474ce329..74e353a1c 100644 --- a/scrapers/mypervyfamily/mypervyfamily.yml +++ b/scrapers/mypervyfamily/mypervyfamily.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "MyPervyFamily" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - mypervyfamily.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - mypervyfamily.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - mypervyfamily - gallery # Last Updated December 22, 2022 diff --git 
a/scrapers/touchmywife/touchmywife.yml b/scrapers/touchmywife/touchmywife.yml index bf1ac810c..20081577e 100644 --- a/scrapers/touchmywife/touchmywife.yml +++ b/scrapers/touchmywife/touchmywife.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "TouchMyWife" sceneByURL: - action: script @@ -5,26 +6,26 @@ sceneByURL: - touchmywife.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife - validName galleryByURL: @@ -33,7 +34,7 @@ galleryByURL: - touchmywife.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - touchmywife - gallery # Last Updated December 22, 2022 diff --git a/scrapers/xEmpire/xEmpire.yml b/scrapers/xEmpire/xEmpire.yml index 75f6c400a..8ff92c4bd 100644 --- a/scrapers/xEmpire/xEmpire.yml +++ b/scrapers/xEmpire/xEmpire.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "xEmpire" sceneByURL: - action: script @@ -10,26 +11,26 @@ sceneByURL: - xempire.com/en/video/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire - validName galleryByURL: @@ -43,7 +44,7 @@ galleryByURL: - xempire.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - xempire - gallery # Last Updated December 22, 2022 diff --git a/scrapers/zerotolerancefilms/zerotolerancefilms.yml b/scrapers/zerotolerancefilms/zerotolerancefilms.yml index 7015c48a4..2a4fbac96 100644 --- 
a/scrapers/zerotolerancefilms/zerotolerancefilms.yml +++ b/scrapers/zerotolerancefilms/zerotolerancefilms.yml @@ -1,3 +1,4 @@ +# requires: Algolia name: "ZeroToleranceFilms" sceneByURL: - action: script @@ -6,26 +7,26 @@ sceneByURL: - zerotolerancefilms.com/en/video script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms sceneByFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms sceneByName: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - searchName sceneByQueryFragment: action: script script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - validName movieByURL: @@ -35,7 +36,7 @@ movieByURL: - zerotolerancefilms.com/en/movie script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - movie galleryByURL: @@ -44,7 +45,7 @@ galleryByURL: - zerotolerancefilms.com/en/photo/ script: - python - - Algolia.py + - ../Algolia/Algolia.py - zerotolerancefilms - gallery # Last Updated December 22, 2022 From b85fd5b85650ba42ef40f6dac24235327051b5d8 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 11:54:03 +1100 Subject: [PATCH 537/624] Handle dependency requirements --- build_site.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/build_site.sh b/build_site.sh index 84b52fb02..4b3b9a3e0 100755 --- a/build_site.sh +++ b/build_site.sh @@ -42,6 +42,7 @@ buildScraper() name=$(grep "^name:" "$f" | cut -d' ' -f2- | sed -e 's/\r//' -e 's/^"\(.*\)"$/\1/') ignore=$(grep "^# ignore:" "$f" | cut -c 10- | sed -e 's/\r//') + dep=$(grep "^# requires:" "$f" | cut -c 12- | sed -e 's/\r//') # always ignore package file ignore="-x $ignore package" @@ -60,8 +61,17 @@ buildScraper() version: $version date: $updated path: $scraper_id.zip - sha256: $(sha256sum "$zipfile" | cut -d' ' -f1) -" >> "$outdir"/index.yml + sha256: $(sha256sum 
"$zipfile" | cut -d' ' -f1)" >> "$outdir"/index.yml + + # handle dependencies + if [ ! -z "$dep" ]; then + echo " requires:" >> "$outdir"/index.yml + for d in ${dep//,/ }; do + echo " - $d" >> "$outdir"/index.yml + done + fi + + echo "" >> "$outdir"/index.yml } # find all yml files in ./scrapers - these are packages individually From 3479c8b5f9275e4d22a715ecd0c9f53bb7d5db5c Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 12:14:42 +1100 Subject: [PATCH 538/624] Add py_common dependencies --- scrapers/ATKGirlfriends/ATKGirlfriends.yml | 1 + scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml | 1 + scrapers/AuntJudys/AuntJudys.yml | 1 + scrapers/ComicInfoXML/ComicInfoXML.yml | 1 + scrapers/CopyFromScene/CopyFromScene.yml | 2 ++ scrapers/CopyToGallery/CopyToGallery.yml | 2 ++ scrapers/Filename/Filename.yml | 2 ++ scrapers/Fit18/Fit18.yml | 2 ++ scrapers/FratX/FratX.yml | 2 ++ scrapers/IAFD/IAFD.yml | 2 ++ scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml | 2 ++ scrapers/JavLibrary_python/JavLibrary_python.yml | 2 ++ scrapers/KBProductions/KBProductions.yml | 2 ++ scrapers/Loyalfans/Loyalfans.yml | 2 ++ scrapers/ManyVids/ManyVids.yml | 2 ++ scrapers/MindGeekAPI/MindGeekAPI.yml | 2 ++ scrapers/MissaX/MissaX.yml | 2 ++ scrapers/ModelCentroAPI/ModelCentroAPI.yml | 2 ++ scrapers/RealityLovers/RealityLovers.yml | 2 ++ scrapers/Redgifs/Redgifs.yml | 2 ++ scrapers/SARJ-LLC/SARJ-LLC.yml | 1 + scrapers/ScrapeWithURL/ScrapeWithURL.yml | 2 ++ scrapers/ShokoAPI/ShokoAPI.yml | 2 ++ scrapers/Teamskeet/Teamskeet.yml | 2 ++ scrapers/Tokyohot/Tokyohot.yml | 2 ++ scrapers/TopWebModels/TopWebModels.yml | 2 ++ scrapers/Traxxx/Traxxx.yml | 2 ++ scrapers/WowNetworkVenus/WowNetworkVenus.yml | 2 ++ scrapers/dc-onlyfans/dc-onlyfans.yml | 2 ++ scrapers/jellyfin/jellyfin.yml | 2 ++ scrapers/performer-image-by-scene/performer-image-by-scene.yml | 1 + scrapers/torrent/torrent.yml | 2 ++ 
scrapers/vixenNetwork/vixenNetwork.yml | 2 ++ 33 files changed, 60 insertions(+) diff --git a/scrapers/ATKGirlfriends/ATKGirlfriends.yml b/scrapers/ATKGirlfriends/ATKGirlfriends.yml index a0c8d8b34..782e4fefd 100644 --- a/scrapers/ATKGirlfriends/ATKGirlfriends.yml +++ b/scrapers/ATKGirlfriends/ATKGirlfriends.yml @@ -1,4 +1,5 @@ name: ATK Girlfriends +# requires: py_common sceneByFragment: action: script script: diff --git a/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml index f83e35251..22d778cfd 100644 --- a/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml +++ b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.yml @@ -1,4 +1,5 @@ name: AnimeCharactersDatabase +# requires: py_common performerByURL: - action: script diff --git a/scrapers/AuntJudys/AuntJudys.yml b/scrapers/AuntJudys/AuntJudys.yml index fc38558db..acf463c1b 100644 --- a/scrapers/AuntJudys/AuntJudys.yml +++ b/scrapers/AuntJudys/AuntJudys.yml @@ -1,3 +1,4 @@ +# requires: py_common name: AuntJudys sceneByURL: - action: script diff --git a/scrapers/ComicInfoXML/ComicInfoXML.yml b/scrapers/ComicInfoXML/ComicInfoXML.yml index 561d22bc1..240044270 100644 --- a/scrapers/ComicInfoXML/ComicInfoXML.yml +++ b/scrapers/ComicInfoXML/ComicInfoXML.yml @@ -1,4 +1,5 @@ name: "ComicInfo XML" +# requires: py_common galleryByFragment: action: script diff --git a/scrapers/CopyFromScene/CopyFromScene.yml b/scrapers/CopyFromScene/CopyFromScene.yml index 7801dd1b4..266918b9a 100644 --- a/scrapers/CopyFromScene/CopyFromScene.yml +++ b/scrapers/CopyFromScene/CopyFromScene.yml @@ -1,4 +1,6 @@ name: "Copy from Scene" +# requires: py_common + galleryByFragment: action: script script: diff --git a/scrapers/CopyToGallery/CopyToGallery.yml b/scrapers/CopyToGallery/CopyToGallery.yml index 732b8ab34..226b23191 100644 --- a/scrapers/CopyToGallery/CopyToGallery.yml +++ b/scrapers/CopyToGallery/CopyToGallery.yml @@ -1,4 +1,6 @@ name: "Copy to 
Gallery" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/Filename/Filename.yml b/scrapers/Filename/Filename.yml index 41380fe6a..0a8e037a7 100644 --- a/scrapers/Filename/Filename.yml +++ b/scrapers/Filename/Filename.yml @@ -1,4 +1,6 @@ name: Filename +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/Fit18/Fit18.yml b/scrapers/Fit18/Fit18.yml index e21d06676..c6be98c45 100644 --- a/scrapers/Fit18/Fit18.yml +++ b/scrapers/Fit18/Fit18.yml @@ -1,4 +1,6 @@ name: Fit18 +# requires: py_common + sceneByURL: - url: - fit18.com/videos/ diff --git a/scrapers/FratX/FratX.yml b/scrapers/FratX/FratX.yml index 1e9081f64..90ab03290 100644 --- a/scrapers/FratX/FratX.yml +++ b/scrapers/FratX/FratX.yml @@ -1,4 +1,6 @@ name: FratX +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/IAFD/IAFD.yml b/scrapers/IAFD/IAFD.yml index 3084f927b..1a3acdc34 100644 --- a/scrapers/IAFD/IAFD.yml +++ b/scrapers/IAFD/IAFD.yml @@ -1,4 +1,6 @@ name: IAFD +# requires: py_common + performerByName: action: script script: diff --git a/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml index 30a90c91e..b92cafe2b 100644 --- a/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml +++ b/scrapers/JacquieEtMichelTV/JacquieEtMichelTV.yml @@ -1,4 +1,6 @@ name: JacquieEtMicaelTV +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/JavLibrary_python/JavLibrary_python.yml b/scrapers/JavLibrary_python/JavLibrary_python.yml index 457df64d4..5066baa78 100644 --- a/scrapers/JavLibrary_python/JavLibrary_python.yml +++ b/scrapers/JavLibrary_python/JavLibrary_python.yml @@ -1,4 +1,6 @@ name: "JavLibrary Python" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/KBProductions/KBProductions.yml b/scrapers/KBProductions/KBProductions.yml index cdb300a2d..d100ae752 100644 --- 
a/scrapers/KBProductions/KBProductions.yml +++ b/scrapers/KBProductions/KBProductions.yml @@ -1,4 +1,6 @@ name: "KB Productions" +# requires: py_common + sceneByURL: - url: # Keeping this to allow for updates using old urls diff --git a/scrapers/Loyalfans/Loyalfans.yml b/scrapers/Loyalfans/Loyalfans.yml index 74f0c006d..e32fedd39 100644 --- a/scrapers/Loyalfans/Loyalfans.yml +++ b/scrapers/Loyalfans/Loyalfans.yml @@ -1,4 +1,6 @@ name: Loyalfans +# requires: py_common + sceneByURL: - url: - loyalfans.com diff --git a/scrapers/ManyVids/ManyVids.yml b/scrapers/ManyVids/ManyVids.yml index a17d87fca..0d492db42 100644 --- a/scrapers/ManyVids/ManyVids.yml +++ b/scrapers/ManyVids/ManyVids.yml @@ -1,4 +1,6 @@ name: ManyVids +# requires: py_common + sceneByURL: - url: - manyvids.com/Video diff --git a/scrapers/MindGeekAPI/MindGeekAPI.yml b/scrapers/MindGeekAPI/MindGeekAPI.yml index 70d9af864..bf83410c4 100644 --- a/scrapers/MindGeekAPI/MindGeekAPI.yml +++ b/scrapers/MindGeekAPI/MindGeekAPI.yml @@ -1,4 +1,6 @@ name: "MindGeekAPI" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/MissaX/MissaX.yml b/scrapers/MissaX/MissaX.yml index 931949e2e..cfca1a5f5 100644 --- a/scrapers/MissaX/MissaX.yml +++ b/scrapers/MissaX/MissaX.yml @@ -1,4 +1,6 @@ name: "MissaX" +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/ModelCentroAPI/ModelCentroAPI.yml b/scrapers/ModelCentroAPI/ModelCentroAPI.yml index 65ddd4e84..86b1a782c 100644 --- a/scrapers/ModelCentroAPI/ModelCentroAPI.yml +++ b/scrapers/ModelCentroAPI/ModelCentroAPI.yml @@ -1,4 +1,6 @@ name: "ModelCentroAPI" +# requires: py_common + sceneByURL: - url: - beingphoenixmarie.com/scene/ diff --git a/scrapers/RealityLovers/RealityLovers.yml b/scrapers/RealityLovers/RealityLovers.yml index 0c70b6086..9ea81438e 100644 --- a/scrapers/RealityLovers/RealityLovers.yml +++ b/scrapers/RealityLovers/RealityLovers.yml @@ -1,4 +1,6 @@ name: RealityLovers +# requires: py_common + 
performerByURL: - action: script url: diff --git a/scrapers/Redgifs/Redgifs.yml b/scrapers/Redgifs/Redgifs.yml index d62e9c3a1..73ff5b563 100644 --- a/scrapers/Redgifs/Redgifs.yml +++ b/scrapers/Redgifs/Redgifs.yml @@ -1,4 +1,6 @@ name: Redgifs +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/SARJ-LLC/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml index cf7b5fe60..01bef8720 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -1,4 +1,5 @@ name: SARJ LLC +# requires: py_common sceneByName: action: script diff --git a/scrapers/ScrapeWithURL/ScrapeWithURL.yml b/scrapers/ScrapeWithURL/ScrapeWithURL.yml index a68a76b95..fbd2e4594 100644 --- a/scrapers/ScrapeWithURL/ScrapeWithURL.yml +++ b/scrapers/ScrapeWithURL/ScrapeWithURL.yml @@ -1,4 +1,6 @@ name: Scrape with URL +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/ShokoAPI/ShokoAPI.yml b/scrapers/ShokoAPI/ShokoAPI.yml index bab4dc550..64777953a 100644 --- a/scrapers/ShokoAPI/ShokoAPI.yml +++ b/scrapers/ShokoAPI/ShokoAPI.yml @@ -1,4 +1,6 @@ name: "ShokoAPI" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/Teamskeet/Teamskeet.yml b/scrapers/Teamskeet/Teamskeet.yml index 35eb892ea..dbca726b6 100644 --- a/scrapers/Teamskeet/Teamskeet.yml +++ b/scrapers/Teamskeet/Teamskeet.yml @@ -1,4 +1,6 @@ name: "TeamskeetAPI" +# requires: py_common + sceneByURL: - url: - teamskeet.com/movies/ diff --git a/scrapers/Tokyohot/Tokyohot.yml b/scrapers/Tokyohot/Tokyohot.yml index 32cbb8378..1f976b3da 100644 --- a/scrapers/Tokyohot/Tokyohot.yml +++ b/scrapers/Tokyohot/Tokyohot.yml @@ -1,4 +1,6 @@ name: tokyohot +# requires: py_common + sceneByURL: - action: scrapeXPath url: diff --git a/scrapers/TopWebModels/TopWebModels.yml b/scrapers/TopWebModels/TopWebModels.yml index 0fb0532a3..ae9a847e5 100644 --- a/scrapers/TopWebModels/TopWebModels.yml +++ b/scrapers/TopWebModels/TopWebModels.yml @@ -1,4 +1,6 @@ 
name: "Top Web Models" +# requires: py_common + sceneByURL: - url: - tour.2girls1camera.com/scenes diff --git a/scrapers/Traxxx/Traxxx.yml b/scrapers/Traxxx/Traxxx.yml index 2186c9a18..f3d6a851b 100644 --- a/scrapers/Traxxx/Traxxx.yml +++ b/scrapers/Traxxx/Traxxx.yml @@ -1,4 +1,6 @@ name: "Traxxx" +# requires: py_common + sceneByURL: - url: - traxxx.me/scene/ diff --git a/scrapers/WowNetworkVenus/WowNetworkVenus.yml b/scrapers/WowNetworkVenus/WowNetworkVenus.yml index c8fa2aa40..5f4a86727 100644 --- a/scrapers/WowNetworkVenus/WowNetworkVenus.yml +++ b/scrapers/WowNetworkVenus/WowNetworkVenus.yml @@ -1,4 +1,6 @@ name: WowNetworkVenus +# requires: py_common + sceneByURL: - action: script script: diff --git a/scrapers/dc-onlyfans/dc-onlyfans.yml b/scrapers/dc-onlyfans/dc-onlyfans.yml index 3e9edb526..16df8e81a 100644 --- a/scrapers/dc-onlyfans/dc-onlyfans.yml +++ b/scrapers/dc-onlyfans/dc-onlyfans.yml @@ -1,4 +1,6 @@ name: "DC Onlyfans" +# requires: py_common + sceneByFragment: action: script script: diff --git a/scrapers/jellyfin/jellyfin.yml b/scrapers/jellyfin/jellyfin.yml index 9017d10e8..0efe6754a 100644 --- a/scrapers/jellyfin/jellyfin.yml +++ b/scrapers/jellyfin/jellyfin.yml @@ -1,4 +1,6 @@ name: Jellyfin +# requires: py_common + performerByURL: - url: #Replace this with you local jellyfin host diff --git a/scrapers/performer-image-by-scene/performer-image-by-scene.yml b/scrapers/performer-image-by-scene/performer-image-by-scene.yml index fbbacc265..c046fb2ea 100644 --- a/scrapers/performer-image-by-scene/performer-image-by-scene.yml +++ b/scrapers/performer-image-by-scene/performer-image-by-scene.yml @@ -1,4 +1,5 @@ name: Performer Image by scene cover +# requires: py_common performerByFragment: action: script diff --git a/scrapers/torrent/torrent.yml b/scrapers/torrent/torrent.yml index 785ce2473..ab8f49a7d 100644 --- a/scrapers/torrent/torrent.yml +++ b/scrapers/torrent/torrent.yml @@ -1,4 +1,6 @@ name: Torrent +# requires: py_common + sceneByFragment: 
action: script script: diff --git a/scrapers/vixenNetwork/vixenNetwork.yml b/scrapers/vixenNetwork/vixenNetwork.yml index bebff994f..5515e8b59 100644 --- a/scrapers/vixenNetwork/vixenNetwork.yml +++ b/scrapers/vixenNetwork/vixenNetwork.yml @@ -1,4 +1,6 @@ name: "Vixen Media Group" +# requires: py_common + sceneByURL: - url: - blacked.com/videos From d7ea1b2741507ef00599dc8f136584819981f955 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 12:15:32 +1100 Subject: [PATCH 539/624] Fix Algolia --- scrapers/Algolia/Algolia.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/scrapers/Algolia/Algolia.py b/scrapers/Algolia/Algolia.py index bbda0b13b..3295a1183 100644 --- a/scrapers/Algolia/Algolia.py +++ b/scrapers/Algolia/Algolia.py @@ -8,6 +8,13 @@ from configparser import ConfigParser, NoSectionError from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from bs4 import BeautifulSoup as bs import requests @@ -17,7 +24,6 @@ "You need to install the following modules 'requests', 'bs4', 'lxml'.", file=sys.stderr) sys.exit() -sys.path.append('../') try: from py_common import graphql from py_common import log From 1973ac2c49bdf144c40ecc9c61db6c079edab914 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 12:34:11 +1100 Subject: [PATCH 540/624] Fix version generation --- .github/workflows/deploy.yml | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 4f216947f..d7168b64a 100644 --- 
a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -22,17 +22,20 @@ jobs: with: path: master ref: master + fetch-depth: '0' - run: | cd master ./build_site.sh ../_site/develop - - name: Checkout Stable - uses: actions/checkout@v2 - with: - path: stable - ref: stable - - run: | - cd stable - ../master/build_site.sh ../_site/stable + # uncomment this once we have a stable branch + # - name: Checkout Stable + # uses: actions/checkout@v2 + # with: + # path: stable + # ref: stable + # fetch-depth: '0' + # - run: | + # cd stable + # ../master/build_site.sh ../_site/stable - uses: actions/upload-pages-artifact@v1 deploy: From 4a9aca2a1f3c090e4a124805eb97c980f7fb1708 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 12:42:01 +1100 Subject: [PATCH 541/624] Bump ubuntu version --- .github/workflows/deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index d7168b64a..bdf14fc76 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -15,7 +15,7 @@ permissions: jobs: build: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout main uses: actions/checkout@v2 @@ -42,7 +42,7 @@ jobs: environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 needs: build steps: - name: Deploy to GitHub Pages From 1f08c9d1a90e243936bc9fcd071104acbcb86005 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 22 Nov 2023 14:19:23 +1100 Subject: [PATCH 542/624] Bump github action versions --- .github/workflows/deploy.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index bdf14fc76..15e6e3118 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -36,7 +36,7 @@ jobs: # 
- run: | # cd stable # ../master/build_site.sh ../_site/stable - - uses: actions/upload-pages-artifact@v1 + - uses: actions/upload-pages-artifact@v2 deploy: environment: @@ -47,5 +47,5 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v1 + uses: actions/deploy-pages@v2 From fe728859af2f70f49e1bb3e782057857ec4784f3 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Thu, 23 Nov 2023 01:11:25 +0100 Subject: [PATCH 543/624] Fix image retrieval --- scrapers/PMVHaven.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/scrapers/PMVHaven.py b/scrapers/PMVHaven.py index 5d8b28842..e9717fffb 100644 --- a/scrapers/PMVHaven.py +++ b/scrapers/PMVHaven.py @@ -79,8 +79,11 @@ def getData(sceneId): def getURL(pageTree): return getXPATH(pageTree, URL_XPATH) -def getIMG(pageTree): - return getXPATH(pageTree, IMAGE_XPATH) +def getIMG(data): + for item in data['thumbnails']: + if item.startswith("https://storage.pmvhaven.com/"): + return item + return "" def main(): params = json.loads(sys.stdin.read()) @@ -95,7 +98,7 @@ def main(): ret = { 'title': data['title'], - 'image': getIMG(tree), + 'image': getIMG(data), 'date': data['isoDate'].split('T')[0], 'details': data['description'], 'studio': { From 77d5d6012be263bb34c7a3991d4268f23b30c795 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 26 Nov 2023 17:31:05 +0100 Subject: [PATCH 544/624] Fix titles scraped from Sexmex The trailing performer name is not part of the title: 'Title of Scene . 
PerformerName' should just be 'Title of Scene' --- scrapers/SexMex.yml | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/scrapers/SexMex.yml b/scrapers/SexMex.yml index 72f1fcf11..29bcba176 100644 --- a/scrapers/SexMex.yml +++ b/scrapers/SexMex.yml @@ -1,4 +1,4 @@ -name: "SexMex" +name: SexMex sceneByURL: &byUrl - action: scrapeXPath @@ -10,10 +10,15 @@ galleryByURL: *byUrl xPathScrapers: sceneScraper: scene: - Title: &title //div[@class="panel-body"]/h4//text() + Title: &title + selector: //div[@class="panel-body"]/h4//text() + postProcess: + - replace: + - regex: (.*) \..*$ + with: $1 Details: &details //div[@class="panel-body"]/p[not(@class)] Tags: &tags - Name: + Name: selector: //meta[@name="keywords"]/@content split: "," Performers: &performers @@ -21,18 +26,17 @@ xPathScrapers: Studio: &studio Name: fixed: Sex Mex - Image: + Image: selector: //video/@poster postProcess: - replace: - regex: cover.jpg$ with: 1.jpg - + gallery: Title: *title Details: *details Tags: *tags Performers: *performers Studio: *studio - -# Last Updated February 14, 2022 +# Last Updated November 26, 2023 From 71bc8fde9eee1dcf3170d8abe1487bce25745b95 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 26 Nov 2023 17:36:43 +0100 Subject: [PATCH 545/624] Fix for SexMex titles with multiple performers --- scrapers/SexMex.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/SexMex.yml b/scrapers/SexMex.yml index 29bcba176..d9c9be4f6 100644 --- a/scrapers/SexMex.yml +++ b/scrapers/SexMex.yml @@ -14,7 +14,7 @@ xPathScrapers: selector: //div[@class="panel-body"]/h4//text() postProcess: - replace: - - regex: (.*) \..*$ + - regex: ([^.]*)\s\..* with: $1 Details: &details //div[@class="panel-body"]/p[not(@class)] Tags: &tags From ea9f5d01045c2dacc21db6af75fa56baeb398287 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 26 Nov 2023 19:13:42 +0100 Subject: [PATCH 546/624] Fix performer selector for Hentaied network --- 
scrapers/Hentaied.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Hentaied.yml b/scrapers/Hentaied.yml index f5aab9f55..9b398fd96 100644 --- a/scrapers/Hentaied.yml +++ b/scrapers/Hentaied.yml @@ -18,7 +18,7 @@ xPathScrapers: postProcess: - parseDate: January 2, 2006 Performers: - Name: //div[@class="left-top-part"]//div[contains(@class,"tagsmodels")]/a + Name: //div[@class="left-top-part"]//div[contains(@class,"tagsmodels")]//a Tags: Name: selector: //ul[@class="post-categories"]//a From f1f58195e2d7b9728a669afd10e7dfe4aa174601 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Sun, 26 Nov 2023 19:27:16 +0100 Subject: [PATCH 547/624] Fix image selector for Boyfun Adds a fallback selector for older scenes that do not have a video element --- scrapers/BoyFun.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/BoyFun.yml b/scrapers/BoyFun.yml index 276219a7d..5a69f8626 100644 --- a/scrapers/BoyFun.yml +++ b/scrapers/BoyFun.yml @@ -10,7 +10,7 @@ xPathScrapers: $perf: //span[@class="models"]/span[@class="content"]/a scene: Title: //span[@class="title"] - Image: //video/@poster + Image: //video/@poster | //div[@class="video-poster"]/img/@src Date: selector: //span[@class="date"]/span[@class="content"] postProcess: From 91f2e02d67b4a94555abab5de2b89a708faa1147 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:29:57 +1100 Subject: [PATCH 548/624] Remove timezone from date --- build_site.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_site.sh b/build_site.sh index 4b3b9a3e0..49872250a 100755 --- a/build_site.sh +++ b/build_site.sh @@ -34,7 +34,7 @@ buildScraper() # create a directory for the version version=$(git log -n 1 --pretty=format:%h -- "$versionFile") - updated=$(git log -n 1 --date="format:%F %T %z" --pretty=format:%ad -- "$versionFile") + updated=$(TZ=UTC0 git log -n 1 --date="format-local:%F %T" 
--pretty=format:%ad -- "$versionFile") # create the zip file # copy other files From 413280c4b2467df3a3bbc7efb8f78afdd65b29b8 Mon Sep 17 00:00:00 2001 From: MrX292 <42774880+MrX292@users.noreply.github.com> Date: Mon, 27 Nov 2023 05:49:56 +0100 Subject: [PATCH 549/624] Add files via upload use py_common.graphql and move config options into a separate file --- scrapers/ShokoAPI/ShokoAPI.py | 46 +++++++++++++++------------------- scrapers/ShokoAPI/ShokoAPI.yml | 2 +- scrapers/ShokoAPI/config.py | 9 +++++++ 3 files changed, 30 insertions(+), 27 deletions(-) create mode 100644 scrapers/ShokoAPI/config.py diff --git a/scrapers/ShokoAPI/ShokoAPI.py b/scrapers/ShokoAPI/ShokoAPI.py index 9fd87e6f1..2633a4d80 100644 --- a/scrapers/ShokoAPI/ShokoAPI.py +++ b/scrapers/ShokoAPI/ShokoAPI.py @@ -29,20 +29,19 @@ try: from py_common import log + import py_common.graphql as graphql except ModuleNotFoundError: print( "You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) sys.exit() +import config -#user inputs start SHOKO_API_KEY = '' #leave empty it gets your Shoko api key with your shoko server username and password -STASH_API_KEY = "" #your Stash api key -STASH_URL = "http://localhost:9999/graphql" #your stash graphql url -SHOKO_URL = "http://localhost:8111" #your shoko server url -SHOKO_USER = "" #your shoko server username -SHOKO_PASS = "" #your shoko server password -#user inputs end +SHOKO_URL = config.SHOKO.get("url", "") +SHOKO_USER = config.SHOKO.get("user", "") +SHOKO_PASS = config.SHOKO.get("pass", "") + def validate_user_inputs() -> bool: @@ -50,28 +49,14 @@ def validate_user_inputs() -> bool: if shoko is False: log.error("Shoko Url needs to be hostname:port and is currently " + SHOKO_URL) - stash = bool(re.match(r"^(http|https)://.+:\d+/graphql$", STASH_URL)) - if stash is False: - log.error( - "Stash Url needs to be hostname:port/graphql and is currently " + - STASH_URL) - return 
(shoko and stash) + return (shoko) def get_filename(scene_id: str) -> str: log.debug(f"stash sceneid: {scene_id}") - headers = CaseInsensitiveDict() - headers["ApiKey"] = STASH_API_KEY - headers["Content-Type"] = "application/json" - data = data = '{ \"query\": \" query { findScene (id: ' + scene_id + ' ) {path , id} }\" }' - resp = requests.post(url=STASH_URL, headers=headers, data=data) - if resp.status_code == 200: - log.debug("Stash response was successful resp_code: " + str(resp.status_code)) - else: - log.error("response from stash was not successful stash resp_code: " + str(resp.status_code)) - return None - output = resp.json() - path = output['data']['findScene']['path'] + log.debug(graphql.getScene(scene_id)) + data = graphql.getScene(scene_id) + path = data['files'][0]['path'] log.debug("scene path in stash: " + str(path)) pattern = "(^.+)([\\\\]|[/])" replace = "" @@ -172,6 +157,12 @@ def get_series(apikey: str, scene_id: str): def query(fragment: dict) -> dict: + if fragment['title'] == "": + scene_id = fragment['id'] + query = """query findScene($scene_id:ID!){findScene(id:$scene_id){files{basename}}}""" + variables = {'scene_id': scene_id} + result = call_graphql(query, variables) + basename = result['findScene']['files'][0]['basename'] filename, apikey = find_scene_id(fragment['id']) try: findscene_scene_id, findscene_epnumber, find_date = find_scene(apikey, filename) @@ -197,6 +188,9 @@ def main(): data = query(fragment) print(json.dumps(data)) +def call_graphql(query, variables=None): + return graphql.callGraphQL(query, variables) + if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/scrapers/ShokoAPI/ShokoAPI.yml b/scrapers/ShokoAPI/ShokoAPI.yml index 64777953a..f4012c0ee 100644 --- a/scrapers/ShokoAPI/ShokoAPI.yml +++ b/scrapers/ShokoAPI/ShokoAPI.yml @@ -8,4 +8,4 @@ sceneByFragment: - ShokoAPI.py - query -# Last Updated April 03, 2022 +# Last Updated November 27, 2023 diff --git a/scrapers/ShokoAPI/config.py 
b/scrapers/ShokoAPI/config.py new file mode 100644 index 000000000..51757e963 --- /dev/null +++ b/scrapers/ShokoAPI/config.py @@ -0,0 +1,9 @@ +#it gets your Shoko api key with your shoko server username and password +SHOKO = { + "url": + "http://localhost:8111", #your shoko server url + "user": + "username",#your shoko server username + "pass": + "password" #your shoko server password +} From f6337b49d971c197c9c754ca013ce7bb9fd594fd Mon Sep 17 00:00:00 2001 From: Clasp8775 Date: Fri, 24 Nov 2023 18:22:31 +0100 Subject: [PATCH 550/624] Improve Minnano-AV scraper --- scrapers/Minnano-AV.yml | 141 ---------- scrapers/Minnano-AV/Minnano-AV.py | 378 +++++++++++++++++++++++++++ scrapers/Minnano-AV/Minnano-AV.yml | 17 ++ scrapers/Minnano-AV/requirements.txt | 2 + 4 files changed, 397 insertions(+), 141 deletions(-) delete mode 100644 scrapers/Minnano-AV.yml create mode 100644 scrapers/Minnano-AV/Minnano-AV.py create mode 100644 scrapers/Minnano-AV/Minnano-AV.yml create mode 100644 scrapers/Minnano-AV/requirements.txt diff --git a/scrapers/Minnano-AV.yml b/scrapers/Minnano-AV.yml deleted file mode 100644 index 3d779d0b1..000000000 --- a/scrapers/Minnano-AV.yml +++ /dev/null @@ -1,141 +0,0 @@ -name: "Minnano-AV (JAV)" -performerByName: - action: scrapeXPath - queryURL: http://www.minnano-av.com/search_result.php?search_scope=actress&search_word={} - scraper: performerSearch -performerByURL: - - action: scrapeXPath - url: - - http://www.minnano-av.com/ - scraper: performerScraper - -xPathScrapers: - performerSearch: - performer: - Name: //tbody/tr/td/p[@class="furi"]|//div[@class="act-profile"]/table/tbody/tr/td/h2/text() - URL: - selector: //tbody/tr/td/h2[@class="ttl"]/a/@href|//form[@class="add_favorite"]/@action - postProcess: - - replace: - - regex: .*?(\d+).* - with: $1 - - regex: ^ - with: "http://www.minnano-av.com/actress" - - regex: $ - with: ".html" - - performerScraper: - performer: - Name: - selector: //section[@class="main-column details"]/h1/span/text() - # $1 
Alt Name in Jap? | $3 Name in Latin script - postProcess: - - replace: - - regex: (.+)(\s\/\s)(.+) - with: $3 - Aliases: - selector: //section[@class="main-column details"]/h1/text()|//span[text()='別名']/following-sibling::p/text() - concat: ", " - postProcess: - - replace: - - regex: ( - with: ( - - regex: ) - with: ) - URL: - selector: //form[@class="add_favorite"]/@action - postProcess: - - replace: - - regex: (.+=)(.+) - with: http://www.minnano-av.com/actress$2 - - regex: $ - with: ".html" - Twitter: //span[text()='ブログ']/../p/a[contains(@href,'twitter.com')]/@href - Instagram: //span[text()='ブログ']/../p/a[contains(@href,'instagram.com')]/@href - Birthdate: - selector: //span[text()='生年月日']/../p/a/@href - postProcess: - - replace: - - regex: (.+=)(.+) - with: $2 - Height: - selector: //span[text()='サイズ']/../p/text()[1] - postProcess: - - replace: - - regex: (T)(\d+)(.+) - with: $2 - - regex: ^T.* # if the above regex doesnt match => no height was provided - with: "" - Measurements: - selector: //span[text()='サイズ']/../p/a/@href|//span[text()='サイズ']/../p/text() - concat: "|" - postProcess: - - replace: - - regex: (.+=)(\w*)(.+B)(\d*)(.+W)(\d*)(.+H)(\d*)(.+) - with: $4$2-$6-$8 - - regex: ^T(.+B)(\d*)(.+W)(\d*)(.+H)(\d*)(.+) # cup size missing case - with: $2-$4-$6 - CareerLength: - selector: //span[text()='AV出演期間']/../p/text() - postProcess: - - replace: - # Stupid regex to replace Jap Unicode (Can appear like this: http://www.minnano-av.com/actress741247.html) - - regex: "0" - with: "0" - - regex: "1" - with: "1" - - regex: "2" - with: "2" - - regex: "3" - with: "3" - - regex: "4" - with: "4" - - regex: "5" - with: "5" - - regex: "6" - with: "6" - - regex: "7" - with: "7" - - regex: "8" - with: "8" - - regex: "9" - with: "9" - - regex: "、" - with: "," - - regex: ( - with: ( - - regex: ) - with: ) - - regex: \s*[-~]\s* - with: "-" - - regex: -+|年\s*- - with: "-" - - regex: 年\s*, - with: ", " - - regex: 年\s* - with: "/" - - regex: 月\D+ - with: "-" - - regex:  
|年|\(.*?\) - with: "" - - regex: \/, - with: "," - - regex: \/$ - with: "" - - regex: "[\\p{Han}\\p{Hiragana}\\p{Katakana}ー]+" - with: "" - #Image: - # selector: //div[@class='act-area']/div[@class="thumb"]/img/@src - # postProcess: - # - replace: - # - regex: ^ - # with: http://www.minnano-av.com - # - regex: ?new - # with: "" - Ethnicity: - fixed: "Japanese" - Country: - fixed: "Japan" - Gender: - fixed: "Female" -# Last Updated June 20, 2021 diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py new file mode 100644 index 000000000..6e9b215e1 --- /dev/null +++ b/scrapers/Minnano-AV/Minnano-AV.py @@ -0,0 +1,378 @@ +import json +import logging +import os +import re +import sys +from typing import Any + +CURRENT_SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +PARENT_DIR = os.path.dirname(CURRENT_SCRIPT_DIR) +sys.path.append(PARENT_DIR) + +try: + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + +try: + import requests + from lxml import etree +except ModuleNotFoundError: + print("You need to install dependencies from requirements.txt") + sys.exit(1) + +XPATHS = { + "alias": "//section[@class=\"main-column details\"]/h1/text()|//span[text()='別名']/following-sibling::p/text()", + "birthdate": "//span[text()='生年月日']/../p/a/@href", + "career": "//span[text()='AV出演期間']/../p/text()", + "debut": "//span[text()='デビュー作品']/../p/text()", + "id": '//form[@class="add_favorite"]/@action', + "image": "//div[@class='act-area']/div[@class=\"thumb\"]/img/@src", + "instagram": ("//span[text()='ブログ']/../p/a[contains(@href,'instagram.com')]/@href"), + "measurements": ( + "//span[text()='サイズ']/../p/a/@href|//span[text()='サイズ']/../p/text()" + ), + "name_kanji": '//section[@class="main-column details"]/h1/text()', + "origin": "//span[text()='出身地']/../p/a/text()", + "name": 
'//section[@class="main-column details"]/h1/span/text()', + "search_url": '../h2[@class="ttl"]/a/@href', + "search": '//p[@class="furi"]', + "twitter": ("//span[text()='ブログ']/../p/a[contains(@href,'twitter.com')]/@href"), +} + +REGEXES = { + "alias": r"(?P[^\x29\uFF09]+?)(?P[\x28\uFF08\u3010][^\x29\uFF09\u3011]+(?:[\x29\uFF09\u3011]))? [\x28\uFF08](?P\w+)? / (?P[a-z-A-Z ]+)?[\x29\uFF09]", + "id": r"\d+", + "birthdate": r"[0-9-]+", + "career": (r"(?P\d+)年?(?:\d+月)? ?(?:\d+)?日?[-~]? ?(?:(?P\d+)?)?年?"), + "measurements": ( + r"(?<=T)(?P\d+)? / B(?P\d+)\([^=]+=(?P\w+)\) / W(?P\d+) / H(?P\d+)" + ), + "url": r"http://www.minnano-av.com/actress\d+.html", +} + +FORMATS = { + "image": "http://www.minnano-av.com{IMAGE_URL_FRAGMENT}", + "url": "http://www.minnano-av.com/actress{PERFORMER_ID}.html", +} + + +def reverse_first_last_name(performer_name): + return " ".join(reversed(performer_name.split(" "))) + + +def convert_to_halfwidth(input: str) -> str: + """Convert full-width characters to half-width.""" + fullwidth_range = range(0xFF01, 0xFF5E + 1) + fullwidth_to_halfwidth_dict = { + chr(fw_char): chr(fw_char - 0xFEE0) for fw_char in fullwidth_range + } + halfwidth_str = "".join( + fullwidth_to_halfwidth_dict.get(char, char) for char in input + ) + return halfwidth_str + + +def cm_to_inches(centimeters: int) -> int: + return int(f"{centimeters / 2.54:.0f}") + + +def convert_bra_jp_to_us(jp_size: str) -> str: + """ + Converts bra size from Japanese to US size. + First it looks up the whole size in predefined chart, + and if that fails: + 1. Band size is calculated manually. + 2. Cup size is looked up in another chart. + 1. If that fails as well, the Japanese cup size is used. 
+ References: + * https://www.petitecherry.com/pages/size-guide + * https://japanrabbit.com/blog/japanese-clothing-size-chart/ + """ + predefined_conversion_chart = { + "65A": "30AA", + "65B": "30A", + "65C": "30B", + "65D": "30C", + "65E": "30D", + "65F": "30E", + "70A": "32AA", + "70B": "32A", + "70C": "32B", + "70D": "32C", + "70E": "32D", + "70F": "32E", + "70G": "32F", + "70H": "32F", + "70I": "32G", + "75A": "34AA", + "75B": "34A", + "75C": "34B", + "75D": "34C", + "75E": "34D", + "75F": "34E", + "75G": "32E", + "75H": "34F", + "75I": "34G", + "80B": "36A", + "80C": "36B", + "80D": "36C", + "80E": "36D", + "80F": "36E", + "80G": "36E", + "80H": "36F", + "80I": "36G", + "85C": "38B", + "85D": "38C", + "85E": "38D", + "85F": "38E", + "85G": "38E", + "85H": "38F", + "90D": "40C", + "90E": "40D", + "90F": "40E", + "90G": "40E", + "90H": "40F", + "90I": "40G", + "95E": "42C", + "95F": "42E", + "95G": "42E", + "95H": "42F", + "95I": "42G", + "100E": "44D", + "100F": "44E", + "100G": "44E", + "100H": "44F", + } + cup_conversion_chart = { + "A": "AA", + "B": "A", + "C": "B", + "D": "C", + "F": "DD", + "G": "D", + "H": "F", + "I": "G", + "J": "H", + "K": "I", + } + + converted_size = None + converted_size = predefined_conversion_chart.get(jp_size, None) + + if converted_size is None: + band_size = int(jp_size[:-1]) + cup_size = jp_size[-1] + converted_size = ( + f"{cm_to_inches(band_size)}{cup_conversion_chart.get(cup_size, cup_size)}" + ) + return converted_size + + +def get_xpath_result(tree: Any, xpath_string: str) -> str | list[str] | None: + _result = tree.xpath(xpath_string) + if _result == []: + return None + elif len(_result) == 1: + return _result[0] + else: + return _result + + +def performer_by_url(url): + request = requests.get(url) + log.debug(request.status_code) + + tree = etree.HTML(request.text) + + scrape = {} + aliases = set() + + JAPANESE = True + + if origin_result := get_xpath_result(tree, XPATHS["origin"]): + if origin_result == "海外": + JAPANESE 
# Seconds to wait for the site before giving up; without a timeout a stalled
# connection would hang the scraper indefinitely.
_REQUEST_TIMEOUT = 30
_SEARCH_URL = "http://www.minnano-av.com/search_result.php"


def _as_list(result):
    """Normalise a get_xpath_result() value (None, single item or list) to a list."""
    if result is None:
        return []
    return result if isinstance(result, list) else [result]


def performer_by_url(url):
    """Scrape the performer page at *url* and print the result as JSON on stdout."""
    request = requests.get(url, timeout=_REQUEST_TIMEOUT)
    log.debug(request.status_code)

    tree = etree.HTML(request.text)

    scrape = {}
    aliases = set()

    # "海外" ("overseas") as place of origin marks a non-Japanese performer.
    is_japanese = get_xpath_result(tree, XPATHS["origin"]) != "海外"

    if name_xpath_result := get_xpath_result(tree, XPATHS["name"]):
        # The name line is handled as "<reading> / <romanized name>".
        _, separator, romanized_name = name_xpath_result.rpartition(" / ")
        if separator:
            performer_name = romanized_name
            if is_japanese:
                performer_name = reverse_first_last_name(performer_name)
            scrape["name"] = performer_name
            aliases.add(romanized_name)
        else:
            log.debug("Name XPath matched, but no romanized name found.")

    if kanji_xpath_result := get_xpath_result(tree, XPATHS["name_kanji"]):
        # \u3010 is 【; keep only the text in front of it.
        kanji_name = kanji_xpath_result.partition("\u3010")[0]
        if kanji_name != "":
            aliases.add(kanji_name)
        else:
            log.debug("Kanji name XPath matched, but no value found.")

    # get_xpath_result() unwraps a single hit to a bare string; iterating that
    # directly would walk it character by character.
    for alias in _as_list(get_xpath_result(tree, XPATHS["alias"])):
        if match := re.match(REGEXES["alias"], alias):
            aliases.add(match.group("kanji"))
            # None when the optional group did not take part; discarded below.
            aliases.add(match.group("romanized"))

    if favorite_form_url := get_xpath_result(tree, XPATHS["id"]):
        if match := re.search(REGEXES["id"], favorite_form_url):
            scrape["url"] = FORMATS["url"].format(PERFORMER_ID=match[0])
        else:
            log.debug("URL XPath matched, but no value found.")

    if twitter_url_result := get_xpath_result(tree, XPATHS["twitter"]):
        scrape["twitter"] = twitter_url_result

    if instagram_url_result := get_xpath_result(tree, XPATHS["instagram"]):
        scrape["instagram"] = instagram_url_result

    if birthdate_result := get_xpath_result(tree, XPATHS["birthdate"]):
        if match := re.search(
            REGEXES["birthdate"], convert_to_halfwidth(birthdate_result)
        ):
            scrape["birthdate"] = match[0]
        else:
            log.debug("Birthday XPath matched, but no value found.")

    if measurements_result := get_xpath_result(tree, XPATHS["measurements"]):
        combined = "".join(measurements_result)
        if match := re.search(REGEXES["measurements"], convert_to_halfwidth(combined)):
            waist_in_inches = cm_to_inches(int(match["waist"]))
            hip_in_inches = cm_to_inches(int(match["hip"]))
            bra_size = convert_bra_jp_to_us(f'{match["bust"]}{match["cup"]}')

            scrape["measurements"] = f"{bra_size}-{waist_in_inches}-{hip_in_inches}"
            if match["height"] is not None:
                scrape["height"] = match["height"]
        else:
            log.debug("Measurements XPath matched, but no value found.")

    if career_result := get_xpath_result(tree, XPATHS["career"]):
        clean_career_result = convert_to_halfwidth(career_result).replace(" ", "")
        if match := re.match(REGEXES["career"], clean_career_result):
            start = match["start"] + "-" if match["start"] is not None else ""
            end = match["end"] if match["end"] is not None else ""
            scrape["career_length"] = start + end
        else:
            log.debug("Career debut XPath matched, but no value found.")
    elif debut_result := get_xpath_result(tree, XPATHS["debut"]):
        # No explicit career span on the page: fall back to the debut entry.
        if match := re.search(REGEXES["career"], convert_to_halfwidth(debut_result)):
            start = match["start"] if match["start"] is not None else ""
            end = match["end"] if match["end"] is not None else ""
            scrape["career_length"] = f"{start}-{end}"
        else:
            log.debug("Career debut XPath matched, but no value found.")

    if image_result := get_xpath_result(tree, XPATHS["image"]):
        clean_url_fragment = image_result.replace("?new", "")
        if clean_url_fragment != "":
            scrape["image"] = FORMATS["image"].format(
                IMAGE_URL_FRAGMENT=clean_url_fragment
            )
        else:
            log.debug("Image XPath matched, but no value found.")

    aliases.discard(None)
    scrape["aliases"] = ", ".join(sorted(aliases))
    # NOTE(review): the original indentation was lost in this copy; the
    # country/ethnicity/hair/eye defaults are assumed to apply to Japanese
    # performers only and the gender to everyone -- confirm against upstream.
    if is_japanese:
        scrape["country"] = "Japan"
        scrape["ethnicity"] = "Asian"
        scrape["hair_color"] = "Black"
        scrape["eye_color"] = "Brown"
    scrape["gender"] = "Female"
    print(json.dumps(scrape))


def performer_by_name(name: str, retry=True) -> None:
    """Search the site for *name* and print the matching performers as JSON.

    When nothing is found and *retry* is true, the search is repeated once
    with the words of the name in reverse order.
    """
    # Let requests build the query string so spaces, "&" and non-ASCII
    # characters in the name are escaped properly.
    result = requests.get(
        _SEARCH_URL,
        params={"search_scope": "actress", "search_word": name},
        timeout=_REQUEST_TIMEOUT,
    )
    tree = etree.HTML(result.text)

    performer_list = []

    if re.search(REGEXES["url"], result.url):
        # The final URL is already a performer page, so use it directly.
        performer_list.append({"name": name, "url": result.url})
    elif search_result := get_xpath_result(tree, XPATHS["search"]):
        # A single hit comes back as a bare element; iterating that would walk
        # its children instead of the hit itself.
        for node in _as_list(search_result):
            node_value = node.text or ""
            if " / " not in node_value:
                continue
            performer = {"name": node_value.rpartition(" / ")[2]}
            if url_result := get_xpath_result(node, XPATHS["search_url"]):
                url = ""
                if match := re.search(REGEXES["id"], url_result):
                    url = FORMATS["url"].format(PERFORMER_ID=match[0])
                performer["url"] = url
            performer_list.append(performer)
    elif retry:
        # The nested call prints its own result; returning here avoids emitting
        # a second JSON document ("[]") on stdout.
        performer_by_name(reverse_first_last_name(name), retry=False)
        return
    else:
        performer_list.append({"name": "No performer found"})

    print(json.dumps(performer_list))


def main():
    """Dispatch to the scraper named in argv, using the JSON fragment on stdin."""
    if len(sys.argv) == 1:
        log.error("No arguments")
        sys.exit(1)

    stdin = sys.stdin.read()

    inputJSON = json.loads(stdin)
    url = inputJSON.get("url", None)
    name = inputJSON.get("name", None)

    if "performer_by_url" in sys.argv:
        log.debug("Processing performer by URL")
        log.debug(stdin)
        if url:
            performer_by_url(url)
        else:
            log.error("Missing URL")
    elif "performer_by_name" in sys.argv:
        log.debug("Processing performer by name")
        log.debug(stdin)
        if name:
            performer_by_name(name)
        else:
            log.error("Missing name")
    else:
        log.error("No argument processed")
        log.debug(stdin)


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        logging.exception(e)
http://www.minnano-av.com/ + action: script + script: + - python + - Minnano-AV.py + - performer_by_url + +performerByName: + action: script + script: + - python + - Minnano-AV.py + - performer_by_name +# Last Updated November 27, 2023 diff --git a/scrapers/Minnano-AV/requirements.txt b/scrapers/Minnano-AV/requirements.txt new file mode 100644 index 000000000..b4531941f --- /dev/null +++ b/scrapers/Minnano-AV/requirements.txt @@ -0,0 +1,2 @@ +requests +lxml \ No newline at end of file From fcca1142502a32948d2e132d95e9b8fead1aa314 Mon Sep 17 00:00:00 2001 From: Muldec <34677746+Muldec@users.noreply.github.com> Date: Fri, 24 Nov 2023 20:09:35 +0100 Subject: [PATCH 551/624] Get Studio Name instead of fixed value --- scrapers/IWantClips.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scrapers/IWantClips.yml b/scrapers/IWantClips.yml index 6ccf8b50a..f3ab06847 100644 --- a/scrapers/IWantClips.yml +++ b/scrapers/IWantClips.yml @@ -28,6 +28,8 @@ xPathScrapers: URL: //span[@class='clip-title']/a/@href Image: //div[@class='clip-thumb-16-9']/a/img/@src sceneScraper: + common: + $model: //a[@class="modelLink"] scene: Title: //h1[@class="headline hidden-xs"]/text() URL: //link[@rel="canonical"]/@href @@ -51,10 +53,9 @@ xPathScrapers: Image: selector: //div[contains(@class,'vidStuff')]//video[contains(@id,'html5_api')]/@poster | //div[contains(@class,'vidStuff')]//img/@src Studio: - Name: - fixed: IWantClips + Name: $model Performers: - Name: //a[@class="modelLink"] + Name: $model performerScraper: common: $author: //li[@class="ais-Hits-item"][1]//span[contains(@class,'clip-author')] @@ -78,4 +79,4 @@ xPathScrapers: driver: useCDP: true -# Last Updated October 19, 2023 +# Last Updated November 24, 2023 From 9a795bdab8d474a628ae3895e5648592eb311857 Mon Sep 17 00:00:00 2001 From: Muldec <34677746+Muldec@users.noreply.github.com> Date: Fri, 24 Nov 2023 20:11:27 +0100 Subject: [PATCH 552/624] Fix Details missing CRLF --- scrapers/IWantClips.yml | 4 
+++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scrapers/IWantClips.yml b/scrapers/IWantClips.yml index f3ab06847..5a1c32e9e 100644 --- a/scrapers/IWantClips.yml +++ b/scrapers/IWantClips.yml @@ -40,7 +40,9 @@ xPathScrapers: - regex: Published\s(.+) with: $1 - parseDate: Jan 2, 2006 - Details: //div[@class="col-xs-12 description fix"]/span + Details: + selector: //div[@class="col-xs-12 description fix"][last()]/span/text() + concat: "\n\n" Tags: Name: selector: //div[@class="col-xs-12 hashtags fix"]/span/em | //div[@class="col-xs-12 category fix"]/span From 6e12ea0266fa33c66508ad121a344bd90d0c7156 Mon Sep 17 00:00:00 2001 From: github-user-t <150365686+github-user-t@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:23:22 -0500 Subject: [PATCH 553/624] Url wasn't getting populated for SARJ-LLC.py Also fits the new structure instead of deprecated one: https://github.com/stashapp/stash/blob/413311711fba25a67e6aa2f9eeeefef97e641567/pkg/scraper/gallery.go#L10 --- scrapers/SARJ-LLC/SARJ-LLC.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.py b/scrapers/SARJ-LLC/SARJ-LLC.py index 1dedfb7c4..8dcc8435d 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.py +++ b/scrapers/SARJ-LLC/SARJ-LLC.py @@ -195,7 +195,7 @@ def map_media(data, studio, base_url): return { 'Title': data['name'], 'Details': data['description'], - 'URL': url, + 'URLs': [url], 'Date': data['publishedAt'][0:data['publishedAt'].find('T')], 'Tags': list(map(lambda t: {'Name': t}, data['tags'])), 'Performers': list(map(lambda m: map_model(base_url, m), data['models'])), From 6fc925f1b4ee40daa714179ffe306b0625adde09 Mon Sep 17 00:00:00 2001 From: github-user-t <150365686+github-user-t@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:27:19 -0500 Subject: [PATCH 554/624] Update SARJ-LLC.yml --- scrapers/SARJ-LLC/SARJ-LLC.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml index 
01bef8720..c10c552cb 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -91,4 +91,4 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated April 24, 2023 +# Last Updated November 28, 2023 From 9314cc14cb4314df99139b96ad20759fbe583816 Mon Sep 17 00:00:00 2001 From: github-user-t <150365686+github-user-t@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:37:07 -0500 Subject: [PATCH 555/624] Update SARJ-LLC.py --- scrapers/SARJ-LLC/SARJ-LLC.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.py b/scrapers/SARJ-LLC/SARJ-LLC.py index 8dcc8435d..d00187cda 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.py +++ b/scrapers/SARJ-LLC/SARJ-LLC.py @@ -167,12 +167,12 @@ def scrape_model(base_url, name): def map_media(data, studio, base_url): - url = "" + urls = [] studio_code = data["UUID"] studio_name = {'Name': ""} if studio is not None: studio_url = studio[1] - url = f"https://www.{studio_url}{data['path']}" + urls = [f"https://www.{studio_url}{data['path']}"] studio_name = {'Name': studio[0]} director = None @@ -195,7 +195,7 @@ def map_media(data, studio, base_url): return { 'Title': data['name'], 'Details': data['description'], - 'URLs': [url], + 'URLs': urls, 'Date': data['publishedAt'][0:data['publishedAt'].find('T')], 'Tags': list(map(lambda t: {'Name': t}, data['tags'])), 'Performers': list(map(lambda m: map_model(base_url, m), data['models'])), From 19af377ad42c812a062d120a8e3ae9babb09e6f9 Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Wed, 29 Nov 2023 14:06:44 -0500 Subject: [PATCH 556/624] Update SARJ-LLC.py is unable to import the py_common library after being moved under into a subfolder --- scrapers/SARJ-LLC/SARJ-LLC.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.py b/scrapers/SARJ-LLC/SARJ-LLC.py index d00187cda..127b8803e 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.py +++ b/scrapers/SARJ-LLC/SARJ-LLC.py @@ -1,9 +1,18 @@ import 
base64 +import os import json import sys import re from urllib.parse import urlparse, urlencode +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from py_common import log except ModuleNotFoundError: From 5994c39fe8e63123ec9e4b4930a065533171360d Mon Sep 17 00:00:00 2001 From: Nic Patterson Date: Wed, 29 Nov 2023 19:15:12 +0000 Subject: [PATCH 557/624] update teamskeet and vixen scrapers --- scrapers/Teamskeet/TeamskeetAPI.py | 8 ++++++++ scrapers/vixenNetwork/vixenNetwork.py | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/scrapers/Teamskeet/TeamskeetAPI.py b/scrapers/Teamskeet/TeamskeetAPI.py index 4ceb22be6..eab533466 100644 --- a/scrapers/Teamskeet/TeamskeetAPI.py +++ b/scrapers/Teamskeet/TeamskeetAPI.py @@ -5,6 +5,14 @@ import sys from datetime import datetime +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import py_common.log as log except ModuleNotFoundError: diff --git a/scrapers/vixenNetwork/vixenNetwork.py b/scrapers/vixenNetwork/vixenNetwork.py index 3116eb9f2..c409a1424 100644 --- a/scrapers/vixenNetwork/vixenNetwork.py +++ b/scrapers/vixenNetwork/vixenNetwork.py @@ -1,7 +1,16 @@ import json +import os import sys from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname( + os.path.realpath(__file__)) # get current script directory +parent = 
os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import requests except ModuleNotFoundError: From d810333deea4f71a8ab071d274013a6afa07e3fa Mon Sep 17 00:00:00 2001 From: SpecialKeta <148014803+SpecialKeta@users.noreply.github.com> Date: Thu, 30 Nov 2023 03:48:23 +0100 Subject: [PATCH 558/624] Add files via upload --- scrapers/HeavyOnHotties.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scrapers/HeavyOnHotties.yml b/scrapers/HeavyOnHotties.yml index b22682b64..948609c10 100644 --- a/scrapers/HeavyOnHotties.yml +++ b/scrapers/HeavyOnHotties.yml @@ -9,6 +9,10 @@ xPathScrapers: scene: Title: selector: //div[@class="vid-title clearfix text-center-mobile"]/h2 + postProcess: + - replace: + - regex: .+\"([^\"]+)\" + with: $1 Date: selector: //span[contains(@class, "released title")]/strong/text() postProcess: @@ -25,4 +29,4 @@ xPathScrapers: Studio: Name: fixed: HeavyOnHotties -# Last Updated July 15, 2021 +# Last Updated November 30, 2023 From 4b3d5ecbf62004a8d7d192e0754404dbdd6944bb Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 1 Dec 2023 00:10:52 +0100 Subject: [PATCH 559/624] Fix env variable retrieval --- scrapers/PMVHaven.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/PMVHaven.py b/scrapers/PMVHaven.py index e9717fffb..83aaea1f2 100644 --- a/scrapers/PMVHaven.py +++ b/scrapers/PMVHaven.py @@ -27,7 +27,7 @@ sys.exit() #bugfix for socks5 proxies, due to pySocks implementation incompatibility with Stash -proxy = os.environ['HTTPS_PROXY'] +proxy = os.environ.get('HTTPS_PROXY', '') if proxy != "" and proxy.startswith("socks5://"): proxy = proxy.replace("socks5://", "socks5h://") os.environ['HTTPS_PROXY'] = proxy From cf663b8082b5f2ccd3fcf687ce375fdeb23586dd Mon Sep 17 00:00:00 2001 From: JackDawson94 
<100418085+JackDawson94@users.noreply.github.com> Date: Fri, 1 Dec 2023 00:15:00 +0100 Subject: [PATCH 560/624] Move to folder --- scrapers/{ => PMVHaven}/PMVHaven.py | 0 scrapers/{ => PMVHaven}/PMVHaven.yml | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename scrapers/{ => PMVHaven}/PMVHaven.py (100%) rename scrapers/{ => PMVHaven}/PMVHaven.yml (100%) diff --git a/scrapers/PMVHaven.py b/scrapers/PMVHaven/PMVHaven.py similarity index 100% rename from scrapers/PMVHaven.py rename to scrapers/PMVHaven/PMVHaven.py diff --git a/scrapers/PMVHaven.yml b/scrapers/PMVHaven/PMVHaven.yml similarity index 100% rename from scrapers/PMVHaven.yml rename to scrapers/PMVHaven/PMVHaven.yml From 8468b43990696601c4f6b61c9c731a35bc4be98e Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 1 Dec 2023 00:24:36 +0100 Subject: [PATCH 561/624] Fix py_common import bug --- scrapers/PMVHaven/PMVHaven.py | 9 ++++++++- scrapers/PMVHaven/PMVHaven.yml | 4 +++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/scrapers/PMVHaven/PMVHaven.py b/scrapers/PMVHaven/PMVHaven.py index 83aaea1f2..6ba81d85f 100644 --- a/scrapers/PMVHaven/PMVHaven.py +++ b/scrapers/PMVHaven/PMVHaven.py @@ -20,8 +20,15 @@ print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", file=sys.stderr) sys.exit() +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: - import py_common.log as log + from py_common import log except ModuleNotFoundError: print("You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) sys.exit() diff --git a/scrapers/PMVHaven/PMVHaven.yml b/scrapers/PMVHaven/PMVHaven.yml index b56cb6de3..eb9c8e51a 100644 --- a/scrapers/PMVHaven/PMVHaven.yml +++ b/scrapers/PMVHaven/PMVHaven.yml @@ -1,4 +1,6 @@ name: PMVHaven +# requires: py_common + sceneByURL: - url: - pmvhaven.com/video/ @@ -6,4 +8,4 @@ sceneByURL: script: - python - PMVHaven.py -# Last Updated November 21, 2023 +# Last Updated November 30, 2023 From c20a13edce2fb212fc3ce54f464944c71d44bc19 Mon Sep 17 00:00:00 2001 From: JackDawson94 <100418085+JackDawson94@users.noreply.github.com> Date: Fri, 1 Dec 2023 13:37:00 +0100 Subject: [PATCH 562/624] Fix the scraper, broken due to py_common --- scrapers/torrent/torrent.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scrapers/torrent/torrent.py b/scrapers/torrent/torrent.py index a88db119b..2ee32ac71 100644 --- a/scrapers/torrent/torrent.py +++ b/scrapers/torrent/torrent.py @@ -1,11 +1,18 @@ import sys import json -from os.path import basename +import os from pathlib import Path import re from datetime import datetime import difflib +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from bencoder import bdecode except ModuleNotFoundError: @@ -39,7 +46,7 @@ def get_scene_data(fragment_data): if response and response["findScene"]: for f in response["findScene"]["files"]: - scene_files.append({"filename": basename(f["path"]), "size": f["size"]}) + scene_files.append({"filename": os.path.basename(f["path"]), "size": f["size"]}) return {"id": scene_id, "title": scene_title, "files": scene_files} return {} From 10c4eeff6da6cd9d01ac6995fb3341ae007fccaf Mon 
Sep 17 00:00:00 2001 From: Clasp8775 Date: Fri, 1 Dec 2023 15:07:44 +0100 Subject: [PATCH 563/624] Improve the alias regex --- scrapers/Minnano-AV/Minnano-AV.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py index 6e9b215e1..8d68defc7 100644 --- a/scrapers/Minnano-AV/Minnano-AV.py +++ b/scrapers/Minnano-AV/Minnano-AV.py @@ -45,7 +45,7 @@ } REGEXES = { - "alias": r"(?P[^\x29\uFF09]+?)(?P[\x28\uFF08\u3010][^\x29\uFF09\u3011]+(?:[\x29\uFF09\u3011]))? [\x28\uFF08](?P\w+)? / (?P[a-z-A-Z ]+)?[\x29\uFF09]", + "alias": r"(?P[^\x29\uFF09]+?)(?P[\x28\uFF08\u3010][^\x29\uFF09\u3011]+(?:[\x29\uFF09\u3011]))?\s[\x28\uFF08](?P\w+)?\s+/\s(?P[a-z-A-Z ]+)?[\x29\uFF09]", "id": r"\d+", "birthdate": r"[0-9-]+", "career": (r"(?P\d+)年?(?:\d+月)? ?(?:\d+)?日?[-~]? ?(?:(?P\d+)?)?年?"), From 5a6d33e1de35a5578d795e708a3691f0abeb984c Mon Sep 17 00:00:00 2001 From: Clasp8775 Date: Fri, 1 Dec 2023 15:09:29 +0100 Subject: [PATCH 564/624] Add link to alias regex --- scrapers/Minnano-AV/Minnano-AV.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py index 8d68defc7..b1b10fb82 100644 --- a/scrapers/Minnano-AV/Minnano-AV.py +++ b/scrapers/Minnano-AV/Minnano-AV.py @@ -45,6 +45,7 @@ } REGEXES = { + # https://regex101.com/r/9k2GXw/5 "alias": r"(?P[^\x29\uFF09]+?)(?P[\x28\uFF08\u3010][^\x29\uFF09\u3011]+(?:[\x29\uFF09\u3011]))?\s[\x28\uFF08](?P\w+)?\s+/\s(?P[a-z-A-Z ]+)?[\x29\uFF09]", "id": r"\d+", "birthdate": r"[0-9-]+", From 4ef2f0f2f0f8f4e0170c58359a36eaea3d074b93 Mon Sep 17 00:00:00 2001 From: Clasp8775 Date: Fri, 1 Dec 2023 15:12:43 +0100 Subject: [PATCH 565/624] Replace logging module with log --- scrapers/Minnano-AV/Minnano-AV.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py index b1b10fb82..9103b0501 100644 --- a/scrapers/Minnano-AV/Minnano-AV.py +++ 
b/scrapers/Minnano-AV/Minnano-AV.py @@ -1,5 +1,4 @@ import json -import logging import os import re import sys @@ -376,4 +375,4 @@ def main(): try: main() except Exception as e: - logging.exception(e) + log.error(e) From d516f478ec0cb4bc932fb9385da169dbd1958a47 Mon Sep 17 00:00:00 2001 From: ouenascender <138104265+ouenascender@users.noreply.github.com> Date: Sat, 2 Dec 2023 13:31:17 +0000 Subject: [PATCH 566/624] Added Support for freeze.xxx to Hentaied.yml --- SCRAPERS-LIST.md | 1 + scrapers/Hentaied.yml | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 9241d6189..d81a1ddb3 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -573,6 +573,7 @@ freeones.com|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:he freeones.xxx|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- freeusefantasy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- freeusemilf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +freeze.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- french-twinks.com|Frenchtwinks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay frenchporn.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay freshmen.net|Freshmen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay diff --git a/scrapers/Hentaied.yml b/scrapers/Hentaied.yml index 9b398fd96..508d0533a 100644 --- a/scrapers/Hentaied.yml +++ b/scrapers/Hentaied.yml @@ -2,6 +2,7 @@ name: Hentaied sceneByURL: - action: scrapeXPath url: + - freeze.xxx - futanari.xxx - hentaied.com - parasited.com @@ -41,4 +42,5 @@ xPathScrapers: postProcess: - map: Real Life Hentai: Hentaied -# Last Updated September 28, 2023 + +# Last Updated December 2, 2023 \ No newline at end of file From 5f8fff32c27fab95e1ef18d3ab7f44fe76d213fa Mon Sep 17 00:00:00 2001 From: GroteKnuffelbeer <106664253+GroteKnuffelbeer@users.noreply.github.com> Date: Sat, 2 Dec 2023 19:02:35 +0100 Subject: [PATCH 567/624] Add 
EvilAngelVideo scraper --- scrapers/EvilAngelVideo.yml | 61 +++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 scrapers/EvilAngelVideo.yml diff --git a/scrapers/EvilAngelVideo.yml b/scrapers/EvilAngelVideo.yml new file mode 100644 index 000000000..06f871a09 --- /dev/null +++ b/scrapers/EvilAngelVideo.yml @@ -0,0 +1,61 @@ +name: "EvilAngelVideo" +movieByURL: + - action: scrapeXPath + url: + - store.evilangelvideo.com/ + scraper: movieScraper +sceneByURL: + - action: scrapeXPath + url: + - store.evilangelvideo.com/ + scraper: sceneScraper +xPathScrapers: + movieScraper: + movie: + Name: + selector: //h1[@class="description"]/text() + Director: + selector: //a[@data-label="Director"]/text() + Date: + selector: (//div[@class="release-date"])[1]/text() + postProcess: + - parseDate: Jan 02, 2006 + Synopsis: + selector: //div[@class="synopsis"]/p//text() + concat: " " + FrontImage: + selector: (//div[@id="viewLargeBoxcoverCarousel"]//img)[1]/@data-src + BackImage: + selector: (//div[@id="viewLargeBoxcoverCarousel"]//img)[2]/@data-src + Duration: + selector: (//div[@class="release-date"])[2]/text() + postProcess: + - replace: + - regex: ^(\d+)\s*hrs\.\s+(\d+)\s*mins\. # xx hrs. xx mins. 
+ with: "$1:$2:00" + Studio: + Name: + selector: //a[@data-label="Studio"]/text() + sceneScraper: + scene: + Title: + selector: //h1[@class="description"]/text() + Director: + selector: //a[@data-label="Director"]/text() + Date: + selector: (//div[@class="release-date"])[1]/text() + postProcess: + - parseDate: Jan 02, 2006 + Details: + selector: //meta[@name="og:description"]/@content + Performers: + Name: + selector: //span[@class="video-performer-name overlay"] + Image: + selector: //link[@rel="image_src"]/@href + Studio: + Name: + selector: //a[@data-label="Studio"]/text() + Tags: + Name: //div[@class="categories"/a[@data-category="Item Page"]/text() +# Last Updated December 2, 2023 From ac432e73f6367370c040ed465faef87bdd9a3bf8 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Fri, 1 Dec 2023 04:40:44 +0100 Subject: [PATCH 568/624] Fix dig util to allow all valid python list indexes --- scrapers/py_common/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/py_common/util.py b/scrapers/py_common/util.py index 5cf6bcf9d..846e2c7ef 100644 --- a/scrapers/py_common/util.py +++ b/scrapers/py_common/util.py @@ -27,7 +27,7 @@ def inner(d: dict | list, key: str | int | tuple): if k in d: return d[k] return d.get(key) - elif isinstance(d, list) and isinstance(key, int) and 0 < key < len(d): + elif isinstance(d, list) and isinstance(key, int) and key < len(d): return d[key] else: return default From 4d5b951ec59ab34deb035a95b843c29e21ca486b Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 5 Dec 2023 02:48:20 +0100 Subject: [PATCH 569/624] Add XPath scraper for PKF Studios --- SCRAPERS-LIST.md | 1 + scrapers/PKFStudios.yml | 56 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 scrapers/PKFStudios.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 9241d6189..ed9b7924b 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1127,6 +1127,7 @@ 
pinupfiles.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pissplay.com|PissPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur Fetish pissynetwork.com|PissyNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pjgirls.com|pjgirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pkfstudios.com|PKFStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- playboy.tv|PlayboyTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- playboyplus.com|PlayboyPlus.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- playdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/PKFStudios.yml b/scrapers/PKFStudios.yml new file mode 100644 index 000000000..426a77be0 --- /dev/null +++ b/scrapers/PKFStudios.yml @@ -0,0 +1,56 @@ +name: PKF Studios +sceneByURL: + - action: scrapeXPath + url: + - pkfstudios.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class="entry-title"]/text() + Details: + # Description is a sequence of p elements containing ONLY text + selector: //div[@class="entry-content"]/p[not(*)]/text() + concat: "\n\n" + # Remove the trailing "_ _ _ _ _" separator + replace: + - regex: "(\n\n[_ ]+)" + with: "" + Date: + selector: //span[@class="entry-date"]//text() + postProcess: + - parseDate: January 2, 2006 + Image: + # Images are loaded with javascript, we'll just grab the last image + # from the srcset because it's usually the largest + selector: //div[contains(@class, "post-thumbnail")]/img/@data-lazy-srcset + postProcess: + - replace: + - regex: ^.*\s+(https://.*)\s+\d+w$ + with: $1 + Studio: + Name: + fixed: "PKF Studios" + Tags: + Name: + # First selector will match when the tags are outside of the tag + selector: //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/following-sibling::text() | //div[@class="entry-content"]//strong[starts-with(text(), "Role")]/text() + postProcess: + - replace: + - regex: (?:.*:\s+)?([^.]*).? 
+ with: $1 + split: ", " + Performers: + Name: + # Sometimes the performers are listed in a separate tag, sometimes they're in a paragraph mixed in with the description + selector: //div[@class="entry-content"]//*[contains(text(), "Starring")]/text() | //p[contains(., "Starring")] + postProcess: + - replace: + - regex: ".*Starring (.*)" + with: $1 + - regex: "(?i)directed by johnm" + with: "" + - regex: ", (and)?" + with: " and " + split: " and " +# Last Updated December 05, 2023 From 016bf3c9ae6590f5eed7d2741b2a610e5b8a75fd Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 5 Dec 2023 02:51:44 +0100 Subject: [PATCH 570/624] Fix validation errors in PKFStudios.yml --- scrapers/PKFStudios.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scrapers/PKFStudios.yml b/scrapers/PKFStudios.yml index 426a77be0..1c2742a28 100644 --- a/scrapers/PKFStudios.yml +++ b/scrapers/PKFStudios.yml @@ -13,9 +13,10 @@ xPathScrapers: selector: //div[@class="entry-content"]/p[not(*)]/text() concat: "\n\n" # Remove the trailing "_ _ _ _ _" separator - replace: - - regex: "(\n\n[_ ]+)" - with: "" + postProcess: + - replace: + - regex: "(\n\n[_ ]+)" + with: "" Date: selector: //span[@class="entry-date"]//text() postProcess: From 9405730b1adc9f1452dbf5e51657ce5126668048 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 5 Dec 2023 03:20:51 +0100 Subject: [PATCH 571/624] Add another domain for EvilAngel Store --- SCRAPERS-LIST.md | 2 ++ scrapers/EvilAngelStore.yml | 49 +++++++++++++++++++++++++++++ scrapers/EvilAngelVideo.yml | 61 ------------------------------------- 3 files changed, 51 insertions(+), 61 deletions(-) create mode 100644 scrapers/EvilAngelStore.yml delete mode 100644 scrapers/EvilAngelVideo.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index ed9b7924b..79b104c85 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1351,6 +1351,8 @@ stepsiblings.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
stepsiblingscaught.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- stockingvideos.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- stockydudes.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +store.evilangel.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +store.evilangelvideo.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- store.freeones.com|FreeonesStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- str8hell.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay strapattackers.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Femdom diff --git a/scrapers/EvilAngelStore.yml b/scrapers/EvilAngelStore.yml new file mode 100644 index 000000000..e04adf12d --- /dev/null +++ b/scrapers/EvilAngelStore.yml @@ -0,0 +1,49 @@ +name: Evil Angel Store +movieByURL: + - action: scrapeXPath + url: &urls + - store.evilangelvideo.com/ + - store.evilangel.com/ + scraper: movieScraper +sceneByURL: + - action: scrapeXPath + url: *urls + scraper: sceneScraper +xPathScrapers: + movieScraper: + movie: + Name: &name //h1[@class="description"]/text() + Director: &director //a[@data-label="Director"]/text() + Date: + selector: (//div[@class="release-date"])[1]/text() + postProcess: + - parseDate: Jan 02, 2006 + Synopsis: + selector: //div[@class="synopsis"]/p//text() + concat: " " + FrontImage: (//div[@id="viewLargeBoxcoverCarousel"]//img)[1]/@data-src + BackImage: (//div[@id="viewLargeBoxcoverCarousel"]//img)[2]/@data-src + Duration: + selector: (//div[@class="release-date"])[2]/text() + postProcess: + - replace: + - regex: ^(\d+)\s*hrs\.\s+(\d+)\s*mins\. # xx hrs. xx mins. 
+ with: "$1:$2:00" + Studio: &studio + Name: //a[@data-label="Studio"]/text() + sceneScraper: + scene: + Title: *name + Director: *director + Date: + selector: (//div[@class="release-date"])[1]/text() + postProcess: + - parseDate: Jan 02, 2006 + Details: //meta[@name="og:description"]/@content + Performers: + Name: //span[@class="video-performer-name overlay"] + Image: //link[@rel="image_src"]/@href + Studio: *studio + Tags: + Name: //div[@class="categories"/a[@data-category="Item Page"]/text() +# Last Updated December 02, 2023 diff --git a/scrapers/EvilAngelVideo.yml b/scrapers/EvilAngelVideo.yml deleted file mode 100644 index 06f871a09..000000000 --- a/scrapers/EvilAngelVideo.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: "EvilAngelVideo" -movieByURL: - - action: scrapeXPath - url: - - store.evilangelvideo.com/ - scraper: movieScraper -sceneByURL: - - action: scrapeXPath - url: - - store.evilangelvideo.com/ - scraper: sceneScraper -xPathScrapers: - movieScraper: - movie: - Name: - selector: //h1[@class="description"]/text() - Director: - selector: //a[@data-label="Director"]/text() - Date: - selector: (//div[@class="release-date"])[1]/text() - postProcess: - - parseDate: Jan 02, 2006 - Synopsis: - selector: //div[@class="synopsis"]/p//text() - concat: " " - FrontImage: - selector: (//div[@id="viewLargeBoxcoverCarousel"]//img)[1]/@data-src - BackImage: - selector: (//div[@id="viewLargeBoxcoverCarousel"]//img)[2]/@data-src - Duration: - selector: (//div[@class="release-date"])[2]/text() - postProcess: - - replace: - - regex: ^(\d+)\s*hrs\.\s+(\d+)\s*mins\. # xx hrs. xx mins. 
- with: "$1:$2:00" - Studio: - Name: - selector: //a[@data-label="Studio"]/text() - sceneScraper: - scene: - Title: - selector: //h1[@class="description"]/text() - Director: - selector: //a[@data-label="Director"]/text() - Date: - selector: (//div[@class="release-date"])[1]/text() - postProcess: - - parseDate: Jan 02, 2006 - Details: - selector: //meta[@name="og:description"]/@content - Performers: - Name: - selector: //span[@class="video-performer-name overlay"] - Image: - selector: //link[@rel="image_src"]/@href - Studio: - Name: - selector: //a[@data-label="Studio"]/text() - Tags: - Name: //div[@class="categories"/a[@data-category="Item Page"]/text() -# Last Updated December 2, 2023 From bddf685b8be9a580fd94d01dc3d1a1899402caa5 Mon Sep 17 00:00:00 2001 From: symptom6186 <149571017+symptom6186@users.noreply.github.com> Date: Tue, 5 Dec 2023 12:25:41 +0100 Subject: [PATCH 572/624] Update AniDB.yml to scrape performer URL while scarping scene URL --- scrapers/AniDB.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scrapers/AniDB.yml b/scrapers/AniDB.yml index dc8fee6d2..5e4f3d76c 100644 --- a/scrapers/AniDB.yml +++ b/scrapers/AniDB.yml @@ -107,6 +107,7 @@ xPathScrapers: sceneScraper: common: $info: //div[@class="g_section info"] + $character: //div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"] scene: Title: $info//div[@id="tab_1_pane"]//span[@itemprop="name"] Details: @@ -115,7 +116,13 @@ xPathScrapers: Tags: Name: $info//div[@id="tab_1_pane"]//span[@class="tagname"] Performers: - Name: //div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"]/a/span + Name: $character/a/span + URL: + selector: $character/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net Studio: Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a Image: 
$info//div[@class="image"]//img/@src @@ -135,4 +142,4 @@ driver: Domain: "anidb.net" Value: "" # Enter the value of the 'adbuin' here Path: "/" -# Last Updated June 23, 2023 +# Last Updated Dec 5, 2023 From 541d935efa6ce02fe4d3f17644851e70cc1d8ecb Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Wed, 6 Dec 2023 04:22:13 +0300 Subject: [PATCH 573/624] cover fetch fail no longer fails the whole script --- scrapers/SARJ-LLC/SARJ-LLC.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.py b/scrapers/SARJ-LLC/SARJ-LLC.py index 127b8803e..4d172ff08 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.py +++ b/scrapers/SARJ-LLC/SARJ-LLC.py @@ -231,16 +231,13 @@ def scrape_movie(base_url, date, name): response = requests.get(res['Image'], headers={ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0' }, timeout=(3, 6)) + if response and response.status_code < 400: + mime = 'image/jpeg' + encoded = base64.b64encode(response.content).decode('utf-8') + res['Image'] = f'data:{mime};base64,{encoded}' + break except requests.exceptions.RequestException as req_ex: - log.error(f"Error fetching URL {res['Image']}: {req_ex}") - - if response.status_code < 400: - mime = 'image/jpeg' - encoded = base64.b64encode(response.content).decode('utf-8') - res['Image'] = f'data:{mime};base64,{encoded}' - break - - log.info(f"Fetching URL {res['Image']} resulted in error status: {response.status_code}") + log.info(f"Error fetching URL {res['Image']}: {req_ex}") res['Image'] = None return res From e71d1a95688e6ccc8d0d87bc6526a99b3dce3cf7 Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Wed, 6 Dec 2023 04:22:52 +0300 Subject: [PATCH 574/624] date update --- scrapers/SARJ-LLC/SARJ-LLC.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.yml 
b/scrapers/SARJ-LLC/SARJ-LLC.yml index c10c552cb..79f020da2 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -91,4 +91,4 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated November 28, 2023 +# Last Updated December 06, 2023 From c3b907b367dc4a1ff1df2f45278e311eb6a87203 Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Wed, 6 Dec 2023 04:27:41 +0300 Subject: [PATCH 575/624] date fix --- scrapers/SARJ-LLC/SARJ-LLC.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml index 79f020da2..86069d719 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -91,4 +91,4 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated December 06, 2023 +# Last Updated December 05, 2023 From 735af0e753f49e95d89a6ef7c31cbb23671709e6 Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Wed, 6 Dec 2023 04:30:46 +0300 Subject: [PATCH 576/624] date fix? --- scrapers/SARJ-LLC/SARJ-LLC.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml index 86069d719..ceda97094 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -91,4 +91,4 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated December 05, 2023 +# Last Updated Dec 5, 2023 From dc9e6361fbdb021327717fa7e60c860434517df6 Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Wed, 6 Dec 2023 04:35:22 +0300 Subject: [PATCH 577/624] date fix?? 
--- scrapers/SARJ-LLC/SARJ-LLC.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scrapers/SARJ-LLC/SARJ-LLC.yml b/scrapers/SARJ-LLC/SARJ-LLC.yml index ceda97094..db5d8d6d6 100644 --- a/scrapers/SARJ-LLC/SARJ-LLC.yml +++ b/scrapers/SARJ-LLC/SARJ-LLC.yml @@ -91,4 +91,5 @@ performerByName: - SARJ-LLC.py - search - performer -# Last Updated Dec 5, 2023 + +# Last Updated December 05, 2023 From 55ea582f41f0d3387e42dc6096a6e67f67cce0b9 Mon Sep 17 00:00:00 2001 From: estellaarrieta <111574945+estellaarrieta@users.noreply.github.com> Date: Wed, 6 Dec 2023 04:38:18 +0300 Subject: [PATCH 578/624] date fix??? From c93cdbeb3c1b8b3b849959df3c938591e9324470 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 6 Dec 2023 03:47:12 +0100 Subject: [PATCH 579/624] Remove validation of Last Updated comment --- validator/index.js | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/validator/index.js b/validator/index.js index 2a9650d98..00b73e082 100755 --- a/validator/index.js +++ b/validator/index.js @@ -55,8 +55,6 @@ class Validator { }); this.mappingPattern = /^([a-z]+)By(Fragment|Name|URL)$/; - this.commentPrefix = /^ *# *Last Updated/i; - this.commentPattern = /^#( *)Last Updated ((?:Jan|Febr)uary|March|April|May|June|July|August|(?:Septem|Octo|Novem|Decem)ber) (0[1-9]|[1-3]\d), (\d{4})$/; if (!!this.ajv.getKeyword('deprecated')) { this.ajv.removeKeyword('deprecated'); @@ -135,44 +133,6 @@ class Validator { console.log(output); } - // Verify that there is a "Last Updated" comment - if (valid) { - const lines = contents - .split(/\r?\n/g) - .slice(-5) - .reverse() - .filter(line => !!line.trim()); - - const commentLine = lines.findIndex(line => this.commentPrefix.test(line)); - let validComment = false; - if (commentLine === -1) { - console.error(chalk.red(`${chalk.bold('ERROR')} 'Last Updated' comment is missing.`)); - } else { - if (commentLine !== 0) { - console.error(chalk.red(`${chalk.bold('ERROR')} 'Last Updated' 
comment is not the last line.`)); - } - - const comment = lines[commentLine]; - const match = comment.trim().match(this.commentPattern); - if (!match) { - console.error(chalk.red(`${chalk.bold('ERROR')} 'Last Updated' comment's format is invalid: ${comment}`)); - } else { - // Validate leading spaces (trailing spaces are ignored) - const leadingSpaces = comment != comment.trimLeft(); - if (leadingSpaces) { - console.error(chalk.red(`${chalk.bold('ERROR')} Remove leading spaces: '${comment}'`)); - } - // Validate spacing between '#' and 'Last Updated' - if (match[1] !== ' ') { - console.error(chalk.red(`${chalk.bold('ERROR')} Missing single space between '#' and 'Last Updated': ${comment}`)); - } else { - validComment = true; - } - } - } - valid = valid && validComment; - } - if (this.verbose || !valid) { const validColor = valid ? chalk.green : chalk.red; console.log(`${relPath} Valid: ${validColor(valid)}`); From 9f616aa26e420dc707882f0f2f4bc954eb81787a Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 7 Dec 2023 12:04:38 +0100 Subject: [PATCH 580/624] Fix relative import in IAFD --- scrapers/IAFD/IAFD.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/scrapers/IAFD/IAFD.py b/scrapers/IAFD/IAFD.py index 6b71dba43..e9197e18a 100644 --- a/scrapers/IAFD/IAFD.py +++ b/scrapers/IAFD/IAFD.py @@ -1,13 +1,21 @@ import argparse import json -import sys -import time -import re +import os import random +import re import requests +import sys +import time from typing import Iterable, Callable, TypeVar from datetime import datetime +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + # extra modules below need to be installed try: import 
py_common.log as log @@ -507,7 +515,9 @@ def performer_aliases(tree): tree.xpath( '//div[p[@class="bioheading" and contains(normalize-space(text()),"Performer AKA")]]//div[@class="biodata" and not(text()="No known aliases")]/text()' ), - lambda aliases: ", ".join(filter(None, (clean_alias(alias) for alias in aliases.split(", ")))), + lambda aliases: ", ".join( + filter(None, (clean_alias(alias) for alias in aliases.split(", "))) + ), ) From c43b9b26a98bd26caa7dbf35651eb4ff78be99dd Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 7 Dec 2023 12:06:11 +0100 Subject: [PATCH 581/624] Fix relative import in ScrapeWithURL --- scrapers/ScrapeWithURL/ScrapeWithURL.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scrapers/ScrapeWithURL/ScrapeWithURL.py b/scrapers/ScrapeWithURL/ScrapeWithURL.py index 1498ded76..340c626a8 100644 --- a/scrapers/ScrapeWithURL/ScrapeWithURL.py +++ b/scrapers/ScrapeWithURL/ScrapeWithURL.py @@ -1,6 +1,14 @@ import json +import os import sys +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + try: import py_common.graphql as graphql import py_common.log as log From 6c1b26838c11aaa9f55c4ba4aedcf24fffc1ddb3 Mon Sep 17 00:00:00 2001 From: AshoPash <152990772+AshoPash@users.noreply.github.com> Date: Fri, 8 Dec 2023 16:49:14 +0100 Subject: [PATCH 582/624] Added PerformerScraper to AdultEmpire (#1556) * Added PerformerScraper to AdultEmpire --- scrapers/AdultEmpire.yml | 50 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/scrapers/AdultEmpire.yml b/scrapers/AdultEmpire.yml index e5807433e..39ab09139 100644 --- a/scrapers/AdultEmpire.yml +++ b/scrapers/AdultEmpire.yml @@ -21,7 +21,13 @@ 
sceneByQueryFragment: action: scrapeXPath queryURL: "{url}" scraper: sceneScraper - +performerByURL: + - + action: scrapeXPath + url: + - adultdvdempire.com + - adultempire.com + scraper: performerScraper xPathScrapers: sceneSearch: @@ -102,4 +108,44 @@ xPathScrapers: - regex: ^ with: "https://www.adultdvdempire.com" URL: //meta[@name='og:url']/@content -# Last Updated October 09, 2023 + performerScraper: + common: + $infoPiece: //*[@id="profileModal"]/div/div/div[2]/div[1]/ul + performer: + Name: //*[@id="content"]/section/div/div[2]/h1/text() + Birthdate: + selector: $infoPiece/li[contains(text(), 'Born:')]/text() + postProcess: + - replace: + - regex: Born:\s+(.*) + with: $1 + Height: + selector: $infoPiece/li[contains(text(), 'Height:')]/text() + postProcess: + - replace: + - regex: Height:\s+(.*) + with: $1 + - feetToCm: true + Image: //*[@id="content"]/section/div/div[1]/a[1]|/img/@src|//*[@id="content"]/section/div/div[1]/img/@src + Country: + selector: $infoPiecel/li[contains(text(), 'From:')]/text() + postProcess: + - replace: + - regex: From:\s+(.*) + with: $1 + Measurements: + selector: $infoPiece/li[contains(text(), 'Measurements:')]/text() + postProcess: + - replace: + - regex: Measurements:\s+(\d\d\w*)\D+(\d+)\D+(\d+).* + with: $1-$2-$3 + Aliases: + selector: //*[@id="content"]/section/div/div[2]/div[contains(text(), "Alias:")] + concat: ", " + postProcess: + - replace: + - regex: "Alias: (.*)" + with: $1 + Details: //*[@id="content"]/section/div/div[5]/aside/text() + URL: //link[@rel='canonical']/@href +# Last Updated December 07, 2023 From b9cb91254d57fd83b06d51ac0ca4741888b5a2da Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Tue, 12 Dec 2023 11:52:03 +1100 Subject: [PATCH 583/624] Build stable branch --- .github/workflows/deploy.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 
15e6e3118..e8e7521fe 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -27,15 +27,15 @@ jobs: cd master ./build_site.sh ../_site/develop # uncomment this once we have a stable branch - # - name: Checkout Stable - # uses: actions/checkout@v2 - # with: - # path: stable - # ref: stable - # fetch-depth: '0' - # - run: | - # cd stable - # ../master/build_site.sh ../_site/stable + - name: Checkout Stable + uses: actions/checkout@v2 + with: + path: stable + ref: stable + fetch-depth: '0' + - run: | + cd stable + ../master/build_site.sh ../_site/stable - uses: actions/upload-pages-artifact@v2 deploy: From 11d67af5fd74b0ae570e1a3feebdb46fa5021e89 Mon Sep 17 00:00:00 2001 From: Muldec <34677746+Muldec@users.noreply.github.com> Date: Wed, 13 Dec 2023 01:51:35 +0100 Subject: [PATCH 584/624] Improve IWC cover scraping (#1558) * Improve cover scraping * Include other file formats --- scrapers/IWantClips.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scrapers/IWantClips.yml b/scrapers/IWantClips.yml index 5a1c32e9e..4b0ab25ef 100644 --- a/scrapers/IWantClips.yml +++ b/scrapers/IWantClips.yml @@ -54,6 +54,13 @@ xPathScrapers: split: "," Image: selector: //div[contains(@class,'vidStuff')]//video[contains(@id,'html5_api')]/@poster | //div[contains(@class,'vidStuff')]//img/@src + postProcess: + - replace: + - regex: (\d*_.*((\.gif)|(\.mp4))) + with: t_$1 + - replace: + - regex: '(\.gif)|(\.mp4)' + with: ".jpg" Studio: Name: $model Performers: From 23f0b4cb7260d8e015564d2a98776d9139fb4636 Mon Sep 17 00:00:00 2001 From: SpecialKeta <148014803+SpecialKeta@users.noreply.github.com> Date: Wed, 13 Dec 2023 01:51:53 +0100 Subject: [PATCH 585/624] Added Vip4K Subsites and Fixed Vip4K Scraper (#1557) --- SCRAPERS-LIST.md | 11 ++++++++ scrapers/Vip4K.yml | 65 ++++++++++++++++++++++++++-------------------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 45c79d118..693737a2e 100644 --- 
a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -260,6 +260,7 @@ brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay brett-tyler.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bride4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brokenlatinawhores.com|BrokenLatinaWhores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brokensluts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- brokestraightboys.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay @@ -332,6 +333,7 @@ creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_ma creativeporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cruelgf.com|CruelGirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- crunchboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +cuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -415,6 +417,7 @@ daughterjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ddfbusty.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- deauxmalive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +debt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- deeper.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- deeplush.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- deepthroatsirens.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -466,6 +469,7 @@ dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy 
drilledchicks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- driverxxx.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- dtfsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dyke4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dyked.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- dylanryder.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -735,6 +739,7 @@ iafd.com|IAFD.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|P iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay idols69.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV ifeelmyself.com|IFeelMyself.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +ignore4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ihuntmycunt.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- ikillitts.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans ikissgirls.com|IKissGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian @@ -862,6 +867,7 @@ littleasians.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- littlecaprice-dreams.com|LittleCapriceDreams.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- littlefromasia.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- littlehellcat.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +loan4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lonestarangel.com|LoneStarAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lookathernow.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- lordaardvark.com|LordAardvark.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|CDP|- @@ -949,6 +955,7 @@ momcomesfirst.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momisamilf.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- momlover.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
+mommy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mommyblowsbest.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mommyjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- mommysboy.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- @@ -1121,6 +1128,7 @@ petitepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- philavise.com|Philavise.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- philippwants.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay pickupfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pie4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pinklabel.tv|PinkLabelTV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- pinkoclub.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pinkotgirls.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans @@ -1264,6 +1272,7 @@ seemomsuck.com|Tugpass.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- seemysextapes.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- selfiesuck.com|SelfieSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sensualpain.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +serve4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- severesexfilms.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- sexart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- sexbabesvr.com|SexBabesVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR @@ -1279,6 +1288,7 @@ sexwithmuslims.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sexworking.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- sexyhub.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shagmag.com|shagmag.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Magazines +shame4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- share.myfreecams.com|MFC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ 
-1567,6 +1577,7 @@ vickyathome.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- viktor-rom.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay vinaskyxxx.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- vintagegaymovies.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +vip4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vipissy.com|Vipissy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- vipsexvault.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- virtualpee.com|VirtualPee.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish diff --git a/scrapers/Vip4K.yml b/scrapers/Vip4K.yml index c63805696..e09d62e93 100644 --- a/scrapers/Vip4K.yml +++ b/scrapers/Vip4K.yml @@ -1,51 +1,60 @@ -name: vip4k +name: 'Vip4K' sceneByURL: - action: scrapeXPath url: - black4k.com + - bride4k.com + - cuck4k.com - daddy4k.com + - debt4k.com + - dyke4k.com - fist4k.com - hunt4k.com + - ignore4k.com + - loan4k.com - mature4k.com + - mommy4k.com - old4k.com + - pie4k.com - rim4k.com + - serve4k.com + - shame4k.com - sis.porn - stuck4k.com - tutor4k.com + - vip4k.com scraper: sceneScraper + queryURL: '{url}' + queryURLReplace: + url: + - regex: ^.+(/\w+/videos/\d{1,4}) + with: https://vip4k.com$1 xPathScrapers: sceneScraper: scene: - Title: //div[@class="title_player" or @class="title"]| //h1[contains(@class,"player")][contains(@class,"title")] + Title: //h1[@class='player-description__title'] Performers: - Name: - selector: - //a[@class="link_player"]|//div[@class="player-item__row"][contains(div,"Name:")]//span| - //div[contains(@class,"player-item")]//*[local-name()="svg"][@class="ico ico--star"]/../following-sibling::div - Details: - selector: - //div[@class="player-item__text"]|//span[@class="player-info__text-area"]|//span[@class="episode-about__text text"]|//div[@class='desc_frame']/p| - //div[@class='wrap_player_desc']/p|//div[@class='wrap_post']/p|//div[@class="player-item__text text text--sm"] - Image: 
//div[@class="player_watch"]/img[@src]/@src | //img[@class="player-item__inner"]/@data-src + Name: //div[@class='model__name'] + Details: //div[@class="player-description__text"] + Code: + selector: //link[@rel='canonical']/@href + postProcess: + - replace: + - regex: https.+/(\w+)/?$ + with: $1 + Image: + selector: //picture[@class='player-item__inner']/img/@data-src + postProcess: + - replace: + - regex: ^// + with: https:// + Date: //span[@class='player-additional__text'][1] Tags: - Name: - selector: //div[@class="tag_line"]//a + Name: //div[@class='tags']/a Studio: Name: - selector: //head/title/text() + selector: //ul[@class='player-additional']//a postProcess: - - replace: - - regex: ^([^:\s]+).* - with: $1 - map: - Black4k.Com: Black4k - Daddy4k.Com: Daddy4k - Fist4k.Com: Fist4k - Hunt4k.Com: Hunt4k - Mature4k.Com: Mature4k - Old4k.Com: Old4k - Rim4k.Com: Rim4k - Sis.porn: Sis Porn - Stuck4k.Com: Struck4k - Tutor4k.Com: Tutor4k -# Last Updated April 05, 2021 + Sis: Sis.Porn +# Last Updated December 11, 2023 \ No newline at end of file From dc1b9fae6584a9039b05ab92ad0f5387be019afc Mon Sep 17 00:00:00 2001 From: imagineimaginingthings <87026461+imagineimaginingthings@users.noreply.github.com> Date: Wed, 13 Dec 2023 22:31:38 +0000 Subject: [PATCH 586/624] add gallery and performer scraper (#1559) --- SCRAPERS-LIST.md | 3 ++- scrapers/MFC.yml | 61 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 693737a2e..88481369a 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1189,6 +1189,7 @@ private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_m privatecastings.com|privatecastings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- privatesextapes.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- producersfun.com|ProducersFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +profiles.myfreecams.com|MFC.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- 
propertysex.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicagent.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1291,7 +1292,7 @@ shagmag.com|shagmag.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Magazines shame4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -share.myfreecams.com|MFC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +share.myfreecams.com|MFC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- shefucksonthefirstdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shegotsix.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- shelovesblack.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- diff --git a/scrapers/MFC.yml b/scrapers/MFC.yml index 32fba1431..5a1dbfd2d 100644 --- a/scrapers/MFC.yml +++ b/scrapers/MFC.yml @@ -1,10 +1,19 @@ -name: "MFC Share" +name: "MyFreeCams" sceneByURL: - action: scrapeXPath url: - share.myfreecams.com/a/ scraper: sceneScraper - +galleryByURL: + - action: scrapeXPath + url: + - share.myfreecams.com/a/ + scraper: galleryScraper +performerByURL: + - action: scrapeXPath + url: + - profiles.myfreecams.com/ + scraper: performerScraper xPathScrapers: sceneScraper: scene: @@ -25,5 +34,49 @@ xPathScrapers: Name: //a[@class="user-link"] Image: selector: //meta[@name="twitter:image"]/@content - -# Last Updated November 01, 2020 + galleryScraper: + gallery: + Title: //h3/text() + Studio: + Name: + fixed: "MFC Share" + Date: //span[@class[contains(., "album-at")]]/@title + Details: //div[@class="description-view"]/text() + Tags: + Name: + selector: //div[@class[contains(., "tags-container")]]/a/text() + postProcess: + - replace: + - regex: ^# + with: "" + Performers: + Name: //a[@class="user-link"] + performerScraper: + performer: + Name: 
//span[@id="username_value"]/text() + Country: //span[@id="country_value"]/text() + Ethnicity: //span[@id="ethnicity_value"]/text() + Gender: //span[@id="gender_value"]/text() + HairColor: //span[@id="hair_value"]/text() + EyeColor: //span[@id="eyes_value"]/text() + Height: + selector: //span[@id="height_value"][contains(text(), "centimeters")]/text() # only get metric because we can't handle both; it's one or the other + postProcess: + - replace: + - regex: ((\d+)\s(.*)) + with: $2 + Weight: + selector: //span[@id="weight_value"][contains(text(), "kilos")]/text() # only get metric because we can't handle both; it's one or the other + postProcess: + - replace: + - regex: ((\d+)\s(.*)) + with: $2 + Image: + selector: //img[@id="main_photo"]/@src | //img[@id="profile_avatar"]/@src + postProcess: + - replace: # main photo 250px size modifier can be removed to get full size image; 300px avatar can be fetched instead of the 90px one + - regex: \.250\.jpg + with: .jpg + - regex: \.90x90\.jpg + with: .300x300.jpg +# Last Updated December 13, 2023 From 0e06f32e2beb55577b9d4f5ebeb1f597c2468dd8 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 14 Dec 2023 02:06:26 +0100 Subject: [PATCH 587/624] Unescape HTML characters in ManyVids scraper This should prevent accidentally introducing things like " to descriptions --- scrapers/ManyVids/ManyVids.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapers/ManyVids/ManyVids.py b/scrapers/ManyVids/ManyVids.py index 0ee5daaae..4ac343319 100644 --- a/scrapers/ManyVids/ManyVids.py +++ b/scrapers/ManyVids/ManyVids.py @@ -3,6 +3,7 @@ import re import sys from urllib.parse import quote_plus +from html import unescape # to import from a parent directory we need to add that directory to the system path csd = os.path.dirname( @@ -122,8 +123,8 @@ def get_scene(scene_id: str) -> dict: meta = response.json() log.debug(f"Raw response from API: {json.dumps(meta)}") scrape = {} - scrape['title'] = meta.get('title') - 
scrape['details'] = meta.get('description') + scrape['title'] = meta['title'] + scrape['details'] = unescape(meta['description']) scrape['code'] = scene_id sceneURLPartial = meta.get('url') From 2bdc2a10b7d7f60505156707b97bb478b424a51d Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 14 Dec 2023 02:07:07 +0100 Subject: [PATCH 588/624] Fix relative import in TokyoHot --- scrapers/Tokyohot/Tokyohot.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/scrapers/Tokyohot/Tokyohot.py b/scrapers/Tokyohot/Tokyohot.py index 34ea81233..eb01e79f3 100644 --- a/scrapers/Tokyohot/Tokyohot.py +++ b/scrapers/Tokyohot/Tokyohot.py @@ -1,8 +1,17 @@ import base64 import json import sys +import os import re + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + BASE_QUERY_MEDIA_SEARCH = "https://my.tokyo-hot.com/product/?q=" BASE_DETAIL_URL = "https://my.tokyo-hot.com" @@ -24,10 +33,10 @@ ## must contain either 1 or 2 capture groups ## group 1 = the code ## group 2 (optional) = the part number if it's a multi-part (split) scene - "(n\d{4})\D*_\D{2}(\d)\S*", # "mult-part N series" - "(n\d{4})\S*", # "single part N series" - "(k\d{4})\S*", # "single part K series" - "(kb\d{4})\S*", # "single part KB series" + r"(n\d{4})\D*_\D{2}(\d)\S*", # "mult-part N series" + r"(n\d{4})\S*", # "single part N series" + r"(k\d{4})\S*", # "single part K series" + r"(kb\d{4})\S*", # "single part KB series" ] try: @@ -110,7 +119,9 @@ def get_performers(self): info_links = info.find_all("a") for link in info_links: if "cast" in link.get("href"): - perf = TokyoHotModel(model_url=BASE_DETAIL_URL + link.get("href")).get_json() + perf = TokyoHotModel( + 
model_url=BASE_DETAIL_URL + link.get("href") + ).get_json() performers.append(perf) return performers @@ -133,7 +144,9 @@ def get_date(self): def get_tags(self): potential_tags = self.soup.find("div", {"class": "infowrapper"}).find_all("a") - return [{"Name":a.text} for a in potential_tags if "type=play" in a.get("href")] + return [ + {"Name": a.text} for a in potential_tags if "type=play" in a.get("href") + ] def get_json(self): return { @@ -145,7 +158,7 @@ def get_json(self): "Studio": {"Name": self.studio}, "Code": self.scene_id, "Image": self.image, - "Tags": self.tags + "Tags": self.tags, } @@ -333,7 +346,7 @@ def scrape_scene(name, multipart, partnum): def get_image(image_url): try: response = requests.get(image_url, verify=False, timeout=(3, 6)) - except requests.exceptions.RequestException as req_ex: + except requests.exceptions.RequestException: log.error(f"Error fetching URL {image_url}") if response.status_code < 400: From 3ba2809fd0220b9d66460a769ec32380ffe32c41 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 14 Dec 2023 02:07:15 +0100 Subject: [PATCH 589/624] Fix relative import in Filename --- scrapers/Filename/Filename.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/scrapers/Filename/Filename.py b/scrapers/Filename/Filename.py index 8f1be3cf9..b19254ef5 100644 --- a/scrapers/Filename/Filename.py +++ b/scrapers/Filename/Filename.py @@ -2,33 +2,49 @@ import os import sys +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + try: from py_common import graphql from py_common import log except ModuleNotFoundError: print( "You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", - file=sys.stderr) + file=sys.stderr, + ) sys.exit() REMOVE_EXT = False # remove file extension from title def title_from_filename(js): - scene_id = js['id'] - scene_title = js['title'] - response = graphql.callGraphQL(""" + scene_id = js["id"] + scene_title = js["title"] + response = graphql.callGraphQL( + """ query FilenameBySceneId($id: ID){ findScene(id: $id){ - path + files { + path + } } - }""", {"id": scene_id}) - path = response["findScene"]["path"] + }""", + {"id": scene_id}, + ) + assert response is not None + path = response["findScene"]["files"][0]["path"] filename = os.path.basename(path) if REMOVE_EXT: filename = os.path.splitext(filename)[0] if scene_title != filename: - log.info(f"Scene {scene_id}: Title differs from filename: '{scene_title}' => '{filename}'") + log.info( + f"Scene {scene_id}: Title differs from filename: '{scene_title}' => '{filename}'" + ) return {"title": filename} return {} From 3c622f8faac0e7c2c28960972632873cf872b8e2 Mon Sep 17 00:00:00 2001 From: imagineimaginingthings <87026461+imagineimaginingthings@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:22:50 +0000 Subject: [PATCH 590/624] Add scene and gallery xpath scraper for reddit (#1560) * Add scene/gallery xpath scraper for reddit --- SCRAPERS-LIST.md | 1 + scrapers/Reddit.yml | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 scrapers/Reddit.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 88481369a..a508d46cb 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -1229,6 +1229,7 @@ realsensual.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +reddit.com|Reddit.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
redgifs.com|Redgifs.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Gifs redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- redhotstraightboys.com|RedHotStraightBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay diff --git a/scrapers/Reddit.yml b/scrapers/Reddit.yml new file mode 100644 index 000000000..69278fac2 --- /dev/null +++ b/scrapers/Reddit.yml @@ -0,0 +1,40 @@ +name: "Reddit" +sceneByURL: + - action: scrapeXPath + url: + - reddit.com + scraper: sceneScraper +galleryByURL: + - action: scrapeXPath + url: + - reddit.com + scraper: galleryScraper +xPathScrapers: + sceneScraper: + scene: &redditPost + Title: //shreddit-post/@post-title | //meta[@property="og:title"]/@content + Date: + selector: //shreddit-post/@created-timestamp | //div[@class="top-matter"]//p//time/@datetime + postProcess: + - replace: + - regex: (\d{4}-\d{2}-\d{2})T(.*) + with: $1 + - parseDate: 2006-01-02 + Performers: + Name: + selector: //shreddit-post/@author | //meta[@property="og:description"]/@content + postProcess: + - replace: + - regex: Posted in r\/([\w]+) by u\/([\w-]+)\s(.*) + with: $2 + galleryScraper: + gallery: *redditPost +driver: + cookies: # over18 cookie necessary for old.reddit.com URLs due to redirect; new reddit just blurs content instead + - CookieURL: "https://old.reddit.com/over18/" + Cookies: + - Name: "over18" + Domain: ".reddit.com" + Value: "1" + Path: "/" +# Last Updated December 13, 2023 From fca60a859964387240866375dbbddccb2d1e8221 Mon Sep 17 00:00:00 2001 From: hooch316 <153773129+hooch316@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:29:39 +0000 Subject: [PATCH 591/624] Update DesperateAmateurs scraper (#1562) --- scrapers/DesperateAmateurs.yml | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/scrapers/DesperateAmateurs.yml b/scrapers/DesperateAmateurs.yml index 7ad89cab9..7eba84329 100644 --- a/scrapers/DesperateAmateurs.yml +++ 
b/scrapers/DesperateAmateurs.yml @@ -8,21 +8,24 @@ xPathScrapers: sceneScraper: scene: Title: - selector: (//div[@class="title_bar"])[1] - Details: //div[@class="gallery_description"]/text() + selector: //span[@class="update_title"] + Details: + selector: //span[@class="latest_update_description"] Performers: - Name: //a[@class="model_category_link"][contains(@href, 'sets.php')]/text() + Name: //span[@class='tour_update_models']/a Tags: - Name: //a[@class="model_category_link"][contains(@href, 'category.php')]/text() - Image: //meta[@property="og:image"]/@content + Name: //span[@class="update_tags"]/a/text()|//span[@class="tour_update_tags"]/a/text() + Image: + selector: //a[@class="fancybox"]/@href + postProcess: + - replace: + - regex: ^ + with: https://desperateamateurs.com/tour3/ Studio: Name: fixed: Desperate Amateurs Date: - selector: //table[@width="100%"]//td[@class="date"]/text() + selector: //span[@class="update_date"] postProcess: - - replace: - - regex: .*(\d{2})/(\d{2})/(\d{4}).* - with: $1/$2/$3 - parseDate: 01/02/2006 -# Last Updated March 14, 2021 +# Last Updated December 13, 2023 From b7085059dfba2251190caaaea3d4c28a60fcdb13 Mon Sep 17 00:00:00 2001 From: hooch316 <153773129+hooch316@users.noreply.github.com> Date: Thu, 14 Dec 2023 01:31:31 +0000 Subject: [PATCH 592/624] Create CollectiveCorruption.yml (#1563) --- scrapers/CollectiveCorruption.yml | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 scrapers/CollectiveCorruption.yml diff --git a/scrapers/CollectiveCorruption.yml b/scrapers/CollectiveCorruption.yml new file mode 100644 index 000000000..3996b82e1 --- /dev/null +++ b/scrapers/CollectiveCorruption.yml @@ -0,0 +1,36 @@ +name: CollectiveCorruption +sceneByURL: + - action: scrapeXPath + url: + - collectivecorruption.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //span[@class="update_title"] + Date: + selector: //span[@class="availdate"] + postProcess: + - 
parseDate: 01/02/2006 + Performers: + Name: //div[@class="update_block_info"]/span[2]/a + Tags: + Name: //span[@class="update_tags"]/a + Details: + selector: //span[@class="latest_update_description"] + Code: + selector: //a[@class="featured"]/@onclick + postProcess: + - replace: + - regex: ^tload\('\/trailers\/([\d_]+)\.mp4'\); return false; + with: $1 + Image: + selector: //img[@class="large_update_thumb left thumbs stdimage"]/@src0_3x + postProcess: + - replace: + - regex: ^/content/contentthumbs/ + with: "https://collectivecorruption.com/content/contentthumbs/" + Studio: + Name: + fixed: Collective Corruption From 56e7101403f9161cdc0e098a3906281c1586ec9e Mon Sep 17 00:00:00 2001 From: LewdPursuits <89112030+LewdPursuits@users.noreply.github.com> Date: Thu, 14 Dec 2023 12:32:12 +1100 Subject: [PATCH 593/624] Updated TheScoreGroup scraper (#1551) * Create TheScoreGroup.py --- scrapers/TheScoreGroup/TheScoreGroup.py | 99 +++++++++++++++++++ .../{ => TheScoreGroup}/TheScoreGroup.yml | 59 ++++++++++- 2 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 scrapers/TheScoreGroup/TheScoreGroup.py rename scrapers/{ => TheScoreGroup}/TheScoreGroup.yml (62%) diff --git a/scrapers/TheScoreGroup/TheScoreGroup.py b/scrapers/TheScoreGroup/TheScoreGroup.py new file mode 100644 index 000000000..b66b2f3eb --- /dev/null +++ b/scrapers/TheScoreGroup/TheScoreGroup.py @@ -0,0 +1,99 @@ +import sys +import argparse +import json +import requests + +try: + from lxml import html +except ModuleNotFoundError: + print( + "You need to install the lxml module. (https://lxml.de/installation.html#installation)", + file=sys.stderr, + ) + print( + "If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml", + file=sys.stderr, + ) + sys.exit() + +try: + import py_common.log as log +except ModuleNotFoundError: + print( + "You need to download the folder 'py_common' from the community repo! 
(CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit() + + +def performer_query(query: str): + # Form data to be sent as the POST request body + payload = { + "ci_csrf_token": "", + "keywords": f"{query}", + "s_filters[site]": "all", + "s_filters[type]": "models", + "m_filters[sort]": "top_rated", + "m_filters[gender]": "any", + "m_filters[body_type]": "any", + "m_filters[race]": "any", + "m_filters[hair_color]": "any" + } + result = requests.post("https://www.scoreland.com/search-es/", data=payload) + tree = html.fromstring(result.content) + performer_names: list[str] = tree.xpath("//a[contains(concat(' ',normalize-space(@class),' '),' i-model ')]/text()") + performer_urls: list[str] = tree.xpath("//a[contains(concat(' ',normalize-space(@class),' '),' i-model ')]/@href") + performers = [ + { + "Name": name, + "URL": url, + } + for name, url in zip(performer_names, performer_urls) + ] + if not performers: + log.warning(f"No performers found for '{query}'") + return performers + +def main(): + parser = argparse.ArgumentParser("ScoreGroup Scraper",argument_default="") + subparsers = parser.add_subparsers( + dest="operation", help="Operation to perform", required=True + ) + subparsers.add_parser("search", help="Search for performers").add_argument( + "name", nargs="?", help="Name to search for" + ) + + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + args = parser.parse_args() + log.debug(f"Arguments from commandline: {args}") + # Script is being piped into, probably by Stash + if not sys.stdin.isatty(): + try: + frag = json.load(sys.stdin) + args.__dict__.update(frag) + log.debug(f"With arguments from stdin: {args}") + except json.decoder.JSONDecodeError: + log.error("Received invalid JSON from stdin") + sys.exit(1) + + if args.operation == "search": + name: str= args.name + if not name: + log.error("No query provided") + sys.exit(1) + log.debug(f"Searching for '{name}'") + matches = performer_query(name) + 
print(json.dumps(matches)) + sys.exit(0) + + # Just in case the above if statement doesn't trigger somehow + # Something has gone quite wrong should this ever get hit + log.error("An error has occured") + sys.exit(2) + + +if __name__ == "__main__": + main() diff --git a/scrapers/TheScoreGroup.yml b/scrapers/TheScoreGroup/TheScoreGroup.yml similarity index 62% rename from scrapers/TheScoreGroup.yml rename to scrapers/TheScoreGroup/TheScoreGroup.yml index 96cbf7cb5..43e0a0d1d 100644 --- a/scrapers/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup/TheScoreGroup.yml @@ -20,6 +20,30 @@ galleryByURL: - action: scrapeXPath url: *urls scraper: galleryScraper +performerByURL: + - action: scrapeXPath + url: + - 18eighteen.com/ + - 40somethingmag.com/ + - 50plusmilfs.com/ + - 60plusmilfs.com/ + - bigtithooker.com/ + - legsex.com/ + - naughtymag.com + - pornmegaload.com/ + - scoreland.com/ + - scoreland2.com/ + - xlgirls.com/ + - scorevideos.com/ + - milftugs.com/ + scraper: performerScraper +performerByName: + action: script + script: + - python + - TheScoreGroup.py + - search + xPathScrapers: sceneScraper: common: @@ -85,4 +109,37 @@ xPathScrapers: Tags: Name: $photopage//div[@class='mb-3']/a/text() Performers: *performersAttr -# Last Updated November 18, 2023 + + performerScraper: + performer: + Name: //h1[@class="model-title"] + Gender: + fixed: Female + URL: //meta[@property="og:url"]/@content + Ethnicity: + selector: //span[text()="Ethnicity:"]/following-sibling::span + postProcess: + - map: + White: Caucasian + HairColor: //span[text()="Hair Color:"]/following-sibling::span + Height: + selector: //span[text()="Height:"]/following-sibling::span + postProcess: + - feetToCm: true + Weight: + selector: //span[text()="Weight:"]/following-sibling::span + postProcess: + - replace: + - regex: (\d+).* + with: $1 + - lbToKg: true + Measurements: + selector: //span[text()="Bra Size:"]/following-sibling::span|//span[text()="Measurements:"]/following-sibling::span + concat: "-" + 
postProcess: + - replace: + - regex: (\d+[a-zA-Z]{1,3})-\d+(-\d+-\d+) + with: $1$2 + Image: //section[@id="model-page"]//img[@class="lazyload"]/@src + +# Last Updated December 03, 2023 From de288c4b1b235c628bf5b23c521b0660ded77076 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 14 Dec 2023 03:38:12 +0100 Subject: [PATCH 594/624] Fix relative import in TheScoreGroup --- scrapers/TheScoreGroup/TheScoreGroup.py | 102 +++++++++++++++++------ scrapers/TheScoreGroup/TheScoreGroup.yml | 5 +- 2 files changed, 77 insertions(+), 30 deletions(-) diff --git a/scrapers/TheScoreGroup/TheScoreGroup.py b/scrapers/TheScoreGroup/TheScoreGroup.py index b66b2f3eb..f9da25dbf 100644 --- a/scrapers/TheScoreGroup/TheScoreGroup.py +++ b/scrapers/TheScoreGroup/TheScoreGroup.py @@ -1,7 +1,16 @@ import sys import argparse import json +import os import requests +import re + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther try: from lxml import html @@ -25,43 +34,82 @@ ) sys.exit() +# Shared client because we're making multiple requests +client = requests.Session() + + +# Example element: +#
+#
+#
+# +# +# +#
+#
+#
+# +# +#
+#
+#
+#
+def map_performer(el): + url = el.xpath(".//a/@href")[0] + if "no-model" in url: + return None + name = el.xpath(".//a/@title")[1] + image = el.xpath(".//img/@src")[0] + fixed_url = re.sub(r".*?([^/]*(?=/2/0))/2/0/([^?]*)", r"https://www.\1.com/\2", url) + + if client.head(fixed_url).status_code != 200: + log.debug(f"Performer '{name}' has a broken profile link, skipping") + return None + + return { + "name": name, + "url": fixed_url, + "image": image, + } + def performer_query(query: str): # Form data to be sent as the POST request body payload = { - "ci_csrf_token": "", - "keywords": f"{query}", - "s_filters[site]": "all", - "s_filters[type]": "models", - "m_filters[sort]": "top_rated", - "m_filters[gender]": "any", - "m_filters[body_type]": "any", - "m_filters[race]": "any", - "m_filters[hair_color]": "any" + "ci_csrf_token": "", + "keywords": query, + "s_filters[site]": "all", + "s_filters[type]": "models", + "m_filters[sort]": "top_rated", + "m_filters[gender]": "any", + "m_filters[body_type]": "any", + "m_filters[race]": "any", + "m_filters[hair_color]": "any", } - result = requests.post("https://www.scoreland.com/search-es/", data=payload) + result = client.post("https://www.scoreland.com/search-es/", data=payload) tree = html.fromstring(result.content) - performer_names: list[str] = tree.xpath("//a[contains(concat(' ',normalize-space(@class),' '),' i-model ')]/text()") - performer_urls: list[str] = tree.xpath("//a[contains(concat(' ',normalize-space(@class),' '),' i-model ')]/@href") - performers = [ - { - "Name": name, - "URL": url, - } - for name, url in zip(performer_names, performer_urls) - ] + performers = list( + filter(None, [map_performer(p) for p in tree.find_class("model")]) + ) + if not performers: log.warning(f"No performers found for '{query}'") return performers + def main(): - parser = argparse.ArgumentParser("ScoreGroup Scraper",argument_default="") + parser = argparse.ArgumentParser("ScoreGroup Scraper", argument_default="") subparsers = 
parser.add_subparsers( - dest="operation", help="Operation to perform", required=True - ) + dest="operation", help="Operation to perform", required=True + ) subparsers.add_parser("search", help="Search for performers").add_argument( - "name", nargs="?", help="Name to search for" - ) + "name", nargs="?", help="Name to search for" + ) if len(sys.argv) == 1: parser.print_help(sys.stderr) @@ -80,7 +128,7 @@ def main(): sys.exit(1) if args.operation == "search": - name: str= args.name + name: str = args.name if not name: log.error("No query provided") sys.exit(1) @@ -88,9 +136,9 @@ def main(): matches = performer_query(name) print(json.dumps(matches)) sys.exit(0) - + # Just in case the above if statement doesn't trigger somehow - # Something has gone quite wrong should this ever get hit + # Something has gone quite wrong should this ever get hit log.error("An error has occured") sys.exit(2) diff --git a/scrapers/TheScoreGroup/TheScoreGroup.yml b/scrapers/TheScoreGroup/TheScoreGroup.yml index 43e0a0d1d..0d52ce0fa 100644 --- a/scrapers/TheScoreGroup/TheScoreGroup.yml +++ b/scrapers/TheScoreGroup/TheScoreGroup.yml @@ -39,7 +39,7 @@ performerByURL: scraper: performerScraper performerByName: action: script - script: + script: - python - TheScoreGroup.py - search @@ -109,7 +109,7 @@ xPathScrapers: Tags: Name: $photopage//div[@class='mb-3']/a/text() Performers: *performersAttr - + performerScraper: performer: Name: //h1[@class="model-title"] @@ -141,5 +141,4 @@ xPathScrapers: - regex: (\d+[a-zA-Z]{1,3})-\d+(-\d+-\d+) with: $1$2 Image: //section[@id="model-page"]//img[@class="lazyload"]/@src - # Last Updated December 03, 2023 From 392c4d3e5937f4887295623abf4215f80c8a2174 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Thu, 14 Dec 2023 03:47:49 +0100 Subject: [PATCH 595/624] Replace filter with list comprehension in Score Group --- scrapers/TheScoreGroup/TheScoreGroup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scrapers/TheScoreGroup/TheScoreGroup.py 
b/scrapers/TheScoreGroup/TheScoreGroup.py index f9da25dbf..cc5dc4513 100644 --- a/scrapers/TheScoreGroup/TheScoreGroup.py +++ b/scrapers/TheScoreGroup/TheScoreGroup.py @@ -93,9 +93,7 @@ def performer_query(query: str): } result = client.post("https://www.scoreland.com/search-es/", data=payload) tree = html.fromstring(result.content) - performers = list( - filter(None, [map_performer(p) for p in tree.find_class("model")]) - ) + performers = [p for x in tree.find_class("model") if (p := map_performer(x))] if not performers: log.warning(f"No performers found for '{query}'") From 4c9a36fac8e1545130837f40956cceb72e0b9a66 Mon Sep 17 00:00:00 2001 From: Lamda604 <81937940+Lamda604@users.noreply.github.com> Date: Thu, 14 Dec 2023 09:23:41 -0800 Subject: [PATCH 596/624] Adding scrapers for TheBroNetwork, Raw Road Nation, KinkMen and Raw Fuck Club; update HimerosTV (#1528) Kink.yml did not include bifurcated KinkMen.com. Created a separate scraper based on the original to allow scraping of KinkMen.com. KinkMen.com Scene URLs no longer use the kink.com/...(scene#), however performer URLs seem to use kink.com or kinkmen.com links interchangeably. 
--- scrapers/HimerosTV.yml | 73 +++++++++--------- scrapers/KinkMen.yml | 153 +++++++++++++++++++++++++++++++++++++ scrapers/RawFuckClub.yml | 30 ++++++++ scrapers/RawRoadNation.yml | 28 +++++++ scrapers/TheBroNetwork.yml | 27 +++++++ 5 files changed, 276 insertions(+), 35 deletions(-) create mode 100644 scrapers/KinkMen.yml create mode 100644 scrapers/RawFuckClub.yml create mode 100644 scrapers/RawRoadNation.yml create mode 100644 scrapers/TheBroNetwork.yml diff --git a/scrapers/HimerosTV.yml b/scrapers/HimerosTV.yml index cc96388f2..351afe8c5 100644 --- a/scrapers/HimerosTV.yml +++ b/scrapers/HimerosTV.yml @@ -1,35 +1,38 @@ -name: himerostv -sceneByURL: - - action: scrapeXPath - url: - - himeros.tv - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //h1[@class='highlight']/text() - Date: - selector: //div[@class='update-info-row text-gray']/text()[1] - postProcess: - - replace: - - regex: \s*\|.* - with: - - parseDate: January 2, 2006 - Details: - selector: //div[@class='update-info-block']/div[@class='update-info-block'][1]//text() - concat: "\n\n" - Performers: - Name: - selector: //div[@class='item-title']/a[contains(@href,"/models/")] - Tags: - Name: - selector: //ul[@class='tags']/li/a/text() - Image: - selector: //script[contains(text(),'hidden_fake_trailer')]/text() - postProcess: - - replace: - - regex: .+(?:poster=")([^"]*).+ - with: https://himeros.tv$1 - - regex: "-1x.jpg" - with: "-3x.jpg" -# Last Updated January 09, 2022 +name: HimerosTV +sceneByURL: + - action: scrapeXPath + url: + - himeros.tv + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@class='highlight']/text() + Date: + selector: //div[@class='update-info-row text-gray']/text()[1] + postProcess: + - replace: + - regex: \s*\|.* + with: + - parseDate: January 2, 2006 + Details: + selector: //div[@class='update-info-block']/div[@class='update-info-block'][1]//text() + concat: "\n\n" + Performers: + Name: + selector: 
//div[@class='item-title']/a[contains(@href,"/models/")] + Tags: + Name: + selector: //ul[@class='tags']/li/a/text() + Image: + selector: //script[contains(text(),'hidden_fake_trailer')]/text() + postProcess: + - replace: + - regex: .+(?:poster=")([^"]*).+ + with: https://himeros.tv$1 + - regex: "-1x.jpg" + with: "-3x.jpg" + Studio: + Name: + fixed: HimerosTV +# Last Updated November 06, 2023 \ No newline at end of file diff --git a/scrapers/KinkMen.yml b/scrapers/KinkMen.yml new file mode 100644 index 000000000..23da43e49 --- /dev/null +++ b/scrapers/KinkMen.yml @@ -0,0 +1,153 @@ +name: Kink Men +sceneByURL: + - action: scrapeXPath + url: + - kinkmen.com + scraper: sceneScraper + +sceneByFragment: + action: scrapeXPath + queryURL: https://www.kinkmen.com/shoot/{filename} + # constructs the scene URL from the filename, provided that the filename includes the scene id + queryURLReplace: + filename: + # the id in kink.com is a 1-6 digit number + - regex: ^(\d+)[^\d].* # support filenames in the form 12345_performer_other_data.mp4 + with: $1 + - regex: .*\((\d+)\)\.[a-zA-Z\d]+$ #support filenames in the form scene - date - performer (12345).mp4 + with: $1 + scraper: sceneScraper +performerByName: + action: scrapeXPath + queryURL: https://www.kinkmen.com/search?type=performers&q={} + scraper: performerSearch +performerByURL: + - action: scrapeXPath + url: + - kinkmen.com/model + - kink.com/model + scraper: performerScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //meta[@name="twitter:title"]/@content + Date: + selector: //span[@class="shoot-date"]/text() + postProcess: + - parseDate: January 2, 2006 + Details: + selector: //span[@class="description-text"]/*[self::p or self::ul] + concat: "\n\n" + Code: //div[@class="shoot-page"]/@data-shootid + Director: //span[@class="director-name"] + Performers: + Name: + selector: //p[@class="starring"]/span[@class="names h5"]/a/text() + postProcess: + - replace: + - regex: \, + with: "" + Tags: + Name: + selector: 
//a[@class="tag"] + postProcess: + - replace: + - regex: \, + with: "" + Image: //meta[@name="twitter:image"]/@content + Studio: + Name: + selector: //div[@class="column shoot-logo"]/a/@href + postProcess: + - replace: + - regex: /channel/ + with: "" + - map: + # List of sites as of 2023-11-06 from https://www.kinkmen.com/channels + # [...new Set([...document.querySelectorAll('h3 > a')] + # .map(a => a.href.split("/").pop() + ": " + a.innerText))] + # .toSorted() + # .join("\n") + 30-minutes-of-torment: 30 Minutes Of Torment + alternadudes: Alternadudes + bondage-compound: Bondage Compound + bonus-hole-boys: Bonus Hole Boys + bound-and-gagged: Bound And Gagged + bound-gods: Bound Gods + bound-in-public: Bound In Public + boynapped: Boynapped + butt-machine-boys: Butt Machine Boys + fetishnetwork-male: FetishNetwork Male + kinkmen-classics: Kink Men Classics + kink-men-series: Kink Men Series + kinkmen-test-shoot: Kink Men Test Shoots + kinky-bites-men: Kinky Bites Men + masqulin: Masqulin + men-at-play: Men At Play + men-on-edge: Men On Edge + missionary-boyz: Missionary Boyz + my-dirtiest-fantasy: My Dirtiest Fantasy + my-friends-feet: My Friends' Feet + naked-kombat: Naked Kombat + nasty-daddy: Nasty Daddy + str8hell: Str8Hell + sw-nude: SW Nude + taken-rough: Taken Rough + therapy-dick: Therapy Dick + titanmen-rough: TitanMen Rough + wrestlingmale: WrestlingMale + yes-father: Yes Father + yesirboys: Yesirboys + # Kinkmen.com pages still contain old/broken Kink.com link; commenting out URL as it is currently inaccurate + # URL: //link[@rel="canonical"]/@href + performerSearch: + common: + $result: //div/a[contains(@href, "/model") and contains(concat(" ", normalize-space(@class), " "), " model-link ")] + + performer: + Name: $result/img/@alt + URL: + selector: $result/@href + postProcess: + - replace: + - regex: ^ + with: https://www.kinkmen.com + performerScraper: + performer: + Name: + selector: //h1/text() # 
//div[@font-size][number(translate(@font-size,"px",""))>=35]/text() + concat: " " + postProcess: + - replace: + - regex: ^\s+ + with: "" + - regex: \s+$ + with: + Twitter: + selector: '//div/a[contains(concat(" ", normalize-space(@class), " "), " social-link ") and contains(@href, "twitter.com")]/@href' + Image: + selector: //div/img[contains(@src, "imagedb")][1]/@src + Tattoos: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/tattoo")]//text()' + postProcess: + - map: + Tattoo: "Yes" + Piercings: + selector: '//div/span[text()=" tags: "]/following-sibling::a[contains(@href,"/pierced")]/span' + concat: "\n" + Tags: + Name: '//div/span[text()=" tags: "]/following-sibling::a/span/text()' + Details: + selector: '//div/span/p[@class="bio"]/following-sibling::p' + concat: "\n" + postProcess: + - replace: + - regex: '(?i)]*>' + with: "" + URL: //link[@rel="canonical"]/@href +driver: + headers: + - Key: User-Agent + Value: stash-scraper/1.0.0 +# Last Updated November 06, 2023 \ No newline at end of file diff --git a/scrapers/RawFuckClub.yml b/scrapers/RawFuckClub.yml new file mode 100644 index 000000000..390ff69cf --- /dev/null +++ b/scrapers/RawFuckClub.yml @@ -0,0 +1,30 @@ +name: Raw Fuck Club +sceneByURL: + - action: scrapeXPath + url: + - www.rawfuckclub.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class="col-12 pr-0"]/h2 + Performers: + Name: //span[@class="badge badge-primary"] + Tags: + Name: //span[@class="badge badge-secondary"] + Details: + selector: //p[@class="watch-description"] + Image: //img[@class="img-responsive"]/@src + Date: + selector: //p[@class='watch-published-date']/text() + postProcess: + - replace: + - regex: 'Reposted on (.+)([.])' + with: $1 + - regex: 'Posted on (.+)' + with: $1 + - parseDate: January 2, 2006 + Studio: + Name: + fixed: Raw Fuck Club +# Last Updated November 08, 2023 \ No newline at end of file diff --git a/scrapers/RawRoadNation.yml 
b/scrapers/RawRoadNation.yml new file mode 100644 index 000000000..caefa2b5c --- /dev/null +++ b/scrapers/RawRoadNation.yml @@ -0,0 +1,28 @@ +name: Raw Road Nation +sceneByURL: + - action: scrapeXPath + url: + - rawroadnation.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@class="panel-heading"]/h3 + Performers: + Name: //a[@class="model_name_title"] + Tags: + Name: //div[@class="col-12 text-center px-4 py-2"]/a[contains(@href,"en/videos/")] + Details: //div[@class="video-seo-description"]/p + Image: //video[@id="videoProtectedPlayer"]/@poster + Date: + selector: //h4[contains(text(), "Release Date")]/text() + postProcess: + - replace: + - regex: "Release Date: (.+)" + with: $1 + - parseDate: 2006-01-02 + Studio: + Name: + fixed: Raw Road Nation +# Last Updated November 05, 2023 \ No newline at end of file diff --git a/scrapers/TheBroNetwork.yml b/scrapers/TheBroNetwork.yml new file mode 100644 index 000000000..4a90f6ff1 --- /dev/null +++ b/scrapers/TheBroNetwork.yml @@ -0,0 +1,27 @@ +name: The Bro Network +sceneByURL: + - action: scrapeXPath + url: + - thebronetwork.com + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: + selector: //div[@class="gallery_info spacer"]/h1 + Performers: + Name: //div[@class="gallery_info spacer"]//span[@class="tour_update_models"]/a + Tags: + Name: //a[@class="tagsVideoPage"] + Details: + selector: //p[@id="textDesc"] + Image: + selector: //video-js[@id="trailervid"]/@poster + Date: + selector: //span[@class='availdate'][1] + postProcess: + - parseDate: Jan 02, 2006 + Studio: + Name: + fixed: The Bro Network +# Last Updated November 02, 2023 \ No newline at end of file From 946a44fbe97efd8d5d30a13f54412d77105b2482 Mon Sep 17 00:00:00 2001 From: Muldec <34677746+Muldec@users.noreply.github.com> Date: Fri, 15 Dec 2023 01:38:09 +0100 Subject: [PATCH 597/624] Fix tags for IWantClips (#1564) --- scrapers/IWantClips.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 
deletions(-) diff --git a/scrapers/IWantClips.yml b/scrapers/IWantClips.yml index 4b0ab25ef..a45dfbf14 100644 --- a/scrapers/IWantClips.yml +++ b/scrapers/IWantClips.yml @@ -45,9 +45,12 @@ xPathScrapers: concat: "\n\n" Tags: Name: - selector: //div[@class="col-xs-12 hashtags fix"]/span/em | //div[@class="col-xs-12 category fix"]/span + selector: //div[@class="col-xs-12 hashtags hashtags-grey fix"]/span/em | //div[@class="col-xs-12 category fix"]/a concat: "," postProcess: + - replace: + - regex: 'Keywords:' + with: $1 - replace: - regex: ',\s+' with: "," @@ -88,4 +91,4 @@ xPathScrapers: driver: useCDP: true -# Last Updated November 24, 2023 +# Last Updated December 14, 2023 From e4241eac876cc264f87e15ca6123cb5b98f7552b Mon Sep 17 00:00:00 2001 From: ryosaeba75 <68193713+ryosaeba75@users.noreply.github.com> Date: Fri, 15 Dec 2023 18:49:12 +0100 Subject: [PATCH 598/624] Added search by name and some scenescraper fixes. (#1561) --- scrapers/DorcelVision.yml | 41 ++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/scrapers/DorcelVision.yml b/scrapers/DorcelVision.yml index 5a5834c0d..30f4dddfb 100644 --- a/scrapers/DorcelVision.yml +++ b/scrapers/DorcelVision.yml @@ -10,8 +10,38 @@ movieByURL: url: - https://www.dorcelvision.com/en/movies scraper: movieScraper +sceneByName: + action: scrapeXPath + queryURL: https://www.dorcelvision.com/en/search?type=4&keyword={} + scraper: sceneSearch +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper + xPathScrapers: + sceneSearch: + common: + $card: //div[@id="entriesList"]/div + scene: + Title: $card/details/div/h3 + URL: + selector: $card/a[@class="movies rolloverv2"]/@href + postProcess: + - replace: + - regex: (.*) + with: https://www.dorcelvision.com$1 + + Image: + selector: $card/a[@class="movies rolloverv2"]/img[last()]/@data-src + postProcess: + - replace: + - regex: (.*) + with: https://www.dorcelvision.com$1 + Details: 
$card/details//p[last()] + + sceneScraper: scene: Title: &titleSel //meta[@property="og:title"]/@content @@ -19,25 +49,26 @@ xPathScrapers: selector: //div[@id="biography"]/*/text() concat: "\n\n" Image: &imageAttr - selector: //div[contains(@class, "covers")]/a[contains(@class, "cover")]/@href + selector: //a[contains(@class, "cover")]/@href postProcess: - replace: - regex: (.*) with: https://dorcelvision.com$1 Date: &dateAttr - selector: //div[@class="informations"]//strong[contains(., "Production year")]/following-sibling::text() + selector: //div[@class="row informations"]/div/div/strong[contains(., "Production year")]/parent::div/following-sibling::div postProcess: - replace: - regex: ^(\d+)$ with: $1-01-01 - parseDate: 2006-01-02 Performers: - Name: //div[contains(@class, "casting")]//div[contains(@class, "slider-xl")]//div[@class="col-xs-2"]//a/strong/text() + Name: //div[contains(@class, "casting")]//div[contains(@class, "slider-xl")]//div[@class="col-xs-2"]//a/span/text() Studio: &studioAttr - Name: //div[@class="informations"]//strong[contains(., "Studio")]/following-sibling::a/text() + Name: //div[@class="row informations"]/div/div/strong[contains(., "Studio")]/parent::div/following-sibling::div Movies: Name: //meta[@property="og:title"]/@content URL: //meta[@property="og:url"]/@content + URL: //meta[@property="og:url"]/@content movieScraper: movie: Name: *titleSel @@ -54,4 +85,4 @@ xPathScrapers: FrontImage: *imageAttr Synopsis: *detailsAttr # Reference: https://github.com/PAhelper/PhoenixAdult.bundle/blob/master/Contents/Code/siteDorcelVision.py -# Last Updated October 13, 2021 +# Last Updated December 13, 2023 From a4dfb08e8d39120ef2ee00b77ba24b0e7754723f Mon Sep 17 00:00:00 2001 From: Clasp8775 <151955346+Clasp8775@users.noreply.github.com> Date: Sun, 17 Dec 2023 01:45:00 +0100 Subject: [PATCH 599/624] Minnano-AV.yml: allow HTTPS (#1565) --- scrapers/Minnano-AV/Minnano-AV.py | 8 ++++---- scrapers/Minnano-AV/Minnano-AV.yml | 3 ++- 2 files changed, 6 
insertions(+), 5 deletions(-) diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py index 9103b0501..9cfde5855 100644 --- a/scrapers/Minnano-AV/Minnano-AV.py +++ b/scrapers/Minnano-AV/Minnano-AV.py @@ -52,12 +52,12 @@ "measurements": ( r"(?<=T)(?P\d+)? / B(?P\d+)\([^=]+=(?P\w+)\) / W(?P\d+) / H(?P\d+)" ), - "url": r"http://www.minnano-av.com/actress\d+.html", + "url": r"https://www.minnano-av.com/actress\d+.html", } FORMATS = { - "image": "http://www.minnano-av.com{IMAGE_URL_FRAGMENT}", - "url": "http://www.minnano-av.com/actress{PERFORMER_ID}.html", + "image": "https://www.minnano-av.com{IMAGE_URL_FRAGMENT}", + "url": "https://www.minnano-av.com/actress{PERFORMER_ID}.html", } @@ -309,7 +309,7 @@ def performer_by_url(url): def performer_by_name(name: str, retry=True) -> None: - queryURL = f"http://www.minnano-av.com/search_result.php?search_scope=actress&search_word={name}" + queryURL = f"https://www.minnano-av.com/search_result.php?search_scope=actress&search_word={name}" result = requests.get(queryURL) tree = etree.HTML(result.text) diff --git a/scrapers/Minnano-AV/Minnano-AV.yml b/scrapers/Minnano-AV/Minnano-AV.yml index c26e6dbbc..fc0a6bb23 100644 --- a/scrapers/Minnano-AV/Minnano-AV.yml +++ b/scrapers/Minnano-AV/Minnano-AV.yml @@ -1,6 +1,7 @@ name: "Minnano-AV (JAV)" performerByURL: - url: + - https://www.minnano-av.com/ - http://www.minnano-av.com/ action: script script: @@ -14,4 +15,4 @@ performerByName: - python - Minnano-AV.py - performer_by_name -# Last Updated November 27, 2023 +# Last Updated December 16, 2023 From a2a63ee79a4916b93b9e7bab7ffb2ccb6a0a66c8 Mon Sep 17 00:00:00 2001 From: AndeanCoati <106705046+AndeanCoati@users.noreply.github.com> Date: Sun, 17 Dec 2023 21:59:32 +0200 Subject: [PATCH 600/624] Add JB Video scene scraper (#1568) --- SCRAPERS-LIST.md | 1 + scrapers/JBVideo.yml | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 scrapers/JBVideo.yml diff --git a/SCRAPERS-LIST.md 
b/SCRAPERS-LIST.md index a508d46cb..91ee61288 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -785,6 +785,7 @@ javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jbvideo.com|JBVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV jeedoo.com|Jeedoo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- jeffsmodels.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/JBVideo.yml b/scrapers/JBVideo.yml new file mode 100644 index 000000000..e3d554581 --- /dev/null +++ b/scrapers/JBVideo.yml @@ -0,0 +1,25 @@ +name: JB Video +sceneByURL: + - action: scrapeXPath + url: + - jbvideo.com/videos/ + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //h1[@id="view_title"]/text() + Performers: + Name: //strong[text()='Starring:']/following-sibling::a[@onclick='loginPrompt()']/text() + Studio: + Name: + fixed: JB Video + Image: //div[contains(@class, 'main_vid')]/img/@src + Details: //span[@id='view_description']//text()[.!='\u00A0'] + Date: + selector: //strong[text()='Released:']/following-sibling::text()[1] + postProcess: + - replace: + - regex: " " + with: "" + - parseDate: "01/02/06" +# Last Updated December 17, 2023 From 818fb3c6329e25e9b0a63320933c6dfec6bc55de Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 18 Dec 2023 18:09:12 +0100 Subject: [PATCH 601/624] Fix relative import in MindGeekAPI --- scrapers/MindGeekAPI/MindGeekAPI.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scrapers/MindGeekAPI/MindGeekAPI.py b/scrapers/MindGeekAPI/MindGeekAPI.py index 04612f8e7..0018fbfc2 100644 --- a/scrapers/MindGeekAPI/MindGeekAPI.py +++ b/scrapers/MindGeekAPI/MindGeekAPI.py @@ -7,6 +7,13 @@ from datetime import 
datetime from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + try: import py_common.log as log except ModuleNotFoundError: From 1de471cc16c8341c6d242988b9964bd94ef03252 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 18 Dec 2023 18:10:47 +0100 Subject: [PATCH 602/624] Fix relative imports in CopyTo scrapers --- scrapers/CopyFromScene/CopyFromScene.py | 8 ++++++++ scrapers/CopyToGallery/CopyToGallery.py | 9 ++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/scrapers/CopyFromScene/CopyFromScene.py b/scrapers/CopyFromScene/CopyFromScene.py index 02e90bb22..c0f7a2364 100644 --- a/scrapers/CopyFromScene/CopyFromScene.py +++ b/scrapers/CopyFromScene/CopyFromScene.py @@ -1,6 +1,14 @@ import json +import os import sys +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther + try: import py_common.graphql as graphql import py_common.log as log diff --git a/scrapers/CopyToGallery/CopyToGallery.py b/scrapers/CopyToGallery/CopyToGallery.py index 90bd41095..65fba0e84 100644 --- a/scrapers/CopyToGallery/CopyToGallery.py +++ b/scrapers/CopyToGallery/CopyToGallery.py @@ -1,6 +1,13 @@ import json -import sys import os +import sys + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory 
(should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from ther try: import py_common.graphql as graphql From b61288609b712bbd88537b5b922143d17fd222b1 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 18 Dec 2023 18:24:44 +0100 Subject: [PATCH 603/624] Scrape correct gender for trans performers on IAFD This closes #1524 --- scrapers/IAFD/IAFD.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scrapers/IAFD/IAFD.py b/scrapers/IAFD/IAFD.py index e9197e18a..6e0767db5 100644 --- a/scrapers/IAFD/IAFD.py +++ b/scrapers/IAFD/IAFD.py @@ -489,8 +489,16 @@ def performer_url(tree): def performer_gender(tree): + def prepend_transgender(gender: str): + perf_id = next( + iter(tree.xpath('//form[@id="correct"]/input[@name="PerfID"]/@value')), "" + ) + trans = "Transgender " if perf_id.endswith("_ts") else "" + return trans + map_gender(gender) + return maybe( - tree.xpath('//form[@id="correct"]/input[@name="Gender"]/@value'), map_gender + tree.xpath('//form[@id="correct"]/input[@name="Gender"]/@value'), + prepend_transgender, ) From 148fc9b356c3f6844789fb635ae54d58a266131e Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Mon, 18 Dec 2023 18:36:04 +0100 Subject: [PATCH 604/624] Another fix for transgender performers on IAFD IAFD are not consistent with how they label transgender performers but this is a best effort --- scrapers/IAFD/IAFD.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapers/IAFD/IAFD.py b/scrapers/IAFD/IAFD.py index 6e0767db5..d3401cd88 100644 --- a/scrapers/IAFD/IAFD.py +++ b/scrapers/IAFD/IAFD.py @@ -493,7 +493,12 @@ def prepend_transgender(gender: str): perf_id = next( iter(tree.xpath('//form[@id="correct"]/input[@name="PerfID"]/@value')), "" ) - trans = "Transgender " if perf_id.endswith("_ts") else "" + trans = ( + "Transgender " + # IAFD are not consistent with their + if any(mark in perf_id for mark in ("_ts", "_ftm", "_mtf")) + else 
"" + ) return trans + map_gender(gender) return maybe( From d47d5a65a391f25e7b277b5c724d318eb580419e Mon Sep 17 00:00:00 2001 From: SpecialKeta <148014803+SpecialKeta@users.noreply.github.com> Date: Tue, 19 Dec 2023 00:23:39 +0100 Subject: [PATCH 605/624] Regroup all Pure Media sites into one scraper. (#1569) --- SCRAPERS-LIST.md | 13 +++---- scrapers/ChristianXXX.yml | 38 -------------------- scrapers/Pure-XXX.yml | 40 --------------------- scrapers/PureMedia.yml | 76 +++++++++++++++++++++++++++++++++++++++ scrapers/SissyPov.yml | 31 ---------------- 5 files changed, 83 insertions(+), 115 deletions(-) delete mode 100644 scrapers/ChristianXXX.yml delete mode 100644 scrapers/Pure-XXX.yml create mode 100644 scrapers/PureMedia.yml delete mode 100644 scrapers/SissyPov.yml diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 91ee61288..0ac18cf4f 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -176,7 +176,7 @@ bbcsurprise.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- beauty-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- beauty4k.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- beaverhunt.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- -becomingfemme.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +becomingfemme.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans behindtrans500.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans beingphoenixmarie.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- belamionline.com|BelAmi.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay @@ -1161,6 +1161,7 @@ pornlandvideos.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pornmegaload.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- pornperverts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pornpros.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornstarbts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
pornstarhardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- pornstarplatinum.com|PornstarPlatinum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pornstarstroker.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1198,9 +1199,9 @@ publicsexadventures.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- publicsexdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- puffynetwork.com|Puffynetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- pumaswedexxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -pure-bbw.com|Pure-XXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -pure-ts.com|Pure-XXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans -pure-xxx.com|Pure-XXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-bbw.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-ts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-xxx.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- purebj.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- puremature.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- purepov.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- @@ -1319,7 +1320,7 @@ siripornstar.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sis.porn|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sislovesme.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sisswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- -sissypov.com|SissyPov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sissypov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- sketboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay slayed.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- slroriginals.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR @@ -1545,7 +1546,7 @@ ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans 
tsgirlfriendexperience.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans -tspov.com|ChristianXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tspov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans tsraw.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- tugpass.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/ChristianXXX.yml b/scrapers/ChristianXXX.yml deleted file mode 100644 index 048814eb7..000000000 --- a/scrapers/ChristianXXX.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: "ChristianXXX" -sceneByURL: - - action: scrapeXPath - url: - - becomingfemme.com/tour - - pure-ts.com/tour - - tspov.com/tour - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: //div[@class="video_membership"]//div[@class="titlebox clear"] - Details: //div[@class="aboutvideo"]/p - Date: - selector: //div[@class="video_description"]/h4[not(contains(.,"Featured"))] - postProcess: - - replace: - - regex: .*[|] - with: - - parseDate: 2006-01-02 - Performers: - Name: //ul[@class="featuredModels"]/li/a/span[not(@class)]/text() - Studio: - Name: //meta[@name="author"]/@content - Tags: - Name: - selector: //meta[@name="keywords"]/@content - split: "," - Image: - selector: //base/@href|//div[@class="videohere"]/img[@class="thumbs stdimage"]/@src|//script[contains(.,'jwplayer("jwbox").setup')]/text() - concat: "|" - postProcess: - - replace: - - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" - with: $1$2 - - regex: ^\/\/ - with: "https://" -# Last Updated November 17, 2023 diff --git a/scrapers/Pure-XXX.yml b/scrapers/Pure-XXX.yml deleted file mode 100644 index 98186f748..000000000 --- a/scrapers/Pure-XXX.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: "Pure-XXX" -sceneByURL: - - action: scrapeXPath - url: - - pure-xxx.com - - pure-ts.com - - 
pure-bbw.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="video_membership"]/div[@class="titlebox clear"]/h3/text() - Details: //div[@class="aboutvideo"]/p/text() - Performers: - Name: //div[@class="video_description"]/ul[@class="featuredModels"]/li/a/span/text() - Image: - selector: (//div[@id="logo"]//a/@href | //div[@class="flexslider"]//a/@href) | (//div[@class="videohere"]/img[contains(@class,"stdimage")]/@src | //script[contains(.,'jwplayer("jwbox").setup')]/text()) - concat: '@~@' - postProcess: - - replace: - - regex: (.+@~@)(.+image:\s+")(.+jpg)(.+) - with: $1$3 - - regex: \/\/(.*).com\/.*@~@(.*) - with: https://$1.com$2 - Studio: - Name: - selector: //div[@id="logo"]//img/@alt | //div[@class="flexslider"]//a/@title - postProcess: - - replace: - - regex: \- - with: ' ' - Date: - selector: //div[@class="video_description"]/h4[1]/text() - postProcess: - - replace: - - regex: .*(\d{4})-(\d{2})-(\d{2}).* - with: $1-$2-$3 - - parseDate: 2006-01-02 -# Last Updated November 19, 2023 diff --git a/scrapers/PureMedia.yml b/scrapers/PureMedia.yml new file mode 100644 index 000000000..92f2504bb --- /dev/null +++ b/scrapers/PureMedia.yml @@ -0,0 +1,76 @@ +name: 'PureMedia' +sceneByURL: + - action: scrapeXPath + url: + - becomingfemme.com/tour + - pure-bbw.com/tour + - pure-ts.com/tour + - pure-xxx.com/tour + - tspov.com/tour + scraper: sceneScraper + - action: scrapeXPath + url: + - pornstarbts.com/tour + - sissypov.com/tour + scraper: newSiteScraper + +xPathScrapers: + sceneScraper: + scene: + Title: //div[@class='video_membership']//div[@class='titlebox clear'] + Details: //div[@class='aboutvideo'] + Date: + selector: //div[@class='video_description']/h4[not(contains(.,'Featured'))] + postProcess: + - replace: + - regex: .*[|] + with: + - parseDate: 2006-01-02 + Performers: + Name: //ul[@class='featuredModels']//span[last()] + Studio: + Name: //meta[@name='author']/@content + Tags: + Name: + selector: 
//meta[@name='keywords']/@content + split: ',' + Image: + selector: //base/@href|//div[@class='videohere']/img[@class='thumbs stdimage']/@src|//script[contains(.,'jwplayer("jwbox").setup')]/text() + concat: '|' + postProcess: + - replace: + - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" + with: $1$2 + - regex: ^// + with: https:// + + newSiteScraper: + common: + $details: //div[@class='videoDetails clear'] + scene: + Title: $details/h3 + Details: $details/p + Date: + selector: //p[contains(span,'Date Added:')]/text() + postProcess: + - parseDate: January 2, 2006 + Performers: + Name: //li[@class='update_models']/a + Tags: + Name: //ul[contains(li,'Tags:')]//a + Image: + selector: //base/@href|//a[@class='fake_trailer']/img/@src0_1x + concat: '|' + postProcess: + - replace: + - regex: "(^[^|]+)\\|.*/tour/([^\\.]+\\.jpg).*" + with: $1$2 + - regex: ^// + with: https:// + Studio: + Name: + selector: //meta[@name='author']/@content + postProcess: + - map: + SISSY POV: Sissy POV +# Last Updated December 19, 2023 \ No newline at end of file diff --git a/scrapers/SissyPov.yml b/scrapers/SissyPov.yml deleted file mode 100644 index b91aff66f..000000000 --- a/scrapers/SissyPov.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: SissyPov -sceneByURL: - - action: scrapeXPath - url: - - sissypov.com - scraper: sceneScraper -xPathScrapers: - sceneScraper: - scene: - Title: - selector: //div[@class="videoDetails clear"]/h3 - Performers: - Name: //li[@class="update_models"]/a - Date: - selector: //p[contains(span,"Date Added:")]/text() - postProcess: - - parseDate: January 2, 2006 - Tags: - Name: //ul[contains(li,"Tags:")]//a - Details: - selector: //div[@class="videoDetails clear"]/p - Image: - selector: //a[@class="fake_trailer"]/img/@src0_1x - postProcess: - - replace: - - regex: ^ - with: "https://sissypov.com" - Studio: - Name: - fixed: Sissy Pov -# Last Updated May 21, 2021 From 9f578dcb196497bc2d860160599503b7b76bbe4d Mon Sep 17 00:00:00 2001 From: SpecialKeta 
<148014803+SpecialKeta@users.noreply.github.com> Date: Tue, 19 Dec 2023 08:41:08 +0100 Subject: [PATCH 606/624] Adding sites to scrapers-list we have a scraper for. (#1570) --- SCRAPERS-LIST.md | 3 + scrapers/SCRAPERS-LIST.md | 1722 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1725 insertions(+) create mode 100644 scrapers/SCRAPERS-LIST.md diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 0ac18cf4f..464b41d18 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -315,6 +315,7 @@ cockyboys.com|CockyBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +collectivecorruption.com|CollectiveCorruption.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- college-uniform.com|CollegeUniform.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- collegeboyphysicals.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay @@ -624,6 +625,7 @@ girlfaction.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- girlfriendsfilms.com|Algolia_Girlfriendsfilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Lesbian girlgirl.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Lesbian girlgirlmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlgirlxxx.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- girlgrind.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Lesbian girlsandstuds.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- girlsgotcream.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- @@ -1020,6 +1022,7 @@ naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR naughtynatural.com|NaughtyNatural.yml|:heavy_check_mark:|:heavy_check_mark:|-|-|-|- netvideogirls.com|NVGNetwork.yml|:heavy_check_mark:|-|-|-|-|- +newgrounds.com|Newgrounds.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay diff --git a/scrapers/SCRAPERS-LIST.md b/scrapers/SCRAPERS-LIST.md new file mode 100644 index 000000000..0ac18cf4f --- /dev/null +++ b/scrapers/SCRAPERS-LIST.md @@ -0,0 +1,1722 @@ +## Supported Sites For Scraping + +This list is meant to keep track of which sites are already supported by existing community scrapers. And which scrapers support them. When introducting a new scraper, add the sites your scraper supports to this list in your PR. Please keep the site list in alphabetical order to keep the list tidy. If you are adding a lot of sites to the list, it may be useful to paste the sites in and then use a tool [like this](https://wordcounter.net/alphabetize) to alphabetize it. +If a scraper needs a newer stash release than the current stable/master the version info should be added in the **Needs** field. + +Column names are **S**cenes, **G**allery, **M**ovies, **P**erformers. 
+ +Supported Site|Scraper| S | G | M | P |Needs|Contents +--------------|-------|:-:|:-:|:-:|:-:|:---:|:------: +1000facials.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +10musume.com|10Musume.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +1111customsxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +18eighteen.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +18tokyo.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +18vr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +1by-day.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +1passforallsites.com|1passforallsites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +1pondo.tv|1pondo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +21naturals.com|Algolia_21Naturals.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +21roles.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +21sextreme.com|Algolia_21Sextreme.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +21sextury.com|Algolia_21Sextury.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +2girls1camera.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +40somethingmag.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +50plusmilfs.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +5kporn.com|5KTeens.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +5kteens.com|5KTeens.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +60plusmilfs.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Granny +65inchhugeasses.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +abbiemaley.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +abbywinters.com|AbbyWinters.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +abuseme.com|AbuseMe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +accidentalgangbang.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +activeduty.com|Algolia_ActiveDuty.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
+adamandevepictures.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +addicted2girls.com|Algolia_Addicted2Girls.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +addisonstreet.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +adultanime.dbsearch.net|Anime-DB.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Japanese Hentai +adultdvdempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +adultdvdmarketplace.com|AdultDvdMarketPlace.yml|:x:|:x:|:heavy_check_mark:|:x:|-|- +adultempire.com|AdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +adultfilmindex.com|AdultFilmIndex.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +adultprime.com|AdultPrime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +adulttime.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|- +adulttimepilots.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +aebn.com|AEBN.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Straight + Gay +agentredgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +alettaoceanempire.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +alexismonroe.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +alexlegend.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +aliciasgoddesses.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allanalallthetime.com|AllAnalAllTheTime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allblackx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +allfinegirls.net|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +allgirlmassage.com|Algolia_AllGirlMassage.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +allherluv.com|MissaX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian 
+allinternal.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +alljapanesepass.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +allvr.porn|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +alphamales.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +alsscan.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +amateripremium.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateurallure.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +amateurav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +amateurboxxx.com|AmateurBoxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateure-xtreme.com|AmateureExtreme.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateureuro.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amateursfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +amazinganna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +ambushmassage.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +amelielou.com|AmelieLou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +americanmusclehunks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +amkingdom.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +amourangels.com|AmourAngels.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +anal-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +anal-beauty.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +anal4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analacrobats.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analamateur.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analbbc.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analcheckups.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analhookers.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+analized.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analjust.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analmom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analnippon.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +analonly.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analoverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analteenangels.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +analtherapyxxx.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analvids.com|LegalPorno.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +analviolation.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +analyzedgirls.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +andolinixxl.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +angelasommers.com|angelasommers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +angelawhite.com|AngelaWhite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +angelinacastrolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +anidb.net|AniDB.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Hentai Database +anilos.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +animecharactersdatabase.com|AnimeCharactersDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|Database +antoniosuleiman.com|AntonioSuleiman.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +apovstory.com|APOVStory.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +archangelvideo.com|ArchAngelVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ariellynn.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ashemaletube.com|AShemaleTube.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +ashleyfires.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ashlynnbrooke.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +asian18.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+asianamericantgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +asianfever.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +asiansexdiary.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +asiantgirl.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +asmrfantasy.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +assholefever.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +assmeat.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +assteenmouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +asstraffic.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +assumethepositionstudios.com|AssumeThePositionStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +assylum.com|Assylum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +atkexotics.com|ATKExotics.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkgirlfriends.com|ATKGirlfriends.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +atkhairy.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkpetites.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +atkpremium.com|ATKHairy.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +attackboys.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +auntjudys.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntjudysxxx.com|AuntJudys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +auntykathy.com|auntykathy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +aussieass.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +aussiefellatioqueens.com|AussieFelatioQueens.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +aussiepov.com|AussieAss.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +austinwilde.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +av69.tv|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +avadawn.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+avanal.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +aventertainments.com|AVE.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Database +avidolz.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV Uncensored +avjiali.com|AVJiali.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|JAV Uncensored +avstockings.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +avtits.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +aziani.com|Aziani.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +babearchives.com|BabeArchives.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +babepedia.com|Babepedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database +baberotica.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +baberoticavr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +babes.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- +babesandstars.com|Babes&Stars.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +babesnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +babevr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +backdoorpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +backroomcastingcouch.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +baddaddypov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badfamilypov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badmilfs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badmommypov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +badoinkvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +badteenspunished.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +baeb.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +balletflatsfetish.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bamvisions.com|BamVisions.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+bang.com|Bang.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +bangbangboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bangbros.com|BangBros.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bangingbeauties.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bangteenpussy.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +barbarafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +barebackplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +barelylegal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +baretwinks.com|BoyCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bathhousebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +battlebang.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +bbcparadise.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bbcpie.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bbcsurprise.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +beauty-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +beauty4k.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +beaverhunt.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +becomingfemme.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +behindtrans500.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +beingphoenixmarie.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +belamionline.com|BelAmi.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bellahd.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellanextdoor.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellapass.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellesafilms.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bellesahouse.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +beltbound.com|BeltBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +berryboys.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay 
+bestoftealconrad.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bffs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bigbootytgirls.com|BigBootyTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +bigfatcreampie.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +biggulpgirls.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +bigtitstokyo.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +biguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +billiestar.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +biphoria.com|Algolia_Biphoria.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +bjraw.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +black-tgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +black4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackambush.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +blackandbig.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackboyaddictionz.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +blacked.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +blackedraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +Blackfoxbound UK|BlackFoxBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackmarketxxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackmassivecocks.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +blackmeatwhitefeet.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blackph.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blacksonblondes.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blacksoncougars.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +blacksondaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
+blacktgirlshardcore.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +blackvalleygirls.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blackwhitefuckfest.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blakemason.com|BlakeMason.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +blowmepov.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +blownbyrone.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +blowpass.com|Algolia_Blowpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bobbiedenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bobstgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +bolatino.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +bondagecafe.com|BondageCafe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bondageliberation.com|BondageLiberation.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boobpedia.com|Boobpedia.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Database +bootyclapxxx.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +bootysisters.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +boppingbabes.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +bossymilfs.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +bouncychicks.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +boundheat.com|BoundHeat.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +boundhoneys.com|Boundhoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +boundjocks.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boundlife.com|Boundlife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boundtwinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bountyhunterporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +boxtrucksex.com|BoxTruckSex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boycrush.com|BoyCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boyforsale.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
+boyfriendsharing.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boyfun.com|BoyFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boygusher.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +boysdestroyed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +boysfuckmilfs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +boyshalfwayhouse.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +bradmontana.com|BradMontana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brandibelle.com|brandibelle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brandnewamateurs.com|BrandNewAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brandnewfaces.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brasilvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +brattyfamily.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brattymilf.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +brattysis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +bravofucker.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +brazilian-transsexuals.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +braziltgirls.xxx|GroobyNetwork-Brazilian.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +brazzers.com|Brazzers.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|CDP|- +breeditraw.com|BreedItRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +brett-tyler.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +brickyates.com|brickyates.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bride4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokenlatinawhores.com|BrokenLatinaWhores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokensluts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +brokestraightboys.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +brookelynnebriar.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
+bruceandmorgan.net|bruceandmorgan.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Amateur Fetish +brutalinvasion.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bryci.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bskow.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bukkakenow.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +bulldogxxx.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +burningangel.com|Algolia_BurningAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +bushybushy.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +bustybeauties.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +buttman.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cadinot.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +calicarter.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +camwhores.tv|CamWhorestv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +canada-tgirl.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +caribbeancom.com|Carib.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +caribbeancompr.com|Carib.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +carmenvalentina.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +carnalplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +castingcouch-x.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +catalinacruz.com|Catalina Cruz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cazzofilm.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +cfnmeu.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +chaosmen.com|Algolia_ChaosMen.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay +charleechaselive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +chastitybabes.com|chastitybabes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cheatingsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +cherrypimps.com|CherryPimps.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
+chickpass.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chickpassinternational.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chickpasspornstars.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chickpassteens.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +chloelamour.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +chocolatebjs.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +citebeur.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +clairprod.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +class-lesbians.com|ClassLesbians.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +claudiamarie.com|ClaudiaMarie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +clips4sale.com|Clips4Sale.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +clubdom.com|SubbyHubby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +clubelegantangel.com|ClubElegantAngel.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +clubinfernodungeon.com|Algolia_ClubInfernoDungeon.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Gay +clubseventeen.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +clubsweethearts.com|ClubSeventeen.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +clubtug.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cockhero.info|Cockhero.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cocksuremen.com|CocksureMen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +cockyboys.com|CockyBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +codycummings.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +colbyknox.com|ColbyKnox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +colette.com|Colette.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +college-amateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +college-uniform.com|CollegeUniform.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- 
+collegeboyphysicals.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +collegedudes.com|BluMedia.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +collegefuckparties.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +coltstudiogroup.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +combatzone.us|CombatZone.yml|:x:|:x:|:heavy_check_mark:|:x:|-|- +combatzonexxx.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +concoxxxion.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +corbinfisher.com|CorbinFisher.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +cosplayfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cospuri.com|Cospuri.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +cougarseason.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +crashpadseries.com|CrashpadSeries.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +creampie-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +creativeporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cruelgf.com|CruelGirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +crunchboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +cuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cuckhunter.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +cuckoldsessions.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +culioneros.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cum4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumaholicteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +cumblastcity.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumdumpsterteens.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumforcover.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- 
+cumlouder.com|Cumlouder.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumshotoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +cumswappingsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +currycreampie.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechamateurs.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechanalsex.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechbangbus.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechbiporn.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechbitch.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechboobs.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechcasting.com|czechcasting.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechcouples.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechdeviant.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechdungeon.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechescortgirls|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechestrogenolit.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechexecutor.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechfantasy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechfirstvideo.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgame.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgangbang.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgardenparty.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgayamateurs.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaycasting.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaycity.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechgaycouples.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgayfantasy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaymassage.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgaysolarium.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
+czechgaytoilets.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +czechgypsies.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechharem.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechhitchhikers.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechhomeorgy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechlesbians.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +czechmassage.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechmegaswingers.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechorgasm.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechparties.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechrealdolls.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsexcasting.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsexparty.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechshemale.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsolarium.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechspy.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechstreets.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechsupermodels.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechtaxi.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +czechvr.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrcasting.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrfetish.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechvrnetwork.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +czechwifeswap.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +d52q.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +dadcrush.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daddy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daddycarl.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +daddygetslucky.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+daddyslilangel.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +damnthatsbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +danejones.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +danidaniels.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +danni.com|Danni.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darkcruising.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +darkroomvr.com|DarkRoomVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darksodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +darkx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +darttechstudios.com|DartTechStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +data18.com|data18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +datingmystepson.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +daughterjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +daughterswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ddfbusty.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +deauxmalive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +debt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +deeper.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +deeplush.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +deepthroatsirens.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +defiled18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dellaitwins.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +delphinefilms.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +desperateamateurs.com|DesperateAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +detentiongirls.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +deviante.com|Deviante.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +devianthardcore.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+devilsfilm.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +devilsfilmparodies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +devilsgangbangs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +devilstgirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|Trans +devonlee.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfbnetwork.com|DFB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dfxtra.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +diabolic.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|Python|- +dianafeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dickdrainers.com|Dickdrainers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dickontrip.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +digitaldesire.com|DigitalDesire.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +digitalplayground.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +dillionation.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +dirty-coach.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +dirty-doctor.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +dirtyauditions.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dirtyboysociety.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +dirtycosplay.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dirtyflix.com|DirtyFlix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +disruptivefilms.com|Algolia_disruptivefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +dlsite.com|DLsite.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +doegirls.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dogfartnetwork.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- 
+doghousedigital.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +dollrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dollsporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +domai.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +dorcelclub.com|DorcelClub.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +dorcelvision.com|DorcelVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +dothewife.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +doubleteamedteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +downblousejerk.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +downtofuckdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dpfanatics.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +dreamsofspanking.com|DreamsOfSpanking.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +dreamteenshd.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dreamtranny.com|DreamTranny.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Trans +drilledchicks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +driverxxx.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +dtfsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dyke4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dyked.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +dylanryder.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +eastboys.com|EastBoys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +ebonytugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +edwardjames.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +elegantangel.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +elitebabes.com|EliteBabes.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Babes +englishlads.com|EnglishLads.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Gay 
+enzorimenez.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +eporner.com|Eporner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ericjohnssexadventures.com|EricJohnsSexAdventures.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ericvideos.com|EricVideos.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +erito.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +eroprofile.com|EroProfile.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +eroticax.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +eroticbeauty.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +eroticspice.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +erotiquetvlive.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +errotica-archives.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +eternaldesire.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +euro-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +eurocreme.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +eurogirlsongirls.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +euroteenerotica.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +evilangel.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +evilangel.com|GammaEntertainment.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +evolvedfights.com|evolvedfightslez.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +evolvedfightslez.com|evolvedfightslez.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +exotic4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +explicite-art.com|ExpliciteArt.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +exploitedcollegegirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +excogigirls.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+extrapackage.com|ExtraPackage.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +extremepickups.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +exxxtrasmall.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fabsluts.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +facials4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +facialsforever.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +fakehostel.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fakehub.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fakeshooting.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +faketaxi.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +falconstudios.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +fallinlovia.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +famedigital.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +familycreep.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +familyhookups.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +familylust.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familysexmassage.com|FamilySexMassage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familysinners.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familystrokes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familyswap.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +familytherapyxxx.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +familyxxx.com|FamilyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fantasyflipflop.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fantasyhd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fantasymassage.com|Algolia_FantasyMassage.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +faphouse.com|Faphouse.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +feetishpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+femanic.com|Femanic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +femdomempire.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +feminized.com|FemdomEmpire.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +femjoy.com|FemJoy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +femlatex.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +femout.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +femoutsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ferame.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +fetishnetwork.com|FetishNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +fetishpros.com|FetishPro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +ffstockings.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filf.com|FILF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fillyfilms.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthflix.com|FilthFlix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthyfamily.com|FilthyFamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthygapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +filthykings.com|Algolia_filthykings.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +finishesthejob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +finishhim.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +firstanalquest.com|Firstanalquest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +firstbgg.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +firstclasspov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fist4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fistertwister.com|Fistertwister.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fistflush.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +fistflush.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fistingcentral.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+fit18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +fitting-room.com|FittingRoom.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +footfetishbeauties.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +footfetishdaily.com|FootFetishDaily.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|foot fetish +footsiebabes.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +forbiddenfruitsfilms.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +forbiddenseductions.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +forbondage.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +forgivemefather.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +fostertapes.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fourfingerclub.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +foxxedup.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fragileslave.com|FragileSlave.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +franks-tgirlworld.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +fratx.com|FratX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Gay +freakmobmedia.com|FreakMobMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +free.premiumbukkake.com|PremiumBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +freeones.com|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database +freeones.xxx|FreeonesCommunity.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +freeusefantasy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +freeusemilf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +freeze.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +french-twinks.com|Frenchtwinks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +frenchporn.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay 
+freshmen.net|Freshmen.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +freshoutofhighschool.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +frolicme.com|FrolicMe.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ftmmen.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM +ftmplus.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM +fuckedfeet.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fuckedhard18.com|FuckedHard18.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fuckermate.com|Fuckermate.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +fuckfiesta.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +fuckingawesome.com|FuckingAwesome.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fuckinghardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +fuckingoffice.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fuckingparties.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +fuckingstreet.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +fuckstudies.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +fuckthegeek.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +fullpornnetwork.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +funbags.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +funsizeboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +futanari.xxx|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +futanarica.com|Futanarica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gag-n-gape.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +gangav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +gangbangcreampie.com|Algolia_GangBangCreampie.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +gapingangels.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gasm.com|GASM.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gayarabclub.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay 
+gaycastings.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gaycest.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gaycreeps.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gaydvdempire.com|GayAdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gayempire.com|GayAdultEmpire.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gayfrenchkiss.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +gayhoopla.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay +gayhorrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gayroom.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +gayviolations.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +genderxfilms.com|Algolia_GenderXFilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|-|Trans +genlez.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlfaction.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlfriendsfilms.com|Algolia_Girlfriendsfilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Lesbian +girlgirl.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Lesbian +girlgirlmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlgrind.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Lesbian +girlsandstuds.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlsgotcream.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlsonlyporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +girlsoutwest.com|GirlsOutWest.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|Lesbian +girlsrimming.com|GirlsRimming.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Rimjobs +girlstakeaway.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlstryanal.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian 
+girlsunderarrest.com|GirlsUnderArrest.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +girlsway.com|Algolia_GirlsWay.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +girlswhofuckgirls.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +givemepink.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +givemeteens.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +gloryhole.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +gloryholeinitiations.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +gloryholesecrets.com|Algolia_Gloryholesecrets.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +gloryholeswallow.com|GloryHoleSwallow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +glosstightsglamour.com|GlossTightsGlamour.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +goddessnudes.com|SARJ-LLC.yml|:x:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +goddesssnow.com|GoddessSnow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +goldenslut.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gostuckyourself.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gotfilled.com|BJRaw.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +grannyghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Granny +grannyvsbbc.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +grooby-archives.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +grooby.club|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +groobygirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +groobyvr.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +guysinsweatpants.com|GuysInSweatpants.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +gymnastic.xxx|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gymrotic.com|GymRotic.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +gynoexclusive.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+hairyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +hairyundies.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +handdomination.com|HandDomination.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +handsonhardcore.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hanime.tv|hanime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai +hardcoreyouth.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hardfuckgirls.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hardkinks.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +hardonvr.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +hardtied.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hardx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +harlemsex.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +harmonyvision.com|HarmonyVision.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +hazel-tucker.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +hd19.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hdmassageporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hdsex18.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +heavyonhotties.com|HeavyOnHotties.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hegre.com|Hegre.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|- +helixstudios.com|HelixStudios.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|gay +helloladyboy.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +hentai2read.com|hentai2read.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai +hentaied.com|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hergape.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hersexdebut.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +heymilf.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +heyoutdoor.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored 
+heyzo.com|Heyzo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +hijabhookup.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hijabmylfs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +himeros.tv|HimerosTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +hitzefrei.com|Hitzefrei.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +hmvmania.com|HmvMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hobybuchanon.com|HobyBuchanon.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +holed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hollyrandall.com|HollyRandall.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +homeclips.com|Voyeurweb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +homemadeanalwhores.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hometowngirls.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hometownhoneys.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +honeytrans.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans +hongkongslut.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +hookuphotshot.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hornydoctor.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hornygirlscz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hornyhousehold.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +horrorporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotandtatted.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotcast.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +hotcrazymess.com|Nubiles.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotguysfuck.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hothouse.com|Algolia_FalconStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotlegsandfeet.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hotmilfsfuck.com|ExploitedX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +hotmovies.com|Hotmovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- 
+hotoldermale.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +hottiemoms.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hotwifexxx.com|HotWifeXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +houseofgord.com|HouseOfGord.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +houseoftaboo.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +houseofyre.com|HouseOfFyre.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hqporner.com|HQPorner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hucows.com|Hucows.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hugecockbreak.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hungarianhoneys.com|HungarianHoneys.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hungfuckers.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +hunt4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hunterleigh.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hunterpov.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hushpass.com|hushpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hussiepass.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +hustlaz.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustler.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustlerhd.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustlerparodies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hustlerslesbians.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian +hustlerstaboo.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +hypnotube.com|Hypnotube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +iafd.com|IAFD.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Database +iconmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +idols69.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +ifeelmyself.com|IFeelMyself.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +ignore4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ihuntmycunt.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+ikillitts.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +ikissgirls.com|IKissGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +iknowthatgirl.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +imdb.com|IMDB.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Database +immorallive.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +imnotyourmommy.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +infernalrestraints.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +innocenthigh.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +inserted.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +insex.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +insexondemand.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +interracialblowbang.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +interracialpass.com|InterracialPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial +interracialpickups.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +interracialpovs.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Interracial +intimatelesbians.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +intimatepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +ipinkvisualpass.com|PinkVisual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +isthisreal.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +italianshotclub.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +itscleolive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +itspov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +iwantclips.com|IWantClips.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +iwara.tv|Iwara.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jacquieetmichelelite.com|JacquieEtMichelElite.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +jacquieetmicheltv.net|JacquieEtMichelTV.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
+jalifstudio.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +jamesdeen.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +janafox.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +japaneseflashers.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +japaneseslurp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +japanhdv.com|JapanHDV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +japanlust.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +japornxxx.com|JapornXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jasonsparkslive.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +jav.land|JavLand.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +javdb.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database +javdb36.com|javdb.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Database +javhd.com|JavHD.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +javhub.com|JavHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +javlibrary.com|JavLibrary_python.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|JAV +javlibrary.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +jaysinxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jayspov.net|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jbvideo.com|JBVideo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jcosplay.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jeedoo.com|Jeedoo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +jeffsmodels.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jelenajensen.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jerk-offpass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jerkaoke.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jessicajaymesxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jessroyan.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay 
+jimweathersarchives.com|JimWeathersArchives.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +jizzbomb.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +jnrc.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +jockbreeders.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +jockpussy.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM +jodiwest.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +joeperv.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +johnnyrapid.com|Algolia_Johnnyrapid.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +joibabes.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +jonnidarkkoxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +joybear.com|JoyBear.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +joymii.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +jpmilfs.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jpnurse.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jpshavers.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jpteacher.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +jschoolgirls.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +julesjordan.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|- +juliaannlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +karissa-diamond.com|Karissa-Diamond.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +karups.com|Karups.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +katiebanks.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kellymadison.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kendrajames.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +killergram.com|Killergram.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kimberleelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kin8tengoku.com|Kin8tengoku.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+kingsoffetish.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kink.com|Kink.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|CDP|- +kink305.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkbomb.com|KinkBomb.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +kinkyfamily.com|kinkyfamily.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkymistresses.com|KinkyMistresses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkyspa.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +kinkytwink.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +kissmefuckme.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +kpopping.com|Kpopping.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +kristenbjorn.com|KristenBjorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|Gay +ladyboy-ladyboy.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ladyboy.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +ladyboygold.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +ladydee.xxx|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lanakendrick.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lanesisters.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lasublimexxx.com|Lasublime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +latinamilf.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +latinoguysporn.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +leannecrow.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +legsex.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +lesbea.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +lesbiananalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lesbianass.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Lesbian 
+lesbianfactor.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +lesbiantribe.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lesbianx.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +lesworship.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian +lethalhardcore.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +lethalhardcorevr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +lethalpass.com|lethalpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +letsdoeit.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lewood.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +lewrubens.com|LewRubens.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lexidona.com|LexiDona.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lexingtonsteele.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lezcuties.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian +lifeselector.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +lilhumpers.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +lingerieav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +lingerietales.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +littleasians.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +littlecaprice-dreams.com|LittleCapriceDreams.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +littlefromasia.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +littlehellcat.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +loan4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lonestarangel.com|LoneStarAngel.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lookathernow.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +lordaardvark.com|LordAardvark.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|CDP|- +lovehairy.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|-
+loveherass.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +loveherboobs.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Boob Fetish +loveherfeet.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Foot Fetish +loveherfilms.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +lubed.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lucasentertainment.com|LucasEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +lustcinema.com|LustCinema.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +lustery.com|Lustery.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur +lustreality.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +lustylina.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +mackstudio.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +madeincanada.xxx|MadeInCanada.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +madouqu.com|Madou.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maggiegreenlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maketeengape.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maledigital.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +malefeet4u.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +mamacitaz.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mandyflores.com|Mandyflores.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +manojob.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +manroyale.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +manuelferrara.com|JulesJordan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +manyvids.com|ManyVids|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +marcusmojo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +mariskax.com|MariskaX.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +maskurbate.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|-
+masonicboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +masqulin.com|Masqulin.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +massage-parlor.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +massagebait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mature.nl|MatureNL.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +mature4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maturegapers.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maturegynoexam.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maturegynospy.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +max-hardcore.com|MaxHardcore.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +maxence-angel.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +maxinex.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +meanawolf.com|MeanaWolf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +meanbitches.com|MeanBitches.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +meanmassage.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +meetsuckandfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +men.com|Brazzers.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Gay +menatplay.com|MenAtPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +menoboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +menover30.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +menpov.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +messyxxx.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +metadataapi.net|**Use the Endpoint**|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +metadataapi.net/jav|ThePornDBJAV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +metadataapi.net/movies|ThePornDBMovies.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Movies +metalbondage.com|MetalBondage.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|-
+metart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +metartnetwork.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +metartx.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +milehighmedia.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +milfed.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milfsodomy.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milfthing.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +milftrip.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milftugs.com|MilfTugs.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +milfvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +milkingtable.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +milovana.com|Milovana.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +minimuff.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +minnano-av.com|Minnano-AV.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +missax.com|MissaX.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +mistermale.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +mixedx.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mmpnetwork.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +modelcentro.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +modelhub.com|Modelhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +modelmediaasia.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +modelmediaus.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +modeltime.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +moderndaysins.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +mofos.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +mofosnetwork.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+mom4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momcomesfirst.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momcum.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momisamilf.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momlover.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +mommy4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mommyblowsbest.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mommyjoi.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mommysboy.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +mommysboy.net|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mommysgirl.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +momsbangteens.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momsboytoy.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momsfamilysecrets.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momslickteens.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +momsteachsex.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momstight.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +momswapped.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momwantscreampie.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momwantstobreed.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +momxxx.org|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mongerinasia.com|MongerInAsia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +monicamendez.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +monroelee.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +monstercub.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +mormongirlz.com|Mormongirlz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+motherfuckerxxx.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +motherless.com|Motherless.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +movieporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mplstudios.com|MPLStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +mrbigfatdick.com|MrBigFatDick.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mrluckypov.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mrpov.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +muchaslatinas.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +mugfucked.com|MugFucked.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mugfucked.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +muses.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +my-slavegirl.com|my-slavegirl.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mybabysittersclub.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mybadmilfs.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mycherrycrush.com|MyCherryCrush.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +mydaughterswap.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mydirtyhobby.com|MyDirtyHobby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +myfamilypies.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +myfirstdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +mylf.com|Mylf.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mylfdom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mylifeinmiami.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +mylked.com|Mylked.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mypervmom.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +mypervyfamily.com|Algolia_mypervyfamily.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +myracequeens.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +mysislovesme.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+mysweetapple.com|MySweetApple.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +myteenoasis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +myveryfirsttime.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +n53i.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +nakedsword.com|NakedSword.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +nannyspy.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nastypublicsex.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nastystepfamily.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nataliastarr.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +natashanice.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +naturalbornbreeders.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +naughtyamerica.com|NaughtyAmerica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +naughtyamericavr.com|NaughtyAmericaVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +naughtynatural.com|NaughtyNatural.yml|:heavy_check_mark:|:heavy_check_mark:|-|-|-|- +netvideogirls.com|NVGNetwork.yml|:heavy_check_mark:|-|-|-|-|- +newsensations.com/tour_ns/|NewSensationsMain.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +newsensations.com/tour_rs/|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nextdoorbuddies.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorcasting.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorebony.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorfilms.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorhomemade.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoormale.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay 
+nextdoororiginals.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorraw.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoorstudios.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortaboo.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nextdoortwink.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +nfbusty.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nhentai.net|nhentai.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Hentai +nikkibenz.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nikkiphoenixxx.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ninakayy.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +noboring.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +noelalejandrofilms.com|NoelAlejandro.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +noirmale.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +noodledude.io|NoodleDude.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +notmygrandpa.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nubilefilms.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nubiles-casting.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nubiles-porn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nubiles.net|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nubileset.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nubilesporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nubilesunscripted.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +nucosplay.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +nudefightclub.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- 
+nudeyogaporn.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +nurumassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +nylonfeetlove.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nylonspunkjunkies.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nylonsweeties.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nylonup.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +nympho.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ocreampies.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +officecock.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +officemsconduct.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +officepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +officesexjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ohmyholes.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +old-n-young.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +old4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +older4me.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +oldgoesyoung.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oldje-3some.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oldje.com|Oldje.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oldnanny.com|OldNanny.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oldsfuckdolls.com|DollsHub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +only3xgirls.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +only3xlost.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +only3xseries.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +only3xvr.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +onlyblowjob.com|DDFNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +onlygolddigger.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +onlyprince.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+onlytarts.com|OnlyTarts.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +onlyteenblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oopsie.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +openlife.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oraloverdose.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +oreno3d.com|Oreno3d.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +orgytrain.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +outdoorjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +outhim.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +outofthefamily.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +over40handjobs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +p54u.com|JavLibrary.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +pacopacomama.com|Paco.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +paintoy.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pandafuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pansexualx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans +pantyjobs.com|pantyjobs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pantypops.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +paradisegfs.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +parasited.com|Hentaied.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +parodypass.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +passion-hd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +passionxxx.com|Passionxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +paulomassaxxx.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +pawged.com|Pawged.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +peeonher.com|peeonher.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pegasproductions.com|PegasProductions.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pennypaxlive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+penthousegold.com|Penthouse.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +perfectgirlfriend.com|FamilyTherapyXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +perfectgonzo.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +pervcity.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pervdoctor.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +perversefamily.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pervertgallery.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pervmom.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pervnana.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pervpricipal.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pervtherapy.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +peternorth.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +petite18.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +petiteballerinasfucked.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +petited.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +petitehdporn.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +petiteleeanna.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +petitepov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +philavise.com|Philavise.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +philippwants.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +pickupfuck.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pie4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pinklabel.tv|PinkLabelTV.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +pinkoclub.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pinkotgirls.com|Pinkoclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pinupfiles.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pissplay.com|PissPlay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Amateur Fetish 
+pissynetwork.com|PissyNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pjgirls.com|pjgirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pkfstudios.com|PKFStudios.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +playboy.tv|PlayboyTV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +playboyplus.com|PlayboyPlus.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +playdaddy.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +playwithrae.com|PlayWithRae.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +plumperpass.com|PlumperPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +plushies.tv|Plushies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pmvhaven.com|PMVHeaven.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|PMVs +porkvendors.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornbox.com|Pornbox.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +porncornvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR +porncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornditos.com|Pornditos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +porndudecasting.com|PornDudeCasting.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +pornfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornforce.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +porngoespro.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornhex.com|PornHex.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Tube Site +pornhub.com|Pornhub.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +pornhubpremium.com|PornhubPremium.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +pornlandvideos.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornmegaload.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +pornperverts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornpros.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornstarbts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornstarhardcore.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- 
+pornstarplatinum.com|PornstarPlatinum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornstarstroker.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornstartease.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornweekends.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pornworld.com|pornworld.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +portagloryhole.com|PortaGloryhole.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +poundedpetite.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +povadventure.com|ModelMediaUS.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +povbitch.com|MMP_Network.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povblowjobs.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povd.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povjp.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +povmania.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povmasters.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +povperverts.net|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povpornstars.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +povr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +povthis.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +prettydirty.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +prettydirtyteens.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +pridestudios.com|Algolia_MenOver30.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +primecups.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +princesscum.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +private.com|Private.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +privatecastings.com|privatecastings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +privatesextapes.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+producersfun.com|ProducersFun.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +profiles.myfreecams.com|MFC.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +propertysex.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicagent.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicsexadventures.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +publicsexdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +puffynetwork.com|Puffynetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pumaswedexxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-bbw.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +pure-ts.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +pure-xxx.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +purebj.com|Only3xGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +puremature.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +purepov.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +puretaboo.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +pussyav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +putalocura.com|Putalocura.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +r18.dev|R18.dev.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|JAV +rachel-steele.com|RachelSteele.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +rachelaldana.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rachelstormsxxx.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ragingstallion.com|Algolia_RagingStallion.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +randyblue.com|RandyBlue.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +raunchybastards.com|RaunchyBastards.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +ravenswallowzxxx.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawattack.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rawcouples.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- 
+rawfuck.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +rawfuckboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +reaganfoxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|MILF +realbikinigirls.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +realfuckingdating.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realityjunkies.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +realitykings.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +realitylovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +realjamvr.com|RealJamVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|VR +realsensual.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realtgirls.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +realtimebondage.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +realvr.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +reddit.com|Reddit.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +redgifs.com|Redgifs.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|Python|Gifs +redheadmariah.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +redhotstraightboys.com|RedHotStraightBoys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +redpolishfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +reidmylips.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +reidmylips.elxcomplete.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +renderfiend.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +restrictedsenses.com|RestrictedSenses.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +retroporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rickysroom.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +ridleydovarez.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay 
+riggsfilms.vip|RiggsFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rim4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rk.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +roccosiffredi.com|Algolia_RoccoSiffredi.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +roddaily.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +rodsroom.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +romemajor.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rubberdoll.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +rule34video.com|Rule34Video.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +russian-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +rylskyart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +sabiendemonia.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +samanthagrace.net|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +samuelotoole.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +sapphicerotica.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sapphix.com|sapphix.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sarajay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sayuncle.com|Sayuncle.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +scarybigdicks.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +schoolgirlshd.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +schoolpov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +scoreland.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +scoreland2.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +scoutboys.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +screwmetoo.com|VegasDreamsLLC.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +seancody.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay 
+seductive18.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +seehimfuck.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +seehimsolo.com|hussiemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +seemomsuck.com|Tugpass.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +seemysextapes.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +selfiesuck.com|SelfieSuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sensualpain.com|insexOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +serve4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +severesexfilms.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +sexart.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +sexbabesvr.com|SexBabesVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +sexintaxi.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sexlikereal.com|SexLikeReal.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sexmex.xxx|SexMex.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +sexmywife.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sexsee.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +sexselector.com|SexSelector.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sexuallybroken.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sexvr.com|SexVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +sexwithmuslims.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sexworking.com|devianteNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +sexyhub.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shagmag.com|shagmag.yml|:x:|:heavy_check_mark:|:x:|:x:|-|Magazines +shame4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shandafay.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shanedieselsbanginbabes.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +share.myfreecams.com|MFC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +shefucksonthefirstdate.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+shegotsix.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shelovesblack.com|LoveHerFilms.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +shesbrandnew.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +sheseducedme.com|Andomark.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shewillcheat.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shinybound.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shinysboundsluts.com|ShinyBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +shiofuky.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +shoplyfter.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +shoplyftermylf.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +showerbait.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +showybeauty.com|ShowyBeauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +shylaj.com|ShylaJ.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +sidechick.com|KBProductions.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +silverstonedvd.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +silviasaint.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +simplyanal.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sinematica.com|Sinematica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sinslife.com|SinsLife.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +siripornstar.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sis.porn|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sislovesme.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sisswap.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sissypov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sketboy.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +slayed.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +slroriginals.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR 
+slutinspection.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +slutsbts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +slutspov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sluttybbws.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +smashed.xxx|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +smashpictures.com|CombatZone.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +smokingmina.com|SmokingMina.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +smutfactor.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +smutmerchants.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +soapymassage.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +sofiemariexxx.com|SofieMariexxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sologirlsmania.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +soloteengirls.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +sophiedeelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sororitysluts.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +spankbang.com|SpankBang.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +spankmonster.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +spearteenpussy.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +spermantino.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +spermmania.com|SpermMania.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +spermswallowers.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +spermswap.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +spizoo.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +spoiledvirgins.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +spunkworthy.com|SpunkWorthy.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +spyfam.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +squirtalicious.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+squirted.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +squirtinglesbian.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +squirtingorgies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stagcollective.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +staghomme.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +stasyq.com|StasyQ.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +stasyqvr.com|StasyQVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +staxus.com|Staxus.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|gay +stayhomepov.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stephousexxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +steppov.com|ItsPOV.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +stepsiblings.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stepsiblingscaught.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +stockingvideos.com|FFstockings.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +stockydudes.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +store.evilangel.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +store.evilangelvideo.com|EvilAngelStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +store.freeones.com|FreeonesStore.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +str8hell.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +strapattackers.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Femdom +straplez.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +straponcum.com|StrapOnCum.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +strapondreamer.com|StrapDreamer.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +streaming.iafd.com|IafdStreaming.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +stretchedoutsnatch.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+stripzvr.com|StripzVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +strokies.com|Strokies.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +stuck4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +studiofow.com|StudioFOW.yml|:heavy_check_mark:|:x:|:x:|:x:|-|3D Animation +stuffintwats.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +stunning18.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +subbyhubby.com|SubbyHubby.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +submissivex.com|SubmissiveX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +subspaceland.com|SubspaceLand.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +suckmevr.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +sugarcookie.xxx|sugarcookie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sugardaddyporn.com|BlurredMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +suggabunny.com|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sunnylanelive.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sunnyleone.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +superbemodels.com|superbemodels.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +superramon.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +susanayn.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swallowbay.com|SwallowBay.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swallowed.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swallowsalon.com|AmateurAllure.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +sweetcarla.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sweetfemdom.com|SweetFemdom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +sweetheartvideo.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Lesbian +sweetsinner.com|MindGeek.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|- +sweetyx.com|SweetyX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +swinger-blog.xxx|SwingerBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+swnude.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +tabooheat.com|Algolia_TabooHeat.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +taboopov.com|taboopov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tacamateurs.com|TACAmateurs.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tadpolexstudio.com|TadpolexStudio.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +takevan.com|TakeVan.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +taliashepard.com|bellapass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tamedteens.com|PerfectGonzo.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +tandaamateurs.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandaasians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandablondes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandabrunettes.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandaebony.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandahousewives.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandalatinas.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandalesbians.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tandaredheads.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tanyatate.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +taratainton.com|TaraTainton.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teacherfucksteens.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +teachmyass.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teamskeet.com|Teamskeet.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +teasepov.com|TeasePOV.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teasingandpleasing.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenageanalsluts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenagecorruption.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+teenagetryouts.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenanalcasting.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teencoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teencorezine.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teencurves.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teendrillers.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenerotica.xxx|SmutPuppet.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenfidelity.com|KellyMadisonMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenfrombohemia.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenmegaworld.net|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +teenpies.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teensandtwinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +teensexmania.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +teensexmovs.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +teensgoporn.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teensloveanal.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teensloveblackcocks.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenslovehugecocks.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +teensnaturalway.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teensneaks.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +teenstryblacks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenthais.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Thai Uncensored +teentugs.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +teenytaboo.com|TeenyTaboo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tenshigao.com|Tenshigao.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Jav +terapatrick.com|NikkiAndTera.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+tessafowler.com|PinupDollars.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +texasbukkake.com|TexasBukkake.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tgirl40.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlbbw.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirljapan.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirljapanhardcore.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlpornstar.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlpostop.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirls.porn|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirls.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirlsex.xxx|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirlsfuck.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tgirlshookup.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +tgirltops.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +thatsitcomshow.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +theartporn.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theassfactory.com|JulesJordan.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +thedicksuckers.com|FinishesTheJob.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishamateurs.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishfetish.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishpov.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theflourishxxx.com|TheFlourish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thehabibshow.com|TheHabibShow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thelesbianexperience.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +thelifeerotic.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- 
+thenude.com|TheNude.yml|:x:|:x:|:x:|:heavy_check_mark:|-|- +thetabutales.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +theyeslist.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +thicc18.com|Fit18.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +thickandbig.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thickumz.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +thirdsexxxx.com|ThirdSexXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +thirdworldxxx.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Asian + Latin + Trans +thisvid.com|ThisVid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +throated.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +timtales.com|TimTales.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Gay +tiny4k.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tinysis.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tittycreampies.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +titworld.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +tmwpov.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tmwvrnet.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +tokyo-hot.com|Tokyohot.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored +tokyobang.com|karatmedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +tommydxxx.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +tonightsgirlfriend.com|Tonightsgirlfriend.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +toomanytrannies.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|Trans +topgrl.com|insex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +toplatindaddies.com|AdultSiteRunner.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +topwebmodels.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +toticos.com|VegasDreamworks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+touchmywife.com|Algolia_touchmywife.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +toughlovex.com|toughlovex.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tour.purgatoryx.com|purgatoryx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +trans500.com/tour/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +trans500.com/tour3/|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +trans500.com/tourespanol|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transangels.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transatplay.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transbella.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transcest.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|FTM +transerotica.com|Transerotica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transexdomination.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transexpov.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +transfeet.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +transfixed.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Trans +transgasm.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +transgressivefilms.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans +transgressivexxx.com|Algolia_EvilAngel|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans +transmodeldatabase.com|TransModelDatabase.yml|:x:|:x:|:x:|:heavy_check_mark:|-|Trans +transnificent.com|GroobyNetwork-Partial.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|Trans +transroommates.com|Arx.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Trans +transsensual.com|MindGeek.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|-|Trans +transsexualangel.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +transsexualroadtrip.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans 
+tranzvr.com|POVR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +traxxx.me|Traxxx.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +trickymasseur.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +trickyoldteacher.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +trickyspa.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +trikepatrol.com|TrikePatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tripforfuck.com|TripForFuck.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +trueamateurs.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +trueanal.com|Nympho.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +truelesbian.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +trystanbull.com|Algolia_NextDoorStudios.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Gay +tryteens.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +ts-castingcouch.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsfactor.com|Algolia_EvilAngel.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|Trans +tsgirlfriendexperience.com|Trans500.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsplayground.com|TSPlayground.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +tspov.com|PureMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +tsraw.com|LadyboyGold.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|Trans +tsvirtuallovers.com|RealityLovers.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +tugpass.com|TugPass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +tuktukpatrol.com|TukTukPatrol.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +tushy.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +tushyraw.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +tutor4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +twinkloads.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twinks.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay 
+twinktop.com|CarnalPlus.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +twistedvisual.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +twistys.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +twistysnetwork.com|Twistys.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +twotgirls.com|TwoTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +uk-tgirls.com|GroobyClub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans +ultrafilms.com|UltraFilms.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +underhentai.com|UnderHentai.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Hentai +universblack.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +unlimitedmilfs.com|NewSensationsNetworkSites.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +unrealporn.com|Czechav.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +upherasshole.com|PervCity.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +upskirtjerk.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +valentina.passionepiedi.com|FFCSH.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vangoren.com|MVG.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vcaxxx.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +velvetveronica.com|VelvetVeronica.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +venus.allfinegirls.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +venus.ultrafilms.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +venus.wowgirls.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +venus.wowporn.com|WowNetworkVenus.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vickyathome.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +viktor-rom.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +vinaskyxxx.com|ModelCentroAPI.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vintagegaymovies.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +vip4k.com|Vip4K.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vipissy.com|Vipissy.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+vipsexvault.com|LetsDoeIt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +virtualpee.com|VirtualPee.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fetish +virtualporn.com|VirtualPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +virtualrealamateurporn.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealgay.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealjapan.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealpassion.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealporn.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualrealtrans.com|VirtualRealPorn.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +virtualtaboo.com|VirtualTaboo.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +visit-x.net|Visit-X.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +vivid.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vividclassic.com|Vivid.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vivthomas.com|SARJ-LLC.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|Python|- +vixen.com|vixenNetwork.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +vlogxxx.com|Spizoo.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vogov.com|vogov.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vrbangers.com|VRBangers.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrbgay.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrbtrans.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrconk.com|VRBangers.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrcosplayx.com|BaDoink.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +vrfirsttimer.com|SLRoriginals.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrhush.com|VRHush.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +vrintimacy.com|CzechVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR 
+vrlatina.com|VRLatina.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:heavy_check_mark:|-|VR +vrporn.com|VRPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrporncz.com|PornCZ.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +vrpornpass.com|Baberotica.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +vrxdb.com|VrxDB.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +vtubie.com|vTubie.yml|:x:|:x:|:x:|:heavy_check_mark:|-|VTuber Database +wakeupnfuck.com|WakeUpnFuck.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wankitnow.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +wankz.com|wankz.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wankzvr.com|POVR.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|VR +warashi-asian-pornstars.fr|WAPdB.yml|:x:|:x:|:x:|:heavy_check_mark:|Python|JAV +watch4beauty.com|Watch4Beauty.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +watch4fetish.com|Watch4Fetish.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +watchingmydaughtergoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +watchingmymomgoblack.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +watchmygf.me|WatchMyGF.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +watchreal.com|Hustler.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- +watchyoujerk.com|WatchYouJerk.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +waybig.com|Waybig.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +wcpclub.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +wearehairy.com|WeAreHairy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:heavy_check_mark:|-|- +webyoung.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|Lesbian +wefuckblackgirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +wefuckblackgirls.com|WeFuckBlackGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +welikegirls.com|Algolia_Adultime.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- 
+weliketosuck.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +welivetogether.com|RealityKingsOL.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Lesbian +weneednewtalents.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +westcoastproductions.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +wetandpissy.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wetandpuffy.com|PuffyNetworkSite.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wetvr.com|AMAMultimedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR +whiteghetto.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +whiteteensblackcocks.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +whorecraftvr.com|AdultEmpireCash.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Fantasy VR +wicked.com (/movies)|WickedMovies.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wicked.com|Algolia_Wicked.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +wifespov.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wildoncam.com|trafficpimps.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +williamhiggins.com|Williamhiggins.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Gay +willtilexxx.com|WillTileXXX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wolfwagner.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- +woodmancastingx.com|WoodmancastingX.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wowgirls.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wowgirlsblog.com|WOWGirlsBlog.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wowporn.xxx|WowPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wtfpass.com|Wtfpass.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +wurstfilm.com|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay +www.mgstage.com|mgstage.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV +x-angels.com|TeenMegaWorld.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +x-art.com|X-artcom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xart.xxx|Xartxxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+xconfessions.com|XConfessions.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|- +xcoreclub.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xempire.com|Algolia_xEmpire.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|Python|- +xes.pl|Xes.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Polish porn site +xevunleashed.com|Xevunleashed.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xhamster.com|Xhamster.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xlgirls.com|TheScoreGroup.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +xnxx.com|Xnxx.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xrares.com|Xrares.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xsinsvr.com|xSinsVR.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +xslist.org|Xslist.yml|:x:|:x:|:x:|:heavy_check_mark:|-|JAV Database +xtube.com|Xtube.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xvideos.com|Xvideos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xvideos.red|Xvideos.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xvirtual.com|XVirtual.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xxxjobinterviews.com|XXXJobInterviews.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +xxxnj.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +xxxpawn.com|XXXPawn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yanks.com|Yanks.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yesgirlz.com|YesGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yngr.com|YNGR.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +younganaltryouts.com|OldGoesYoung.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +youngerloverofmine.com|Mypervmom.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +youngermommy.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +youngthroats.com|Teencoreclub.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +youporn.com|YouPorn.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Tube Site +yourmomdoesanal.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yourmomdoesporn.com|ThirdRockEnt.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +yummysofie.com|YummySofie.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- 
+z-filmz-originals.com|Z-Filmz-Originals.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +zebragirls.com|Algolia_DogfartNetwork.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|Python|- +zentaidolls.com|ZentaiDolls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +zerotolerancefilms.com|Algolia_zerotolerancefilms.yml|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:x:|Python|- +zexyvr.com|ZexyVR.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|VR +zishy.com|Zishy.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- +zoiestarr.com|ChickPass.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- + +## Non url scrapers + +The following scrapers do not support xxxByURL scraping and are not included in the above list. To keep the below list tidy please add scrapers keeping the list in alphabetical order by the .yml filename. +For each scraper a short description, an optional comment with the usage and the related PR(s) with usage details would be appreciated. + +Scraper | Description | Comments | PR +--------|-------------|----------|:--: +ComicInfoXML.yml| A ComicInfo XML gallery scraper | A python scraper that looks for ComicInfo xml compatible files in the gallery's folder/filename and parses them | [#827](https://github.com/stashapp/CommunityScrapers/pull/827) +CopyFromScene.yml| A gallery scraper that returns metadata from the first linked scene | A python scraper that returns metadata from copied scenes, first link the scene to the gallery then run the scraper on the gallery | +CopyToGallery.yml| A scene to gallery scraper | A python scene scraper that copies metadata from a scene to the associated galleries.
Can optionally (check .py file) associate and copy meta to all galleries in the same folder as the scene| [#895](https://github.com/stashapp/CommunityScrapers/pull/895) +dc-onlyfans.yml| An OnlyFans DB scene scraper | A python scraper that scrapes OnlyFans scenes using the DB file (user_data.db) created from DIGITALCRIMINAL's tool | [#847](https://github.com/stashapp/CommunityScrapers/pull/847) +dc_onlyfans_fansdb.yml | FansDB "compliant" OnlyFans metadata DB scraper | Python scraper which scrapes metadata from DIGITALCRIMINAL compatible `user_data.db` databases following FansDB guidelines | [#1500](https://github.com/stashapp/CommunityScrapers/pull/1500) +Filename.yml | Scrape a scene's (local) filename to set as scene title | Utility scraper useful if you've bulk updated filenames outside of stash and want the changes synced back into stash | [#1136](https://github.com/stashapp/CommunityScrapers/pull/1136) +jellyfin.yml| A Jellyfin/Emby scraper | A python scraper that uses the Jellyfin/Emby API to look for Scenes, Performers and Movies via URL, Query or Fragments. Needs the URL, API-Key and User from Jellyfin set in jellyfin.py and the URLs in jellyfin.yml adapted to your local Jellyfin/Emby instance | +MindGeekAPI.yml| A sceneBy(Name\|Fragment) scraper for MindGeek network| A python scraper that directly queries the MindGeek API. For further **needed** instructions refer to the relevant PRs and have a look in the `MindGeekApi.py` file | [#711](https://github.com/stashapp/CommunityScrapers/pull/711) [#738](https://github.com/stashapp/CommunityScrapers/pull/738) [#411](https://github.com/stashapp/CommunityScrapers/pull/411) +multiscrape.yml| A performer scraper that can utilize multiple stash Performer scrapers| A python scraper that can use multiple existing performer scrapers in order to get performer meta.
To configure it, edit the `multiscrape.py` file|[#594](https://github.com/stashapp/CommunityScrapers/pull/594) +performer-image-by-scene.yml| A performer image scraper that gets images from scene covers | A python scraper that searches for scenes with the performer and sets the scene cover image as the performer image|[#1039](https://github.com/stashapp/CommunityScrapers/pull/1039) +performer-image-dir.yml| A performer image scraper compatible with the actress-pics repo | A python scraper that searches in a cloned actress-pics repo for performer images. Configuration and more info in `performer-image-dir.py`|[#453](https://github.com/stashapp/CommunityScrapers/pull/453) +ScrapeWithURL.yml| A sceneByFragment scraper to perform a sceneByURL scrape on scenes with URLs provided | This scraper allows users to perform sceneByURL scrapes in bulk.| [#900](https://github.com/stashapp/CommunityScrapers/issues/900) +ShokoAPI.yml| A sceneByFragment scraper for [Shoko Server](https://shokoanime.com) | A sceneByFragment scraper that queries a local Shoko Server instance using the filename for scene meta.
To configure it, edit the `ShokoAPI.py` file| [#586](https://github.com/stashapp/CommunityScrapers/issues/586) [#628](https://github.com/stashapp/CommunityScrapers/pull/628) +stash-sqlite.yml| A performer scraper that searches a stash db file | This python scraper uses the sqlite database from another stash instance and allows you to parse performers |[#230](https://github.com/stashapp/CommunityScrapers/pull/230) [#460](https://github.com/stashapp/CommunityScrapers/pull/460) +torrent.yml| A torrent scene scraper | This python scraper parses all torrent files in the specified directory (edit `torrent.py`) for embedded metadata using the scene's title. The title can either be a file from the torrent or the filename of the .torrent file | [#358](https://github.com/stashapp/CommunityScrapers/pull/358) +xbvrdb.yml| A scene/gallery scraper for XBVR db files| This python scraper uses the title from stash to search the sqlite database from XBVR for metadata. You would need to copy `main.db` from your xbvr configuration and move this to `scrapers/xbvr.db` | [#190](https://github.com/stashapp/CommunityScrapers/pull/190) From e7d53150e59ee48ccd05c9b4bf4d624ff2c1286b Mon Sep 17 00:00:00 2001 From: SpecialKeta <148014803+SpecialKeta@users.noreply.github.com> Date: Tue, 19 Dec 2023 08:42:30 +0100 Subject: [PATCH 607/624] Add scraping director(s) for scenes (#1571) --- scrapers/IAFD/IAFD.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/scrapers/IAFD/IAFD.py b/scrapers/IAFD/IAFD.py index d3401cd88..25de82b69 100644 --- a/scrapers/IAFD/IAFD.py +++ b/scrapers/IAFD/IAFD.py @@ -65,7 +65,7 @@ def maybe( """ Returns the first value in values that is not "No data" after applying f to it """ - return next((f(x) for x in values if not re.search(r"(?i)no data", x)), None) + return next((f(x) for x in values if not re.search(r"(?i)no data|director", x)), None) def cleandict(d: dict): @@ -549,6 +549,15 @@ def performer_measurements(tree): ) +def
scene_director(tree): + return maybe( + tree.xpath( + '//p[@class="bioheading"][text()="Director" or text()="Directors"]/following-sibling::p[1]//text()' + ), + lambda d: d.strip(), + ) + + def scene_studio(tree): return maybe( tree.xpath( @@ -701,6 +710,7 @@ def scene_from_tree(tree): "title": scene_title(tree), "date": scene_date(tree), "details": scene_details(tree), + "director": scene_director(tree), "studio": scene_studio(tree), "performers": [ {"name": name} for name in tree.xpath('//div[@class="castbox"]/p/a/text()') From 82e246b08595e014fa49443690145988f7cbe7c4 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 19 Dec 2023 22:49:33 +0100 Subject: [PATCH 608/624] Fix relative import in MissaX --- scrapers/MissaX/MissaX.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scrapers/MissaX/MissaX.py b/scrapers/MissaX/MissaX.py index cefa077df..30e041c73 100644 --- a/scrapers/MissaX/MissaX.py +++ b/scrapers/MissaX/MissaX.py @@ -2,9 +2,18 @@ import base64 import datetime import json +import os import re import sys import urllib.parse + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + # extra modules below need to be installed try: import py_common.log as log From 4fcb3131a7049400827e8e5940865338b3af9287 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 19 Dec 2023 23:39:58 +0100 Subject: [PATCH 609/624] Fix relative import in AnimeCharactersDatabase --- .../AnimeCharactersDatabase/AnimeCharactersDatabase.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py index 6628d04c4..35e6f76fc 100644 ---
a/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py +++ b/scrapers/AnimeCharactersDatabase/AnimeCharactersDatabase.py @@ -1,8 +1,16 @@ import json +import os import re import sys from datetime import datetime +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import cloudscraper except ModuleNotFoundError: From bedad6cffbe2747ba3a63b3e823395df3f94040e Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 19 Dec 2023 23:40:47 +0100 Subject: [PATCH 610/624] Fix relative import in ATK Girlfriends --- scrapers/ATKGirlfriends/ATKGirlfriends.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scrapers/ATKGirlfriends/ATKGirlfriends.py b/scrapers/ATKGirlfriends/ATKGirlfriends.py index 255a38f2d..b934c1995 100644 --- a/scrapers/ATKGirlfriends/ATKGirlfriends.py +++ b/scrapers/ATKGirlfriends/ATKGirlfriends.py @@ -1,7 +1,16 @@ import json +import os import re import requests import sys + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import py_common.log as log except ModuleNotFoundError: From 3cbc1ff6ee91f0eb94fb1ed4d3f8442fdd93a8c1 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 19 Dec 2023 23:41:26 +0100 Subject: [PATCH 611/624] Fix relative import in Aunt Judys --- scrapers/AuntJudys/AuntJudys.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scrapers/AuntJudys/AuntJudys.py b/scrapers/AuntJudys/AuntJudys.py index c85b06c09..ecaf50c3c 
100644 --- a/scrapers/AuntJudys/AuntJudys.py +++ b/scrapers/AuntJudys/AuntJudys.py @@ -1,8 +1,16 @@ import json +import os import sys import urllib.request import urllib.parse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: from lxml import html except ModuleNotFoundError: From 9c00d4426a24aa768996173aff0df1540d05ed83 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 19 Dec 2023 23:42:19 +0100 Subject: [PATCH 612/624] Fix relative import in ComicInfoXML --- scrapers/ComicInfoXML/ComicInfoXML.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scrapers/ComicInfoXML/ComicInfoXML.py b/scrapers/ComicInfoXML/ComicInfoXML.py index bf92b3552..297e4de79 100644 --- a/scrapers/ComicInfoXML/ComicInfoXML.py +++ b/scrapers/ComicInfoXML/ComicInfoXML.py @@ -1,7 +1,15 @@ -import sys import json -import xml.etree.ElementTree as ET +import os import pathlib +import sys +import xml.etree.ElementTree as ET + +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there try: import py_common.graphql as graphql From 9c284c8d67428cda6d993f935a07758c6d68f557 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Tue, 19 Dec 2023 23:43:00 +0100 Subject: [PATCH 613/624] Fix relative import in Fit18 --- scrapers/Fit18/Fit18.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scrapers/Fit18/Fit18.py b/scrapers/Fit18/Fit18.py index 1428dbb8b..19aa4bca8 100644 --- 
a/scrapers/Fit18/Fit18.py +++ b/scrapers/Fit18/Fit18.py @@ -1,9 +1,17 @@ import json +import os import re import sys import urllib.parse from urllib.parse import urlparse +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import requests except ModuleNotFoundError: From 29608cbaf6128b817230733433c5396a94438d28 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 20 Dec 2023 00:05:39 +0100 Subject: [PATCH 614/624] Fix regexp syntax warning in MissaX --- scrapers/MissaX/MissaX.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/MissaX/MissaX.py b/scrapers/MissaX/MissaX.py index 30e041c73..0216b7eb6 100644 --- a/scrapers/MissaX/MissaX.py +++ b/scrapers/MissaX/MissaX.py @@ -60,7 +60,7 @@ def scrape_scene_page(url): #scrape the main url title = tree.xpath('//p[@class="raiting-section__title"]/text()')[0].strip() #title scrape log.debug(f'Title:{title}') date = tree.xpath('//p[@class="dvd-scenes__data" and contains(text(), " Added:")]/text()[1]')[0] #get date - date = re.sub("(?:.+Added:\s)([\d\/]*).+", r'\g<1>', date).strip() #date cleanup + date = re.sub(r"(?:.+Added:\s)([\d\/]*).+", r'\g<1>', date).strip() #date cleanup date = datetime.datetime.strptime(date, "%m/%d/%Y").strftime("%Y-%m-%d") #date parse log.debug(f'Date:{date}') studio = tree.xpath('//base/@href')[0].strip() #studio scrape @@ -74,7 +74,7 @@ def scrape_scene_page(url): #scrape the main url details = tree.xpath('//p[@class="dvd-scenes__title"]/following-sibling::p//text()') #details scrape details = ''.join(details) #join details details = '\n'.join(' '.join(line.split()) for line in details.split('\n')) #get rid of double spaces - details = re.sub("\r?\n\n?", r'\n', 
details) #get rid of double newlines + details = re.sub(r"\r?\n\n?", r'\n', details) #get rid of double newlines log.debug(f'Details:{details}') bad_cover_url = tree.xpath("//img[@src0_4x]/@src0_4x") #cover from scene's page if better one is not found (it will be) datauri = "data:image/jpeg;base64," From 9517c14f68339abdae72b0fb805ec670e7ac0840 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 20 Dec 2023 15:59:07 +0100 Subject: [PATCH 615/624] Remove year from titles scraped from IAFD --- scrapers/IAFD/IAFD.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scrapers/IAFD/IAFD.py b/scrapers/IAFD/IAFD.py index 25de82b69..1d0716aea 100644 --- a/scrapers/IAFD/IAFD.py +++ b/scrapers/IAFD/IAFD.py @@ -65,7 +65,9 @@ def maybe( """ Returns the first value in values that is not "No data" after applying f to it """ - return next((f(x) for x in values if not re.search(r"(?i)no data|director", x)), None) + return next( + (f(x) for x in values if not re.search(r"(?i)no data|director", x)), None + ) def cleandict(d: dict): @@ -581,7 +583,9 @@ def scene_date(tree): def scene_title(tree): - return maybe(tree.xpath("//h1/text()"), lambda t: t.strip()) + return maybe( + tree.xpath("//h1/text()"), lambda t: re.sub(r"\s*\(\d{4}\)$", "", t.strip()) + ) def movie_studio(tree): @@ -631,7 +635,7 @@ def movie_director(tree): def movie_title(tree): return maybe( - tree.xpath("//h1/text()"), lambda t: re.sub(r"\s*\([0-9]+\)$", "", t.strip()) + tree.xpath("//h1/text()"), lambda t: re.sub(r"\s*\(\d+\)$", "", t.strip()) ) From b8430242a378be820d388b82d44b81e95c6c7b9c Mon Sep 17 00:00:00 2001 From: AshoPash <152990772+AshoPash@users.noreply.github.com> Date: Wed, 20 Dec 2023 18:41:03 +0100 Subject: [PATCH 616/624] Sexuria.net scraper was added (#1572) --- scrapers/sexuria.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 scrapers/sexuria.yml diff --git a/scrapers/sexuria.yml b/scrapers/sexuria.yml new file mode 100644 
index 000000000..b22cd8198 --- /dev/null +++ b/scrapers/sexuria.yml @@ -0,0 +1,34 @@ +name: Sexuria +# movieByURL: + # - action: scrapeXPath + # url: + # - sexuria.net + # scraper: sceneScraper +sceneByURL: + - action: scrapeXPath + url: + - sexuria.net + scraper: sceneScraper +xPathScrapers: + sceneScraper: + scene: + Title: //*[@id="dle-content"]/article/h1 + Details: + selector: //h4[text()='Description']/following-sibling::div/text() + concat: " " + Date: + selector: //div[text()='Release Date:']/following-sibling::div/text() + postProcess: + - parseDate: Jan 02 2006 + Image: //*[@id="dle-content"]/article/div[1]/div[1]/a/@href + Studio: + Name: //div[text()="Production:"]/following-sibling::div/a/text() + Movies: + Name: //*[@id="dle-content"]/article/h1 + URL: //link[@rel="canonical"]/@href + Tags: + Name: //div[text()="Tags:"]/following-sibling::div/a/text() + Performers: + Name: //div[@class="stars-porn"]/a/text() + URL: //meta[@name='og:url']/@content +# Last Updated December 17, 2023 From f5a87cf8dad4a67df632312cf7c23b8a89d83067 Mon Sep 17 00:00:00 2001 From: AshoPash <152990772+AshoPash@users.noreply.github.com> Date: Mon, 25 Dec 2023 19:45:23 +0100 Subject: [PATCH 617/624] Search feature was added to Sexuria.net (#1574) --- scrapers/sexuria.yml | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/scrapers/sexuria.yml b/scrapers/sexuria.yml index b22cd8198..30a82264c 100644 --- a/scrapers/sexuria.yml +++ b/scrapers/sexuria.yml @@ -1,14 +1,17 @@ name: Sexuria -# movieByURL: - # - action: scrapeXPath - # url: - # - sexuria.net - # scraper: sceneScraper sceneByURL: - action: scrapeXPath url: - sexuria.net scraper: sceneScraper +sceneByName: + action: scrapeXPath + queryURL: "https://sexuria.net/f/j.x.name-released;p.title={}/sort=date/order=desc/" + scraper: searchScraper +sceneByQueryFragment: + action: scrapeXPath + queryURL: "{url}" + scraper: sceneScraper xPathScrapers: sceneScraper: scene: @@ -31,4 +34,18 @@ 
xPathScrapers: Performers: Name: //div[@class="stars-porn"]/a/text() URL: //meta[@name='og:url']/@content -# Last Updated December 17, 2023 + searchScraper: + common: + $result: //article[contains(@class, "short")] + scene: + Title: $result//h2 + URL: ($result//a)[1]/@href + Details: $result//div[@class="author-cat"] + Date: $result//div[@class="date"] + Image: + selector: $result//img/@data-src + postProcess: + - replace: + - regex: ^ + with: https://sexuria.net +# Last Updated December 25, 2023 From 78aeeada47d5388a87e42a84604d199e956998f9 Mon Sep 17 00:00:00 2001 From: symptom6186 <149571017+symptom6186@users.noreply.github.com> Date: Tue, 26 Dec 2023 00:55:36 +0100 Subject: [PATCH 618/624] Update AniDB.yml - scrape by episode URL, add movie scraper, improve regex (#1573) --- scrapers/AniDB.yml | 161 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 124 insertions(+), 37 deletions(-) diff --git a/scrapers/AniDB.yml b/scrapers/AniDB.yml index 5e4f3d76c..2cfe7212b 100644 --- a/scrapers/AniDB.yml +++ b/scrapers/AniDB.yml @@ -1,31 +1,68 @@ name: AniDB -################################################################################################################ -# HOW TO SET UP # -# Store this file in the ~/stash/scrapers/AniDB.yml # -# (If the scrapers directory is not there it needs to be created) # -# SET COOKIES: # -# Access the anidb.net website > login > right button > inspect > find cookies storage # -# Copy the "Value" of "adbsess" and "adbuin" and replace in the cookies category of this document # -# If your account is new, you need to access any NSFW anime and confirm that you want to see 18+ content # -# Do not change the order of the columns, as it can make it stop working # -# I recommend creating a new account just for this # -# SET USER AGENT: # -# Go to your Stash > Settings > Metadata Providers > Scraping > Scraper User Agent # -# Use the User Agent of your choice # -# I'm currently using: Mozilla/5.0 (Windows NT 10.0; Win64; x64; 
rv:79.0) Gecko/20100101 Firefox/79.0 # -################################################################################################################ -# HOW TO USE # -# SCENES: # -# The scene Scraper by Fragment is the best option in case the file name is the name of the anime # -# Scenes that were not found can easily be found by the name scraper # -# It is also possible to scrape individually with the anime URL # -# The scraper doesn't recognize the episode number, I recommend changing it manually at the end # -# PERFORMERS: # -# Performers need to be individually scraped by name or URL # -# I recommend creating them by scraping scenes and then searching individually by name # -# THAT'S IT, ENJOY! # -# Made by @escargotbuffed # -################################################################################################################ + +# ~~~~~~ GETTING STARTED ~~~~~~ +# Store this file in the ~/stash/scrapers/AniDB.yml +# - If the scrapers directory is not there, you must create it first +# +# ~~~~~~ SETTING COOKIES ~~~~~~ +# Note: I recommend creating a new account just for this scraper +# 1. Access the anidb.net website > login > right button > inspect > find cookies storage +# 2. Copy the "Value" of "adbsess" and "adbuin" and replace in the cookies category of this document +# 3. If your account is new, you need to access any NSFW anime and confirm that you want to see 18+ content +# 4. Do not change the order of the columns, as it can make it stop working +# +# ~~~~~~ SETTING USER AGENT ~~~~~~ +# - Go to your Stash > Settings > Metadata Providers > Scraping > Scraper User Agent +# - Use the User Agent of your choice +# - For example: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0 +# +# ~~~~~ RECOMMENDED WORKFLOW ~~~~~ +# 1. Scrape scene by fragment (for performers, tags, artwork, etc) +# - If this fails, scrape by anime URL +# 2. Scrape by episode URL (for title, date) +# 3. 
Manually set movie scene number on scene page +# 3. Navigate to each performer's page & scrape by URL +# 4. Navigate to movie page & scrape by URL +# +# ~~~~~~ HOW TO USE (detailed) ~~~~~~ +# tl;dr when in doubt, use the URL scrapers +# - For scenes: anidb.net/episode/XXX, anidb.net/anime/XXX +# - For performers: anidb.net/character/XXX +# - For movies: anidb.net/anime/XXX +# +# SCENES (by anime): +# - The Scraper by Fragment will usually work, assuming a filename like "[XX] My Lewd Anime - 01 (720p) (x264).mkv" +# - This regex expression strips underscores, dashes, content containing brackets and parentheses, and two digit numbers +# - For example, the above filename is stripped to "My Lewd Anime" +# - If this does not work, I recommend scraping with the episode URL, the anime URL, or the name scraper +# - By default, the scene scraper does not set the title, as the episode scraper serves this purpose better +# - However, if you'd like to enable this functionality, uncomment the "Title" line in sceneScraperAnime > scene +# - The scene (by anime) scraper automatically creates a new movie (i.e., series) entry, +# but unfortunately you will have to set the movie scene (i.e., episode) number manually +# +# SCENES (by episode): +# - This scraper is only accessible by scraping the episode URL (anidb.net/episode/XXX) +# - The scene episode scraper sets the episode title, the anime URL (if missing), and the original airing date +# - By default, the regex expression strips the episode number when setting the title +# - If you want to keep the episode number, delete the second regex replacement in +# sceneScraperEpisode > scene > Title > postProcess > replace +# +# MOVIES: +# - The scene (by anime) scraper automatically creates a new movie entry using the anime title and anime URL +# - On the movie page, you can scrape by URL +# +# PERFORMERS: +# - Performers need to be individually scraped by name or URL +# - I recommend creating them by scraping the anime URL, then 
navigating to the performer page. +# The performer URL should already be set, so you just need to press the scrape by URL button. +# +# ~~~~~ TROUBLESHOOTING ~~~~~ +# - If you find that the scraper has suddenly stopped working, RESET YOUR COOKIES! +# +# ~~~~~ ANYTHING ELSE? ~~~~~ +# THAT'S IT, ENJOY! +# Made by @escargotbuffed, further improvements by @symptom6186 + performerByURL: - action: scrapeXPath url: @@ -41,16 +78,22 @@ sceneByFragment: queryURL: https://anidb.net/anime/?adb.search={filename} queryURLReplace: filename: - - regex: \..+$|\d+ - with: "" + - regex: '\[.*?\]|\(.*?\)|\d\d|\..*' + with: + - regex: '\-|\_' + with: " " - regex: \s+ with: "%20" - scraper: sceneScraper + scraper: sceneScraperAnime sceneByURL: - action: scrapeXPath url: - - https://anidb.net/ - scraper: sceneScraper + - https://anidb.net/episode/ + scraper: sceneScraperEpisode + - action: scrapeXPath + url: + - https://anidb.net/anime/ + scraper: sceneScraperAnime sceneByName: action: scrapeXPath queryURL: https://anidb.net/search/anime/?adb.search={}&entity.animetb=1 @@ -58,7 +101,13 @@ sceneByName: sceneByQueryFragment: action: scrapeXPath queryURL: "{url}" - scraper: sceneScraper + scraper: sceneScraperAnime + +movieByURL: + - action: scrapeXPath + url: + - https://anidb.net/ + scraper: sceneScraperAnime xPathScrapers: performerSearch: @@ -104,15 +153,37 @@ xPathScrapers: - regex: ^ with: https://anidb.net Image: //td[@class="thumb anime"]//img/@src - sceneScraper: + sceneScraperEpisode: + scene: + Title: + selector: //div[@id="layout-main"]//h1[@class="ep"] + postProcess: + - replace: + - regex: ^.{0,9} + with: "" + - regex: \- \d+ \- + with: "/" + URL: + selector: //ul[@class="main-tabs"]//li[@class="g_odd anime"]//span/a/@href + postProcess: + - replace: + - regex: ^ + with: https://anidb.net + Date: //div[@id="layout-main"]//span[@itemprop="datePublished"]/@content + sceneScraperAnime: common: $info: //div[@class="g_section info"] + $title: //div[@class="g_section 
info"]//div[@id="tab_1_pane"]//span[@itemprop="name"] + $en_title: //div[@class="g_section info"]//div[@id="tab_1_pane"]//tr[contains(@class, "official verified") and contains(.//span, 'en')]//label[@itemprop="alternateName"] $character: //div[@id="characterlist"]//div[contains(@class, 'main character') or contains(@class, 'secondary cast')]//div[@itemprop="character"] scene: - Title: $info//div[@id="tab_1_pane"]//span[@itemprop="name"] + #Title: $en_title or $title + #Date: + # selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")] + # parseDate: 02.01.2006 Details: selector: //div[@itemprop="description"]//text() - concat: "\n" + concat: " " Tags: Name: $info//div[@id="tab_1_pane"]//span[@class="tagname"] Performers: @@ -123,10 +194,26 @@ xPathScrapers: - replace: - regex: ^ with: https://anidb.net + Movies: + Name: $title + URL: //link[@rel="canonical"]/@href Studio: Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a Image: $info//div[@class="image"]//img/@src URL: //link[@rel="canonical"]/@href + movie: + Name: $title + Aliases: $en_title + Date: + selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")] + parseDate: 02.01.2006 + Synopsis: + selector: //div[@itemprop="description"]//text() + concat: " " + Studio: + Name: $info//table[@id="staffoverview"]//tr[last()]/td[@class="name creator"]/a + FrontImage: $info//div[@class="image"]//img/@src + URL: //link[@rel="canonical"]/@href driver: cookies: @@ -142,4 +229,4 @@ driver: Domain: "anidb.net" Value: "" # Enter the value of the 'adbuin' here Path: "/" -# Last Updated Dec 5, 2023 +# Last Updated Dec 20, 2023 From 1068624ea05f6e9118f376b1306f9124e701c5f4 Mon Sep 17 00:00:00 2001 From: Maista6969 Date: Wed, 27 Dec 2023 04:56:27 +0100 Subject: [PATCH 619/624] Update TopWebModels scraper Updated to work with their newer version of NextJS Should no longer scrape 
GIF scene covers --- scrapers/TopWebModels/TopWebModels.py | 182 +++++++++++++------------ scrapers/TopWebModels/TopWebModels.yml | 2 +- 2 files changed, 94 insertions(+), 90 deletions(-) diff --git a/scrapers/TopWebModels/TopWebModels.py b/scrapers/TopWebModels/TopWebModels.py index 5b982b2f9..2066e6905 100644 --- a/scrapers/TopWebModels/TopWebModels.py +++ b/scrapers/TopWebModels/TopWebModels.py @@ -1,119 +1,123 @@ +import html import json import os import re import sys +# to import from a parent directory we need to add that directory to the system path +csd = os.path.dirname(os.path.realpath(__file__)) # get current script directory +parent = os.path.dirname(csd) # parent directory (should be the scrapers one) +sys.path.append( + parent +) # add parent dir to sys path so that we can import py_common from there + try: import py_common.log as log except ModuleNotFoundError: - print("You need to download the folder 'py_common' from the community repo! (CommunityScrapers/tree/master/scrapers/py_common)", file=sys.stderr) - sys.exit() + print( + "You need to download the folder 'py_common' from the community repo!" + " (CommunityScrapers/tree/master/scrapers/py_common)", + file=sys.stderr, + ) + sys.exit(1) + # make sure to install below modules if needed try: import requests except ModuleNotFoundError: - print("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests", file=sys.stderr) - sys.exit() + log.error( + "You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)" + ) + log.error("Run this command in a terminal (cmd): python -m pip install requests") + sys.exit(1) try: from bs4 import BeautifulSoup except ModuleNotFoundError: - print("You need to install the BeautifulSoup module. 
(https://pypi.org/project/beautifulsoup4/)", file=sys.stderr) - print("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install beautifulsoup4", file=sys.stderr) - sys.exit() + log.error( + "You need to install the BeautifulSoup module. (https://pypi.org/project/beautifulsoup4/)" + ) + log.error( + "Run this command in a terminal (cmd): python -m pip install beautifulsoup4" + ) + sys.exit(1) -def get_from_url(url_to_parse): - m = re.match(r'https?://tour\.((\w+)\.com)/scenes/(\d+)/([a-z0-9-]+)', url_to_parse) - if m is None: - return None, None, None, None - return m.groups() +def parse_url(url): + if m := re.match(r"https?://tour\.((\w+)\.com)/scenes/(\d+)/([a-z0-9-]+)", url): + return m.groups() + log.error("The URL could not be parsed") + sys.exit(1) -def make_request(request_url, origin_site): - requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS += 'HIGH:!DH:!aNULL' - requests.packages.urllib3.disable_warnings() - - try: - requests.packages.urllib3.contrib.pyopenssl.DEFAULT_SSL_CIPHER_LIST += 'HIGH:!DH:!aNULL' - except AttributeError: - # no pyopenssl support used / needed / available - pass +def make_request(request_url): try: - r = requests.get(request_url, headers={ - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0', - 'Origin': origin_site, - 'Referer': request_url - }, timeout=(3, 6), verify=False) + r = requests.get( + request_url, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0", + "Referer": request_url, + }, + timeout=(3, 6), + ) except requests.exceptions.RequestException as e: - return None, e + log.error(f"Request to '{request_url}' failed: {e}") + exit(1) - if r.status_code == 200: - return r.text, None - return None, f"HTTP Error: {r.status_code}" + if r.status_code != 200: + log.error(f": {r.status_code}") + exit(1) + return BeautifulSoup(r.text, "html.parser") -def fetch_page_json(page_html): - 
matches = re.findall(r'window\.__DATA__ = (.+)$', page_html, re.MULTILINE) - return json.loads(matches[0]) if matches else None -def main(): - stdin = sys.stdin.read() - log.debug(stdin) - fragment = json.loads(stdin) +if __name__ == "__main__": + fragment = json.loads(sys.stdin.read()) - if not fragment['url']: - log.error('No URL entered.') + if not (url := fragment["url"]): + log.error("No URL entered.") sys.exit(1) - url = fragment['url'].strip() - site, studio, sid, slug = get_from_url(url) - if site is None: - log.error('The URL could not be parsed') - sys.exit(1) - response, err = make_request(url, f"https://{site}") - if err is not None: - log.error('Could not fetch page HTML', err) - sys.exit(1) - j = fetch_page_json(response) - if j is None: - log.error('Could not find JSON on page') + log.debug(f"Scraping URL: {url}") + + soup = make_request(url) + props = soup.find("script", {"type": "application/json"}) + if not props: + log.error("Could not find JSON in page") sys.exit(1) - if 'video' not in j['data']: - log.error('Could not locate scene within JSON') + + props = json.loads(props.text) + content = props["props"]["pageProps"]["content"] + + with open("debug.json", "w", encoding="utf-8") as f: + f.write(json.dumps(content, indent=2)) + + if not (scene_id := content.get("id")): + log.error("Could not find scene ID") sys.exit(1) + log.info(f"Scene {scene_id} found") - scene = j["data"]["video"] - - if scene.get('id'): - if str(scene['id']) != sid: - log.error('Wrong scene within JSON') - sys.exit(1) - log.info(f"Scene {sid} found") - scrape = {} - if scene.get('title'): - scrape['title'] = scene['title'] - if scene.get('release_date'): - scrape['date'] = scene['release_date'][:10] - if scene.get('description'): - details = BeautifulSoup(scene['description'], "html.parser").get_text() - scrape['details'] = details - if scene.get('sites'): - scene_studio = scene['sites'][0]['name'] - scrape['studio'] = {'name': scene_studio} - if scene.get('models'): - 
models = [] - for m in scene['models']: - models.extend([x.strip() for x in m['name'].split("&") ]) - scrape['performers'] = [{'name': x} for x in models] - if scene.get('tags'): - scrape['tags'] = [{'name': x['name']} for x in scene['tags']] - if j['data'].get('file_poster'): - scrape['image'] = j['data']['file_poster'] - print(json.dumps(scrape)) - - -if __name__ == '__main__': - try: - main() - except Exception as e: - log.error(e) + scene = { + "code": str(scene_id), + } + + if title := content.get("title"): + scene["title"] = html.unescape(title) + if date := content.get("publish_datedate"): + from datetime import datetime + + scene["date"] = datetime.strptime(date[:10], "%Y/%m/%d").strftime("%Y-%m-%d") + if description := content.get("description"): + scene["details"] = html.unescape(description).replace("\u00a0", " ") + if sites := content.get("sites"): + scene_studio = sites[0]["name"] + scene["studio"] = {"name": scene_studio} + if models := content.get("models"): + scene["performers"] = [{"name": x} for x in models] + if tags := content.get("tags"): + scene["tags"] = [{"name": x} for x in tags] + if scene_cover := content.get("thumb"): + if not scene_cover.endswith(".gif"): + scene["image"] = scene_cover + elif alternative_covers := content.get("thumbs"): + # We don't want gifs + scene["image"] = alternative_covers[0] + print(json.dumps(scene)) diff --git a/scrapers/TopWebModels/TopWebModels.yml b/scrapers/TopWebModels/TopWebModels.yml index ae9a847e5..e8140ac3c 100644 --- a/scrapers/TopWebModels/TopWebModels.yml +++ b/scrapers/TopWebModels/TopWebModels.yml @@ -15,4 +15,4 @@ sceneByURL: script: - python3 - TopWebModels.py -# Last Updated November 20, 2021 +# Last Updated December 27, 2023 From 4f63378cf160774363a8a5e987a30f0c689924a8 Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 27 Dec 2023 16:50:11 +1100 Subject: [PATCH 620/624] Fix validation --- scrapers/AniDB.yml | 3 ++- 
validator/scraper.schema.json | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/AniDB.yml b/scrapers/AniDB.yml index 2cfe7212b..e138715cc 100644 --- a/scrapers/AniDB.yml +++ b/scrapers/AniDB.yml @@ -206,7 +206,8 @@ xPathScrapers: Aliases: $en_title Date: selector: $info//div[@id="tab_1_pane"]//span[contains(@itemprop, "datePublished") or contains(@itemprop, "startDate")] - parseDate: 02.01.2006 + postProcess: + - parseDate: 02.01.2006 Synopsis: selector: //div[@itemprop="description"]//text() concat: " " diff --git a/validator/scraper.schema.json b/validator/scraper.schema.json index 4c7e47ef0..8e83a0dc1 100644 --- a/validator/scraper.schema.json +++ b/validator/scraper.schema.json @@ -427,7 +427,6 @@ "title": "Scene object", "type": "object", "additionalProperties": false, - "required": ["Title"], "properties": { "Title": { "title": "Scene title", From 2c6c9820f415e48012dd8988f8d35a7b0a5a465e Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Wed, 27 Dec 2023 17:18:43 +1100 Subject: [PATCH 621/624] Use master as stable, use branch for develop --- .github/workflows/deploy.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index e8e7521fe..477b3666e 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -25,17 +25,17 @@ jobs: fetch-depth: '0' - run: | cd master - ./build_site.sh ../_site/develop - # uncomment this once we have a stable branch + ./build_site.sh ../_site/stable - name: Checkout Stable uses: actions/checkout@v2 with: - path: stable - ref: stable + path: dev + # replace with develop tag/branch when necessary + ref: master fetch-depth: '0' - run: | - cd stable - ../master/build_site.sh ../_site/stable + cd dev + ../master/build_site.sh ../_site/develop - uses: actions/upload-pages-artifact@v2 deploy: From ea2cfba37774b33099321e7cb4b80e9ec3b110c4 Mon Sep 17 00:00:00 
2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Wed, 27 Dec 2023 15:41:25 -0500 Subject: [PATCH 622/624] Add lilsis.com to Nubiles scraper (#1576) * Add lilsis.com to SCRAPERS-LIST.md Added lilsis.com, which uses the Nubiles.yml scraper. * Added lilsis.com to scene scrapers The lilsis.com site is part of the Nubiles network and uses the same structure as the others in the scene scrapers list. --- SCRAPERS-LIST.md | 1 + scrapers/Nubiles.yml | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index 464b41d18..a110076ed 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -864,6 +864,7 @@ lexingtonsteele.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- lezcuties.com|Algolia_Adultime.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|Lesbian lifeselector.com|21Roles.yml|:heavy_check_mark:|:x:|:x:|:x:|CDP|- lilhumpers.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- +lilsis.com|Nubiles.yml|:heavy_check_mark:|:heavy_check_mark:|:x:|:x:|-|- lingerieav.com|Jhdv.yml|:heavy_check_mark:|:x:|:x:|:x:|-|JAV Uncensored lingerietales.com|WankItNow.yml|:heavy_check_mark:|:x:|:x:|:x:|-|VR littleasians.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- diff --git a/scrapers/Nubiles.yml b/scrapers/Nubiles.yml index 403a1b8e5..26e855f71 100644 --- a/scrapers/Nubiles.yml +++ b/scrapers/Nubiles.yml @@ -20,6 +20,7 @@ sceneByURL: - driverxxx.com/video/watch/ - familyswap.xxx/video/watch/ - imnotyourmommy.com/video/watch/ + - lilsis.com/video/watch/ - momlover.com/video/watch/ - momsboytoy.com/video/watch/ - momsfamilysecrets.com/video/watch/ @@ -242,4 +243,4 @@ xPathScrapers: Performers: *performersAttr Tags: *tagsAttr Studio: *studioFromTitleAttr -# Last Updated October 19, 2023 +# Last Updated December 27, 2023 From 8a143ac9177157e339a4653a1a5aa750959b688c Mon Sep 17 00:00:00 2001 From: MortonBridges <97797486+MortonBridges@users.noreply.github.com> Date: Wed, 27 Dec 
2023 22:03:38 -0500 Subject: [PATCH 623/624] Add biempire.com to MindGeek.yml (#1577) * Added biempire.com Added biempire.com, a member of the MindGeek / Aylo network. * Added biempire.com to scene scraper The biempire.com site is a member of the Aylo / Mindgeek network, and compatible with the sceneScraper. * Add biempire.com to performer scraper. Added biempire.com site to performer scraper. --- SCRAPERS-LIST.md | 1 + scrapers/MindGeek.yml | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/SCRAPERS-LIST.md b/SCRAPERS-LIST.md index a110076ed..3e3bb07c3 100644 --- a/SCRAPERS-LIST.md +++ b/SCRAPERS-LIST.md @@ -189,6 +189,7 @@ beltbound.com|BeltBound.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- berryboys.fr|PornsiteManager.yml|:heavy_check_mark:|:x:|:heavy_check_mark:|:x:|-|Gay bestoftealconrad.com|VNAGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- bffs.com|PaperStreetMedia.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- +biempire.com|MindGeek.yml|:heavy_check_mark:|:x:|:x:|:heavy_check_mark:|-|- bigbootytgirls.com|BigBootyTGirls.yml|:heavy_check_mark:|:x:|:x:|:x:|-|Trans bigfatcreampie.com|GammaEntertainment.yml|:heavy_check_mark:|:x:|:x:|:x:|-|- biggulpgirls.com|TopWebModels.yml|:heavy_check_mark:|:x:|:x:|:x:|Python|- diff --git a/scrapers/MindGeek.yml b/scrapers/MindGeek.yml index d90166a22..0a88b8290 100644 --- a/scrapers/MindGeek.yml +++ b/scrapers/MindGeek.yml @@ -3,6 +3,7 @@ sceneByURL: - action: scrapeXPath url: - babesnetwork.com/scene/ + - biempire.com/scene/ - devianthardcore.com/scene/ - doghousedigital.com/ - familyhookups.com/scene/ @@ -63,6 +64,7 @@ performerByURL: - action: scrapeXPath url: - babesnetwork.com/model/ + - biempire.com/model/ - devianthardcore.com/model/ - digitalplayground.com/modelprofile/ - doghousedigital.com/model/ @@ -377,4 +379,4 @@ xPathScrapers: Performers: *performers Tags: *tags Studio: *studio -# Last Updated July 24, 2023 +# Last Updated December 27, 2023 From 391490c04ae57efe6d6941b1add350f7e4ff0241 Mon Sep 17 00:00:00 
2001 From: Ronnie711 <147556958+Ronnie711@users.noreply.github.com> Date: Fri, 29 Dec 2023 13:48:24 +0000 Subject: [PATCH 624/624] Update Penthouse.yml (#1579) Updating to correct (Sub) Studio. This should reduce the number of submissions that need Studio updating. Have tested & a) confirmed it works b) without forcing a fixed studio name no studio is returned on scrape. --- scrapers/Penthouse.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scrapers/Penthouse.yml b/scrapers/Penthouse.yml index 80e2dd3fe..371fcad11 100644 --- a/scrapers/Penthouse.yml +++ b/scrapers/Penthouse.yml @@ -1,4 +1,4 @@ -name: Penthouse +name: Penthouse Gold sceneByURL: - action: scrapeXPath url: @@ -20,7 +20,7 @@ xPathScrapers: - parseDate: 01/02/2006 Studio: Name: - fixed: "Penthouse" + fixed: "Penthouse Gold" Performers: Name: //div[@class="model"]/a/@title Tags: