From bf6675fb275d84fc06caa001da4f02d01f80d42e Mon Sep 17 00:00:00 2001 From: jasonkhadka Date: Wed, 18 May 2022 15:19:30 +0200 Subject: [PATCH 1/2] filter texts added --- .gitignore | 5 ++++- grab_pictures.py | 13 +++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 67183cd..70af2fb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ env/ !requirements.txt images/* comments/* +.idea/ # Byte-compiled / optimized / DLL files __pycache__/ @@ -83,6 +84,7 @@ target/ # Jupyter Notebook .ipynb_checkpoints +*.ipynb # IPython profile_default/ @@ -113,6 +115,7 @@ celerybeat.pid # Environments .env .venv +.venv* env/ venv/ ENV/ @@ -141,4 +144,4 @@ dmypy.json .pytype/ # Cython debug symbols -cython_debug/ \ No newline at end of file +cython_debug/ diff --git a/grab_pictures.py b/grab_pictures.py index ae8ed3f..eafe8a8 100644 --- a/grab_pictures.py +++ b/grab_pictures.py @@ -4,7 +4,9 @@ from utils import get_valid_filename, erase_previous_line, get_userAgent -def get_pictures_from_subreddit(data, subreddit, location, nsfw): +def get_pictures_from_subreddit(data, subreddit, location, nsfw, filter_texts): + if filter_texts is None: + filter_texts = [] for i in range(len(data)): if data[i]['data']['over_18']: # if nsfw post and you only want sfw @@ -16,6 +18,11 @@ def get_pictures_from_subreddit(data, subreddit, location, nsfw): continue current_post = data[i]['data'] + title = current_post['title'].lower() + + if not any(map(lambda x: x.lower() in title, filter_texts)): + continue + image_url = current_post['url'] if '.png' in image_url: extension = '.png' @@ -55,6 +62,8 @@ def main(): help='Optionally specify the directory/location to be downloaded') parser.add_argument('-x', '--nsfw', type=str, metavar='', default='y', help='Optionally specify the behavior for handling NSFW content. y=yes download, n=no skip nsfw, x=only download nsfw content') + parser.add_argument('-f', '--filter-texts', nargs='+', type=str, metavar='', + required=False, help='One or more of the given filter texts need to be included in title of the images (e.g. "digital").') args = parser.parse_args() # initializing userAgent @@ -97,7 +106,7 @@ def main(): print('downloading pictures from r/' + args.subreddit[j] + '..') data = response.json()['data']['children'] - get_pictures_from_subreddit(data, args.subreddit[j], location, args.nsfw) + get_pictures_from_subreddit(data, args.subreddit[j], location, args.nsfw, args.filter_texts) erase_previous_line() print('Downloaded pictures from r/' + args.subreddit[j]) From 928dac6f212549b70634244fe8c58c26d2cbf8cc Mon Sep 17 00:00:00 2001 From: jasonkhadka Date: Wed, 18 May 2022 15:25:32 +0200 Subject: [PATCH 2/2] new line --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 70af2fb..9d01300 100644 --- a/.gitignore +++ b/.gitignore @@ -145,3 +145,4 @@ dmypy.json # Cython debug symbols cython_debug/ +