-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
72 lines (52 loc) · 1.85 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pyperclip
from bs4 import BeautifulSoup
from requests_html import HTMLSession
from tqdm import tqdm
# Single HTML session created at import time; get_videos() and get_tags()
# both issue their page requests through it.
session = HTMLSession()
def get_videos(query):
    """Return video URLs found on the YouTube search results page for *query*.

    The results page is requested through the module-level ``session`` and
    rendered before the result entries are read from the DOM.

    Args:
        query: Search terms. A literal ``+`` is kept as-is (callers use it as
            the word separator); every other reserved character is
            percent-encoded before being placed in the URL.

    Returns:
        A list of absolute ``https://www.youtube.com...`` URLs in page order,
        one for each result entry that has a title link with an ``href``.
    """
    from urllib.parse import quote

    # Characters such as "&", "#" or "%" would otherwise truncate or corrupt
    # the search_query parameter.
    encoded_query = quote(query, safe="+")
    search_url = f"https://www.youtube.com/results?search_query={encoded_query}"
    search = session.get(search_url)
    search.html.render(sleep=1)
    videos = search.html.find(
        "ytd-video-renderer.style-scope.ytd-item-section-renderer"
    )

    video_urls = []
    for video in tqdm(videos):
        # find(..., first=True) yields None when the selector matches nothing,
        # so guard before touching .attrs instead of raising AttributeError.
        title_link = video.find("a#video-title", first=True)
        if title_link is None:
            continue
        href = title_link.attrs.get("href")
        if href:
            video_urls.append(f"https://www.youtube.com{href}")
    return video_urls
def get_tags(url, max_chars=400):
    """Collect the tags declared on a video page, within a length budget.

    The page is requested through the module-level ``session``, rendered, and
    parsed for ``<meta property="og:video:tag">`` elements.

    Args:
        url: Address of the video page to scrape.
        max_chars: Upper bound on the combined length of the returned tags
            (separators are not counted).

    Returns:
        Unique, non-empty tags in page order. Collection stops at the first
        new tag that would push the combined length past ``max_chars``.
    """
    video = session.get(url)
    video.html.render(sleep=0, timeout=100, keep_page=False, scrolldown=5)
    soup = BeautifulSoup(video.html.html, "lxml")
    tag_elements = soup.find_all("meta", {"property": "og:video:tag"})

    tags = []
    seen = set()
    # Running total of the lengths of the tags accepted so far. It is updated
    # once per accepted tag; re-summing the whole list on every iteration
    # without resetting would inflate the count and end collection early.
    num_chars = 0
    for meta in tag_elements:
        element_content = meta.attrs.get("content")
        # A meta element without a content attribute carries no tag text.
        if not element_content or element_content in seen:
            continue
        if num_chars + len(element_content) > max_chars:
            break
        seen.add(element_content)
        tags.append(element_content)
        num_chars += len(element_content)
    return tags
def main():
    """Run the interactive scraper.

    Prompts for a search query, looks up the matching videos, asks how many of
    them to scrape, prints the collected tags as a comma-separated line and
    optionally copies that line to the clipboard.
    """
    query = input("Query: ").replace(" ", "+")
    video_urls = get_videos(query)
    print(f"Scraping {len(video_urls)} videos...")

    # Keep asking until the answer parses as an integer instead of crashing
    # with ValueError on a typo.
    while True:
        answer = input("How many videos do you scrap tags from?: ")
        try:
            amount = int(answer)
        except ValueError:
            print("Please enter a whole number.")
        else:
            break

    tags = []
    print("Scraping tags...")
    # Slicing caps the count at the number of URLs found; a negative answer
    # selects nothing.
    for url in tqdm(video_urls[: max(amount, 0)]):
        tags.extend(get_tags(url))

    # Join the tag texts directly: round-tripping through str(list) would strip
    # brackets/apostrophes that belong to a tag and leave stray double quotes
    # around tags containing an apostrophe.
    tags_str = ", ".join(tag for tag in tags if tag)

    print("\033c")  # clear screen
    print("Tags:")
    print(tags_str)

    answer = input("Do you want to copy the tags to the clipboard? [Y/N]\n> ")
    if answer.strip().lower() == "y":
        pyperclip.copy(tags_str)
# Start the interactive scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()