kavita_to_kitty.py
"""
This was put here in a known broken state. It was mainly uploaded at the request of a few people so they had something to work off. In due time I'll come back to this...
"""
import requests
import csv
import os
import time
import json
import feedparser
import re
from bs4 import BeautifulSoup
from urllib.parse import urlparse
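
# ---------------------------------------------------------------------------
# Rough sketch for the matching TODO further down: instead of a plain substring
# test, only treat a library title as matching when it is followed by something
# that looks like a volume/chapter marker (or the end of the name), so a series
# called 'Kingdom' no longer matches a 'Kingdom Hearts' release. It is NOT wired
# into the loop below; the name title_matches is only for this sketch.
def title_matches(series_title, rss_title):
    """Return True if series_title appears in rss_title followed by a volume/chapter marker."""
    pattern = re.escape(series_title) + r'\s*(?:$|[\(\[]|v(?:ol(?:ume)?)?\.?\s*\d|ch(?:apter)?\.?\s*\d|\d)'
    return re.search(pattern, rss_title, re.IGNORECASE) is not None
# ---------------------------------------------------------------------------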
##### - Allow the user to input their full ODPS URL so that we don't have to ask for IP + API key.
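# The URL Kavita shows there is normally of the form http(s)://<host>/api/opds/<api-key>,
# which is why the last path segment is treated as the API key below.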
url = input("Paste in your full ODPS URL from your Kavita user dashboard (/preferences#clients): ")
parsed_url = urlparse(url)
host_address = parsed_url.scheme + "://" + parsed_url.netloc
api_key = parsed_url.path.split('/')[-1]
print("Host Address:", host_address)
print("API Key:", api_key)
print("---------------------------")
login_endpoint = "/api/Plugin/authenticate"  # Don't change
search_endpoint = "/api/Library/list?path=/manga"  # Change to match the library path you want to monitor
try:
    apikeylogin = requests.post(host_address + login_endpoint + "?apiKey=" + api_key + "&pluginName=CheckSeries")
    apikeylogin.raise_for_status()
    jwt_token = apikeylogin.json()['token']
    # print("JWT Token:", jwt_token)  # Only for debug
except requests.exceptions.RequestException as e:
    print("Error during authentication:", e)
    exit()
headers = {
    "Authorization": f"Bearer {jwt_token}",
    "Content-Type": "application/json"
}
response = requests.get(host_address + search_endpoint, headers=headers)
if response.status_code != 200:
    raise Exception(f"API call failed: {response.status_code}")
series_list = response.json()
""" Debug for checking the response
# Save the JSON response directly to a file
with open("series_list.json", "w") as jsonfile:
    json.dump(series_list, jsonfile, indent=4)
"""
sorted_series_list = sorted(series_list, key=lambda x: x["name"])
# Create a CSV file
with open("series_list.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
    writer.writerow(["Title", "Full Path"])
    for series in sorted_series_list:
        title = series["name"]
        full_path = series["fullPath"]
        writer.writerow([title, full_path])
# Read the series titles from the CSV and store them in a set
series_titles = set()
with open("series_list.csv", "r", encoding="utf-8") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        series_titles.add(row["Title"])
# RSS Caching to not hammer nyaa
CACHE_FILE = "rss_cache.xml"
CACHE_EXPIRATION = 900  # 15 minutes in seconds
# Check if the cache file exists and if it's still valid
if os.path.exists(CACHE_FILE) and (time.time() - os.path.getmtime(CACHE_FILE)) < CACHE_EXPIRATION:
    with open(CACHE_FILE, "r") as file:
        rss_feed = file.read()
else:
    # Fetch the RSS feed from the URL
    response = requests.get("https://nyaa.si/?page=rss&c=3_1")
    rss_feed = response.text
    # Cache the result
    with open(CACHE_FILE, "w") as file:
        file.write(rss_feed)
# Parse the RSS feed
feed = feedparser.parse(rss_feed)
# Process the feed entries
for entry in feed.entries:
    # print("Entry:", entry)  # Print the entire entry object for debugging
    rss_title = entry.title
    # Check if the RSS title matches any series title from the CSV
    # TODO: matching needs to be cleaned up. It should be more precise so it doesn't just match a series
    # based on the first word. Example: 'Kingdom Hearts' will match against 'Kingdom'.
    # (One possible stricter check, title_matches, is sketched near the top of this file.)
    matching_series = [series_title for series_title in series_titles if series_title in rss_title]
    if matching_series:
        print(f"Matching series found: {matching_series[0]}")
        # print(f"Link: {entry.link}")
        # Use BeautifulSoup to remove HTML tags from entry.description
        description_html = entry.description
        description_text = BeautifulSoup(description_html, "html.parser").get_text()
        print(f"Description: {description_text}")
        # Access nyaa:infoHash
        nyaa_info_hash = entry.get("nyaa_infohash")
        if nyaa_info_hash:
            print(f"nyaa:infoHash: {nyaa_info_hash}")
        # Access nyaa:size
        nyaa_size = entry.get("nyaa_size")
        if nyaa_size:
            print(f"nyaa:size: {nyaa_size}")
        # Extract series name, volume number, and chapter number using regular expressions
        # TODO: Volume / Chapter matching needs to improve.
        # (A more explicit sketch, parse_volume_chapter, is at the end of this file.)
        # Here group(2) is the volume number and group(3) is the chapter number.
        series_info_match = re.search(r'(.*?)\s*(?:V(?:olume\s*)?(\d+)|Chapter\s*(\d+))', description_text, re.IGNORECASE)
        if series_info_match:
            series_name = matching_series[0]
            volume_number = int(series_info_match.group(2)) if series_info_match.group(2) else None
            chapter_number = int(series_info_match.group(3)) if series_info_match.group(3) else None
            print(f"Series Name: {series_name}")
            print(f"Volume Number: {volume_number}")
            print(f"Chapter Number: {chapter_number}")
        else:
            print("Match not found")
        # Extract series name and number using regular expressions
        series_name_match = re.search(r'(.*?)(\d+\.\d+)', description_text)
        if series_name_match:
            series_name = series_name_match.group(1).strip()
            series_number = float(series_name_match.group(2))
            print(f"Series Name: {series_name}")
            print(f"Series Number: {series_number}")
        print("-----")