From c481df26a7a436a6cddd4e9284560b2edb911e6d Mon Sep 17 00:00:00 2001 From: aureliony <39163684+aureliony@users.noreply.github.com> Date: Sat, 24 Jun 2023 08:11:35 +0800 Subject: [PATCH 1/4] fix --- kattis/problems.py | 89 ++++++++++++++++++---------------- kattis/user.py | 117 +++++++++++++++++++++++++-------------------- 2 files changed, 112 insertions(+), 94 deletions(-) diff --git a/kattis/problems.py b/kattis/problems.py index 59edefa..cd0618c 100644 --- a/kattis/problems.py +++ b/kattis/problems.py @@ -5,85 +5,92 @@ URL = "https://open.kattis.com/problems/" + def problems(pages=1) -> dict: - """ + """ Fetches all Kattis problems :param pages: number of problem pages, defaults to 1 :rtype: list of problem objects """ - ret = [] - for page in range(pages): - probs = Utils.html_page(requests.get(URL + "?page={}".format(page))) - for problem_id in problem_list(probs): - ret.append(problem(problem_id)) - return ret + ret = [] + for page in range(pages): + probs = Utils.html_page(requests.get(URL + "?page={}".format(page))) + for problem_id in problem_list(probs): + ret.append(problem(problem_id)) + return ret + def problem(problem_id: str) -> dict: - """ + """ Fetches information for a single Kattis problem :param problem_id: id of a Kattis problem :rtype: json object """ - obj = { - "url": URL + problem_id, - "stats_url": URL + problem_id + "/statistics", - } + obj = { + "url": URL + problem_id, + "stats_url": URL + problem_id + "/statistics", + } + + problem_page = Utils.html_page(requests.get(obj["url"])) + stats_page = Utils.html_page(requests.get(obj["stats_url"])) - problem_page = Utils.html_page(requests.get(obj["url"])) - stats_page = Utils.html_page(requests.get(obj["stats_url"])) + add_problem_information(problem_page, obj) + add_problem_statistics(stats_page, obj) - add_problem_information(problem_page, obj) - add_problem_statistics(stats_page, obj) + return obj - return obj def add_problem_information(problem_page, problem: dict) -> None: - """ + """ Parses problem information and adds it to problem object """ - fields = ["time_limit", "memory_limit", "difficulty"] + fields = ["time_limit", "memory_limit", "difficulty"] - info = problem_page.find("div", {"class": "sidebar-info"}).findAll("p", recursive=True)[1:-1] + info = problem_page.findAll("div", "metadata_list-item") - for i in range(len(info)): - s = re.compile(r"[^\d.]+") - info[i] = s.sub("", str(info[i])) + for i in range(len(info)): + s = re.compile(r"[^\d.]+") + info[i] = s.sub("", str(info[i])) - problem["info"] = {fields[i]: info[i] for i in range(min(len(info), len(fields)))} + problem["info"] = {fields[i]: info[i] for i in range(min(len(info), len(fields)))} def add_problem_statistics(stats_page, problem: dict) -> None: - """ + """ Parses problem statistics and adds it to problem object """ - fields = [ - "submissions", - "accepted_submissions", - "submission_ratio", - "authors", - "accepted_authors", - "author_ratio", - ] + fields = [ + "submissions", + "accepted_submissions", + "submission_ratio", + "authors", + "accepted_authors", + "author_ratio", + ] - stats = stats_page.find("div", {"class": "stats-content"}).findAll("li", recursive=True)[:6] + stats = stats_page.find("table", class_="table2 condensed mt-5").findAll("tr") - for i in range(len(stats)): - s = re.compile(r"[^\d.]+") - stats[i] = s.sub("", str(stats[i])) + # Extract the numeric values from each tag + stats = [re.sub(r'<[^>]+>', '', str(td)).strip('\n%') for tr in stats for td in tr.findAll("td")[1:]] + + problem["stats"] = { + fields[i]: stats[i] for i in range(min(len(stats), len(fields))) + } - problem["stats"] = {fields[i]: stats[i] for i in range(min(len(stats), len(fields)))} def problem_list(page): - """ + """ Returns a list of problem ID's scraped from a Kattis problem page :param page: problem page """ - problems = page.findAll("a", recursive=True)[18:-4] - return [str(problems[i]).split("/")[2].split('"')[0] for i in range(0, len(problems), 3)] + problems = page.findAll("a", recursive=True)[18:-4] + return [ + str(problems[i]).split("/")[2].split('"')[0] for i in range(0, len(problems), 3) + ] diff --git a/kattis/user.py b/kattis/user.py index a9abe47..9426426 100644 --- a/kattis/user.py +++ b/kattis/user.py @@ -3,76 +3,87 @@ from .utils import Utils from .problems import problem + class KattisUser: - """ + """ An authenticated Kattis User :param username: kattis username :param password: kattis password :param cookies: user login cookies """ - def __init__(self, username, password, cookies): - self.__username = username - self.__password = password - self.__cookies = cookies - self.__submission_url = "https://open.kattis.com/users/" - self.__problem_url = "https://open.kattis.com/problems?show_solved=on&show_tried=off&show_untried=off" - - def problems(self, pages=1) -> dict: - """ - Gets a users solved problems. - """ - obj, data, count = {}, {"script": "true"}, 0 - - for page in range(pages): - problem_page = Utils.html_page( - requests.get( - self.__problem_url + "&page={}".format(page), - data=data, - cookies=self.__cookies, - ) - ) - - problem_list = problem_page.find_all("td", {"class", "name_column"}) + def __init__(self, username, password, cookies): + self.__username = username + self.__password = password + self.__cookies = cookies + self.__submission_url = "https://open.kattis.com/users/" + self.__problem_url = "https://open.kattis.com/problems?show_solved=on&show_tried=off&show_untried=off" - for prob in problem_list: - children = prob.findChildren("a", recursive=False, href=True) - problem_id = children[0]["href"].split("/")[2] - obj[problem_id] = problem(problem_id) - count += 1 - - obj["count"] = count - return obj + def problems(self, pages=1) -> dict: + """ + Gets a users solved problems. - def stats(self) -> dict: - """ + """ + obj, data, count = {}, {"script": "true"}, 0 + + for page in range(pages): + problem_page = Utils.html_page( + requests.get( + self.__problem_url + "&page={}".format(page), + data=data, + cookies=self.__cookies, + ) + ) + + problem_list = problem_page.find_all( + "td", {"class", "name_column"}) + + for prob in problem_list: + children = prob.findChildren("a", recursive=False, href=True) + problem_id = children[0]["href"].split("/")[2] + obj[problem_id] = problem(problem_id) + count += 1 + + obj["count"] = count + return obj + + def stats(self) -> dict: + """ Gets a users stats (score, rank) """ - fields, data = ["score", "rank"], {"script": "true"} - - stats_page = Utils.html_page( - requests.get( - self.__submission_url + self.__username, - data=data, - cookies=self.__cookies, - ) - ) + fields, data = ["score", "rank"], {"script": "true"} + + stats_page = Utils.html_page( + requests.get( + self.__submission_url + self.__username, + data=data, + cookies=self.__cookies, + ) + ) - # Parse score and rank - user_stats = stats_page.find("ul", {"class": "profile-header-list"}).findAll("li") + # Parse score and rank + user_stats = stats_page.find("ul", {"class": "profile-header-list"}).findAll( + "li" + ) - for i in range(len(user_stats)): - s = re.compile(r"[^\d.]+") - user_stats[i] = s.sub("", str(user_stats[i])) + for i in range(len(user_stats)): + s = re.compile(r"[^\d.]+") + user_stats[i] = s.sub("", str(user_stats[i])) - return {fields[i]: user_stats[i] for i in range(min(len(user_stats), len(fields)))} + return { + fields[i]: user_stats[i] for i in range(min(len(user_stats), len(fields))) + } - def data(self) -> dict: - """ + def data(self) -> dict: + """ Combined solved problems and user stats """ - pages = 28 - return {"username": self.__username, "stats": self.stats(), "problems": self.problems(pages)} + pages = 28 + return { + "username": self.__username, + "stats": self.stats(), + "problems": self.problems(pages) + } From a6ef0a28b05f081f1127732c7d2d0aa458c725dd Mon Sep 17 00:00:00 2001 From: aureliony <39163684+aureliony@users.noreply.github.com> Date: Sat, 24 Jun 2023 08:49:07 +0800 Subject: [PATCH 2/4] fix --- kattis/user.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/kattis/user.py b/kattis/user.py index 9426426..6a67857 100644 --- a/kattis/user.py +++ b/kattis/user.py @@ -18,7 +18,7 @@ def __init__(self, username, password, cookies): self.__password = password self.__cookies = cookies self.__submission_url = "https://open.kattis.com/users/" - self.__problem_url = "https://open.kattis.com/problems?show_solved=on&show_tried=off&show_untried=off" + self.__problem_url = "https://open.kattis.com/problems?show_solved=on&show_partial=off&show_tried=off&show_untried=off" def problems(self, pages=1) -> dict: """ @@ -36,13 +36,11 @@ def problems(self, pages=1) -> dict: ) ) - problem_list = problem_page.find_all( - "td", {"class", "name_column"}) - - for prob in problem_list: - children = prob.findChildren("a", recursive=False, href=True) + problem_list = problem_page.find("table", "table2").find_all("tr") + for prob in problem_list[1:]: # skip table header + children = prob.findChildren("a") problem_id = children[0]["href"].split("/")[2] - obj[problem_id] = problem(problem_id) + obj[problem_id] = problem(problem_id) # can take very long if there are many solved problems count += 1 obj["count"] = count @@ -64,10 +62,7 @@ def stats(self) -> dict: ) # Parse score and rank - user_stats = stats_page.find("ul", {"class": "profile-header-list"}).findAll( - "li" - ) - + user_stats = stats_page.findAll("div", "divider_list-item") for i in range(len(user_stats)): s = re.compile(r"[^\d.]+") user_stats[i] = s.sub("", str(user_stats[i])) From e55b230821f9c3d9ae4001bbc6055a36e645dc7b Mon Sep 17 00:00:00 2001 From: aureliony <39163684+aureliony@users.noreply.github.com> Date: Sat, 24 Jun 2023 10:32:51 +0800 Subject: [PATCH 3/4] fix problems with difficulty range --- kattis/problems.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kattis/problems.py b/kattis/problems.py index cd0618c..a50cbd2 100644 --- a/kattis/problems.py +++ b/kattis/problems.py @@ -49,13 +49,10 @@ def add_problem_information(problem_page, problem: dict) -> None: """ fields = ["time_limit", "memory_limit", "difficulty"] - - info = problem_page.findAll("div", "metadata_list-item") - + info = problem_page.findAll("div", "metadata_list-item")[:3] for i in range(len(info)): - s = re.compile(r"[^\d.]+") - info[i] = s.sub("", str(info[i])) - + s = info[i].find('span').find_next_sibling().text.strip() + info[i] = re.sub(r'[a-zA-Z]', '', s).strip() problem["info"] = {fields[i]: info[i] for i in range(min(len(info), len(fields)))} def add_problem_statistics(stats_page, problem: dict) -> None: From 342019029b4d721a587a57e9a066204e0c0013b6 Mon Sep 17 00:00:00 2001 From: aureliony <39163684+aureliony@users.noreply.github.com> Date: Sat, 24 Jun 2023 11:41:25 +0800 Subject: [PATCH 4/4] implement problem caching on local drive --- kattis/database.py | 23 +++++++++++++++++++++++ kattis/problems.py | 8 ++++---- 2 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 kattis/database.py diff --git a/kattis/database.py b/kattis/database.py new file mode 100644 index 0000000..25f05a6 --- /dev/null +++ b/kattis/database.py @@ -0,0 +1,23 @@ +import requests +import os +from .utils import Utils +from bs4 import BeautifulSoup + +CACHE_DIR = os.path.expanduser("~") + "/.cache/kattis/" + +class Database: + def __init__(self): + if not os.path.exists(CACHE_DIR): + os.makedirs(CACHE_DIR) + self.db = set() + for root, dirs, files in os.walk(CACHE_DIR): + self.db.update(files) + + def get(self, filename, url): + filename += ".html" + if filename in self.db: + return BeautifulSoup(open(CACHE_DIR + filename, 'r', encoding = 'utf-8').read(), "html.parser") + else: + r = requests.get(url) + open(CACHE_DIR + filename, 'w', encoding = 'utf-8').write(r.text) + return Utils.html_page(r) diff --git a/kattis/problems.py b/kattis/problems.py index a50cbd2..fa34416 100644 --- a/kattis/problems.py +++ b/kattis/problems.py @@ -1,11 +1,11 @@ import requests import re -import json from .utils import Utils +from kattis.database import Database +database = Database() URL = "https://open.kattis.com/problems/" - def problems(pages=1) -> dict: """ Fetches all Kattis problems @@ -33,8 +33,8 @@ def problem(problem_id: str) -> dict: "stats_url": URL + problem_id + "/statistics", } - problem_page = Utils.html_page(requests.get(obj["url"])) - stats_page = Utils.html_page(requests.get(obj["stats_url"])) + problem_page = database.get(problem_id, obj["url"]) + stats_page = database.get(problem_id + "_statistics", obj["stats_url"]) add_problem_information(problem_page, obj) add_problem_statistics(stats_page, obj)