diff --git a/kattis/database.py b/kattis/database.py
new file mode 100644
index 0000000..25f05a6
--- /dev/null
+++ b/kattis/database.py
@@ -0,0 +1,23 @@
+import requests
+import os
+from .utils import Utils
+from bs4 import BeautifulSoup
+CACHE_DIR = os.path.expanduser("~") + "/.cache/kattis/"
+class Database:
+    """On-disk cache of fetched HTML pages, stored as files inside CACHE_DIR.
+
+    ``self.db`` is the set of file names currently available in the cache.
+    """
+
+    def __init__(self):
+        """Create CACHE_DIR if it is missing and index the files it contains."""
+        # exist_ok avoids the race between an existence check and the creation.
+        os.makedirs(CACHE_DIR, exist_ok=True)
+        # get() only opens files directly inside CACHE_DIR, so only that level
+        # is indexed; names found in subdirectories could never be opened.
+        self.db = {
+            name
+            for name in os.listdir(CACHE_DIR)
+            if os.path.isfile(os.path.join(CACHE_DIR, name))
+        }
+
+    def get(self, filename, url):
+        """Return the parsed page for ``url``, reading the cached copy if present.
+
+        :param filename: cache key; ``".html"`` is appended to form the file name
+        :param url: address requested with ``requests.get`` on a cache miss
+        :return: ``BeautifulSoup`` built from the cached text on a hit,
+            ``Utils.html_page(r)`` for the fresh response on a miss
+        """
+        filename += ".html"
+        path = CACHE_DIR + filename
+        if filename in self.db:
+            # Context manager closes the handle instead of leaking it.
+            with open(path, 'r', encoding='utf-8') as cached:
+                return BeautifulSoup(cached.read(), "html.parser")
+        r = requests.get(url)
+        # NOTE(review): the response body is cached whatever its status code.
+        with open(path, 'w', encoding='utf-8') as cached:
+            cached.write(r.text)
+        # Register the new file so later calls in this process hit the cache.
+        self.db.add(filename)
+        return Utils.html_page(r)
diff --git a/kattis/problems.py b/kattis/problems.py
index 59edefa..fa34416 100644
--- a/kattis/problems.py
+++ b/kattis/problems.py
@@ -1,89 +1,93 @@
import requests
import re
-import json
from .utils import Utils
+from kattis.database import Database
+# Page cache used by problem() below; the Database object is created when this module is imported.
+database = Database()
+# Base address that problem ids and the "?page=" query string are appended to.
URL = "https://open.kattis.com/problems/"
def problems(pages=1) -> dict:
- """
+ """
Fetches all Kattis problems
:param pages: number of problem pages, defaults to 1
:rtype: list of problem objects
- ret = []
- for page in range(pages):
- probs = Utils.html_page(requests.get(URL + "?page={}".format(page)))
- for problem_id in problem_list(probs):
- ret.append(problem(problem_id))
- return ret
+ # NOTE(review): the signature says "-> dict" but the value returned is the list built here.
+ ret = []
+ # Pages are requested with ?page=0 up to ?page=pages-1.
+ for page in range(pages):
+ # Listing pages are fetched with requests on every call.
+ probs = Utils.html_page(requests.get(URL + "?page={}".format(page)))
+ for problem_id in problem_list(probs):
+ # One problem() lookup per id found on the listing page.
+ ret.append(problem(problem_id))
+ return ret
def problem(problem_id: str) -> dict:
- """
+ """
Fetches information for a single Kattis problem
:param problem_id: id of a Kattis problem
:rtype: json object
- obj = {
- "url": URL + problem_id,
- "stats_url": URL + problem_id + "/statistics",
- }
+ # Start with the two page URLs; the helpers called below add the "info" and "stats" keys.
+ obj = {
+ "url": URL + problem_id,
+ "stats_url": URL + problem_id + "/statistics",
+ }
- problem_page = Utils.html_page(requests.get(obj["url"]))
- stats_page = Utils.html_page(requests.get(obj["stats_url"]))
+ # Both pages are obtained through database.get(); the first argument is the name the page is stored under.
+ problem_page = database.get(problem_id, obj["url"])
+ stats_page = database.get(problem_id + "_statistics", obj["stats_url"])
- add_problem_information(problem_page, obj)
- add_problem_statistics(stats_page, obj)
+ # Both helpers write into obj in place and return None.
+ add_problem_information(problem_page, obj)
+ add_problem_statistics(stats_page, obj)
+ return obj
- return obj
def add_problem_information(problem_page, problem: dict) -> None:
- """
+ """
Parses problem information and adds it
to problem object
- fields = ["time_limit", "memory_limit", "difficulty"]
- info = problem_page.find("div", {"class": "sidebar-info"}).findAll("p", recursive=True)[1:-1]
- for i in range(len(info)):
- s = re.compile(r"[^\d.]+")
- info[i] = s.sub("", str(info[i]))
- problem["info"] = {fields[i]: info[i] for i in range(min(len(info), len(fields)))}
+ # Keys of problem["info"]; values are matched to them by position.
+ fields = ["time_limit", "memory_limit", "difficulty"]
+ # Only the first three "metadata_list-item" divs are used.
+ # NOTE(review): assumes the page lists time limit, memory limit and difficulty first, in that order - confirm.
+ info = problem_page.findAll("div", "metadata_list-item")[:3]
+ for i in range(len(info)):
+ # Text of the element that follows the item's first <span>, with surrounding whitespace removed.
+ s = info[i].find('span').find_next_sibling().text.strip()
+ # Remove every ASCII letter (presumably unit text - confirm) and strip again.
+ info[i] = re.sub(r'[a-zA-Z]', '', s).strip()
+ # min() keeps the pairing safe when fewer than three items were found.
+ problem["info"] = {fields[i]: info[i] for i in range(min(len(info), len(fields)))}
def add_problem_statistics(stats_page, problem: dict) -> None:
- """
+ """
Parses problem statistics and adds it
to problem object
- fields = [
- "submissions",
- "accepted_submissions",
- "submission_ratio",
- "authors",
- "accepted_authors",
- "author_ratio",
- ]
+ # Keys of problem["stats"]; values are matched to them by position.
+ # NOTE(review): assumes the table cells appear in this order - confirm against the page.
+ fields = [
+ "submissions",
+ "accepted_submissions",
+ "submission_ratio",
+ "authors",
+ "accepted_authors",
+ "author_ratio",
+ ]
- stats = stats_page.find("div", {"class": "stats-content"}).findAll("li", recursive=True)[:6]
+ # All rows of the first table matching class_="table2 condensed mt-5".
+ # NOTE(review): find() returns None when nothing matches, so .findAll would raise AttributeError - confirm the table is always present.
+ stats = stats_page.find("table", class_="table2 condensed mt-5").findAll("tr")
- for i in range(len(stats)):
- s = re.compile(r"[^\d.]+")
- stats[i] = s.sub("", str(stats[i]))
+ # For every <td> after the first one in each row: remove the HTML tags,
+ # then trim leading/trailing newlines and "%" characters.
+ stats = [re.sub(r'<[^>]+>', '', str(td)).strip('\n%') for tr in stats for td in tr.findAll("td")[1:]]
+ # min() keeps the pairing safe when fewer cells than fields were found.
+ problem["stats"] = {
+ fields[i]: stats[i] for i in range(min(len(stats), len(fields)))
+ }
- problem["stats"] = {fields[i]: stats[i] for i in range(min(len(stats), len(fields)))}
def problem_list(page):
- """
+ """
Returns a list of problem ID's scraped from a
Kattis problem page
:param page: problem page
- problems = page.findAll("a", recursive=True)[18:-4]
- return [str(problems[i]).split("/")[2].split('"')[0] for i in range(0, len(problems), 3)]
+ # Drop the first 18 and the last 4 anchors found on the page.
+ # NOTE(review): presumably navigation and footer links; these offsets depend on the page layout - confirm.
+ problems = page.findAll("a", recursive=True)[18:-4]
+ # Every third remaining anchor is used: the id is the third "/"-separated
+ # piece of the tag's markup, cut off at the first double quote.
+ return [
+ str(problems[i]).split("/")[2].split('"')[0] for i in range(0, len(problems), 3)
+ ]
diff --git a/kattis/user.py b/kattis/user.py
index a9abe47..6a67857 100644
--- a/kattis/user.py
+++ b/kattis/user.py
@@ -3,76 +3,82 @@
from .utils import Utils
from .problems import problem
class KattisUser:
- """
+ """
An authenticated Kattis User
:param username: kattis username
:param password: kattis password
:param cookies: user login cookies
- def __init__(self, username, password, cookies):
- self.__username = username
- self.__password = password
- self.__cookies = cookies
- self.__submission_url = "https://open.kattis.com/users/"
- self.__problem_url = "https://open.kattis.com/problems?show_solved=on&show_tried=off&show_untried=off"
- def problems(self, pages=1) -> dict:
- """
- Gets a users solved problems.
- """
- obj, data, count = {}, {"script": "true"}, 0
- for page in range(pages):
- problem_page = Utils.html_page(
- requests.get(
- self.__problem_url + "&page={}".format(page),
- data=data,
- cookies=self.__cookies,
- )
- )
- problem_list = problem_page.find_all("td", {"class", "name_column"})
+ def __init__(self, username, password, cookies):
+ # Attributes starting with two underscores are name-mangled by Python (e.g. _KattisUser__username).
+ self.__username = username
+ self.__password = password
+ self.__cookies = cookies
+ # The username is appended to this address in stats().
+ self.__submission_url = "https://open.kattis.com/users/"
+ # Problem listing with only the show_solved filter switched on; problems() appends "&page=N".
+ self.__problem_url = "https://open.kattis.com/problems?show_solved=on&show_partial=off&show_tried=off&show_untried=off"
- for prob in problem_list:
- children = prob.findChildren("a", recursive=False, href=True)
- problem_id = children[0]["href"].split("/")[2]
- obj[problem_id] = problem(problem_id)
- count += 1
- obj["count"] = count
- return obj
+ def problems(self, pages=1) -> dict:
+ """
+ Gets a user's solved problems.
- def stats(self) -> dict:
- """
+ """
+ # obj maps each problem id to the result of problem(problem_id) and also receives a "count" entry.
+ obj, data, count = {}, {"script": "true"}, 0
+ for page in range(pages):
+ # Listing pages are requested with page=0 up to page=pages-1, sending the login cookies.
+ problem_page = Utils.html_page(
+ requests.get(
+ self.__problem_url + "&page={}".format(page),
+ data=data,
+ cookies=self.__cookies,
+ )
+ )
+ # Rows of the first table with CSS class "table2".
+ # NOTE(review): find() returns None when there is no such table, which would raise AttributeError here - confirm.
+ problem_list = problem_page.find("table", "table2").find_all("tr")
+ for prob in problem_list[1:]: # skip table header
+ children = prob.findChildren("a")
+ # The id is the third "/"-separated piece of the href of the row's first link.
+ problem_id = children[0]["href"].split("/")[2]
+ obj[problem_id] = problem(problem_id) # can take very long if there are many solved problems
+ count += 1
+ obj["count"] = count
+ return obj
+ def stats(self) -> dict:
+ """
Gets a users stats (score, rank)
- fields, data = ["score", "rank"], {"script": "true"}
- stats_page = Utils.html_page(
- requests.get(
- self.__submission_url + self.__username,
- data=data,
- cookies=self.__cookies,
- )
- )
- # Parse score and rank
- user_stats = stats_page.find("ul", {"class": "profile-header-list"}).findAll("li")
+ # fields gives the keys of the returned dict, matched to the parsed items by position.
+ fields, data = ["score", "rank"], {"script": "true"}
+ # The profile page is requested at the users address followed by the username.
+ stats_page = Utils.html_page(
+ requests.get(
+ self.__submission_url + self.__username,
+ data=data,
+ cookies=self.__cookies,
+ )
+ )
- for i in range(len(user_stats)):
- s = re.compile(r"[^\d.]+")
- user_stats[i] = s.sub("", str(user_stats[i]))
+ # Parse score and rank
+ user_stats = stats_page.findAll("div", "divider_list-item")
+ # Keep only digits and "." from the markup of each item.
+ # NOTE(review): the regex runs on the element's full HTML, so digits inside attributes would be kept as well - confirm.
+ for i in range(len(user_stats)):
+ s = re.compile(r"[^\d.]+")
+ user_stats[i] = s.sub("", str(user_stats[i]))
- return {fields[i]: user_stats[i] for i in range(min(len(user_stats), len(fields)))}
+ # min() keeps the pairing safe when fewer items than fields were found.
+ return {
+ fields[i]: user_stats[i] for i in range(min(len(user_stats), len(fields)))
+ }
- def data(self) -> dict:
- """
+ def data(self) -> dict:
+ """
Combined solved problems and user stats
- pages = 28
- return {"username": self.__username, "stats": self.stats(), "problems": self.problems(pages)}
+ # NOTE(review): hard-coded number of listing pages passed to problems() - confirm it still covers the whole listing.
+ pages = 28
+ return {
+ "username": self.__username,
+ "stats": self.stats(),
+ "problems": self.problems(pages)
+ }