Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix API for new kattis website layout #1

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions kattis/database.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import requests
import os
from .utils import Utils
from bs4 import BeautifulSoup

# Directory (with trailing slash) under the user's home where fetched pages are cached.
CACHE_DIR = f"{os.path.expanduser('~')}/.cache/kattis/"

class Database:
    """
    On-disk cache of downloaded HTML pages stored under ``CACHE_DIR``.

    ``self.db`` is the set of file names currently present in the cache
    directory; every page is stored as ``<filename>.html``.
    """

    def __init__(self):
        """Create the cache directory if needed and index the files in it."""
        os.makedirs(CACHE_DIR, exist_ok=True)
        # Only index regular files directly inside CACHE_DIR: get() always
        # opens CACHE_DIR + filename, so entries from subdirectories could
        # never be read back.
        with os.scandir(CACHE_DIR) as entries:
            self.db = {entry.name for entry in entries if entry.is_file()}

    def get(self, filename, url):
        """
        Return the parsed page for ``url``, reading it from the cache when possible

        :param filename: cache key; ``.html`` is appended to form the file name
        :param url: address to download when the page is not cached yet
        :rtype: parsed page; a BeautifulSoup object on a cache hit, otherwise
            whatever ``Utils.html_page`` returns for the response
            (presumably also a BeautifulSoup object -- confirm in utils)
        """
        filename += ".html"
        path = os.path.join(CACHE_DIR, filename)

        if filename in self.db:
            with open(path, "r", encoding="utf-8") as cached:
                return BeautifulSoup(cached.read(), "html.parser")

        r = requests.get(url)
        # Never persist an error page: it would be served from the cache
        # forever instead of being retried.
        if r.ok:
            with open(path, "w", encoding="utf-8") as cached:
                cached.write(r.text)
            # Remember the new file so the next call is a cache hit.
            self.db.add(filename)
        return Utils.html_page(r)
94 changes: 49 additions & 45 deletions kattis/problems.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,93 @@
import requests
import re
import json
from .utils import Utils
from kattis.database import Database

# Base address that problem pages and problem listing pages are built from.
URL = "https://open.kattis.com/problems/"
# Shared Database instance used by problem() to fetch problem and statistics pages.
database = Database()

def problems(pages=1) -> list:
    """
    Fetches all Kattis problems

    Downloads ``pages`` listing pages (page numbers start at 0) and builds a
    problem object for every problem id found on them.

    :param pages: number of problem pages, defaults to 1
    :rtype: list of problem objects
    """
    ret = []
    for page in range(pages):
        probs = Utils.html_page(requests.get(URL + "?page={}".format(page)))
        ret.extend(problem(problem_id) for problem_id in problem_list(probs))
    return ret


def problem(problem_id: str) -> dict:
    """
    Fetches information for a single Kattis problem

    Both the problem page and its statistics page are obtained through the
    shared ``database`` object, keyed by ``<problem_id>`` and
    ``<problem_id>_statistics`` respectively.

    :param problem_id: id of a Kattis problem
    :rtype: json object with ``url`` and ``stats_url`` plus the ``info`` and
        ``stats`` entries added by the parsing helpers
    """
    obj = {
        "url": URL + problem_id,
        "stats_url": URL + problem_id + "/statistics",
    }

    problem_page = database.get(problem_id, obj["url"])
    stats_page = database.get(problem_id + "_statistics", obj["stats_url"])

    add_problem_information(problem_page, obj)
    add_problem_statistics(stats_page, obj)

    return obj

def add_problem_information(problem_page, problem: dict) -> None:
    """
    Parses problem information and adds it
    to problem object

    Reads up to the first three ``metadata_list-item`` divs of the page, in
    the order time limit, memory limit, difficulty, and stores the cleaned
    values under ``problem["info"]``. Fewer items on the page simply yield
    fewer keys.

    :param problem_page: parsed problem page
    :param problem: problem object, updated in place
    """
    fields = ["time_limit", "memory_limit", "difficulty"]
    items = problem_page.findAll("div", "metadata_list-item")[: len(fields)]

    values = []
    for item in items:
        # The value is the element following the item's label <span>.
        raw = item.find("span").find_next_sibling().text.strip()
        # Drop unit/label letters, keeping only the numeric part.
        values.append(re.sub(r"[a-zA-Z]", "", raw).strip())

    problem["info"] = dict(zip(fields, values))

def add_problem_statistics(stats_page, problem: dict) -> None:
    """
    Parses problem statistics and adds it
    to problem object

    Takes every ``<td>`` except the first of each row of the statistics
    table, strips markup, newlines and ``%`` signs, and stores the values
    under ``problem["stats"]`` in the order of ``fields``. When the table is
    not present the stats entry is an empty dict.

    :param stats_page: parsed statistics page
    :param problem: problem object, updated in place
    """
    fields = [
        "submissions",
        "accepted_submissions",
        "submission_ratio",
        "authors",
        "accepted_authors",
        "author_ratio",
    ]

    table = stats_page.find("table", class_="table2 condensed mt-5")
    if table is None:
        # No statistics table on the page: record nothing instead of
        # failing with AttributeError on None.
        problem["stats"] = {}
        return

    # Extract the numeric values from each <td> tag (the first cell of a
    # row is skipped).
    values = [
        re.sub(r"<[^>]+>", "", str(td)).strip("\n%")
        for tr in table.findAll("tr")
        for td in tr.findAll("td")[1:]
    ]

    problem["stats"] = dict(zip(fields, values))

def problem_list(page):
    """
    Returns a list of problem ID's scraped from a
    Kattis problem page

    :param page: problem page
    :rtype: list of problem id strings
    """
    # The first 18 and last 4 anchors are dropped -- presumably page
    # navigation/footer links; confirm against the current page layout.
    anchors = page.findAll("a", recursive=True)[18:-4]
    # Only every third remaining anchor is used; the id is the path segment
    # after "/problems/" in the anchor's markup.
    return [str(anchor).split("/")[2].split('"')[0] for anchor in anchors[::3]]
114 changes: 60 additions & 54 deletions kattis/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,76 +3,82 @@
from .utils import Utils
from .problems import problem


class KattisUser:
    """
    An authenticated Kattis User

    :param username: kattis username
    :param password: kattis password
    :param cookies: user login cookies
    """

    def __init__(self, username, password, cookies):
        self.__username = username
        self.__password = password
        self.__cookies = cookies
        self.__submission_url = "https://open.kattis.com/users/"
        # Problem listing with only the "solved" filter switched on.
        self.__problem_url = (
            "https://open.kattis.com/problems"
            "?show_solved=on&show_partial=off&show_tried=off&show_untried=off"
        )

    def problems(self, pages=1) -> dict:
        """
        Gets a users solved problems.

        :param pages: number of listing pages to fetch, defaults to 1
        :rtype: dict mapping each problem id to its problem object, plus a
            ``"count"`` key with the number of problem rows processed
        """
        obj, data, count = {}, {"script": "true"}, 0

        for page in range(pages):
            problem_page = Utils.html_page(
                requests.get(
                    self.__problem_url + "&page={}".format(page),
                    data=data,
                    cookies=self.__cookies,
                )
            )

            table = problem_page.find("table", "table2")
            if table is None:
                # Page without a problem table: nothing to collect here.
                continue

            for row in table.find_all("tr")[1:]:  # skip table header
                links = row.findChildren("a")
                if not links:
                    # Row without a problem link; nothing to resolve.
                    continue
                problem_id = links[0]["href"].split("/")[2]
                # can take very long if there are many solved problems
                obj[problem_id] = problem(problem_id)
                count += 1

        obj["count"] = count
        return obj

    def stats(self) -> dict:
        """
        Gets a users stats (score, rank)

        :rtype: dict with up to the keys ``score`` and ``rank``, filled in
            page order from the ``divider_list-item`` divs of the profile page
        """
        fields, data = ["score", "rank"], {"script": "true"}

        stats_page = Utils.html_page(
            requests.get(
                self.__submission_url + self.__username,
                data=data,
                cookies=self.__cookies,
            )
        )

        # Parse score and rank: keep only digits and dots of each item's markup.
        non_numeric = re.compile(r"[^\d.]+")
        user_stats = [
            non_numeric.sub("", str(item))
            for item in stats_page.findAll("div", "divider_list-item")
        ]

        return dict(zip(fields, user_stats))

    def data(self, pages=28) -> dict:
        """
        Combined solved problems and user stats

        :param pages: number of solved-problem listing pages to scan,
            defaults to 28
        :rtype: dict with ``username``, ``stats`` and ``problems`` entries
        """
        return {
            "username": self.__username,
            "stats": self.stats(),
            "problems": self.problems(pages),
        }