From bf08147de0aaea2ca98f6a32a581ba298560382e Mon Sep 17 00:00:00 2001 From: Andrea Barbagallo Date: Sat, 17 Sep 2022 14:03:23 +0200 Subject: [PATCH 1/3] Inizio implementazione per videocorsi nel vecchio portale elearning --- src/politodown/datatypes.py | 111 ++++++++++++++++++++++++++++++++++++ src/politodown/polito.py | 15 +++-- src/politodown/urls.py | 2 + 3 files changed, 124 insertions(+), 4 deletions(-) diff --git a/src/politodown/datatypes.py b/src/politodown/datatypes.py index 5e242db..c57276b 100644 --- a/src/politodown/datatypes.py +++ b/src/politodown/datatypes.py @@ -1,3 +1,4 @@ +import sre_compile from typing import Optional, Callable, AsyncIterator, Union, Dict, List import datetime import asyncio @@ -187,6 +188,116 @@ async def _get_videolesson_info( return File(self, filename, videohref, properties=properties) +class Videostore_old(Videostore): + def __init__( + self, + year: int, + category: str, + name: str, + inc: int, + utente: str, + data: str, + token: str + ): + + self.year = year + self.name = name + self.category = category + self.vis = httpx.URL( + urls.elearn/"template_video.php", + params = { + 'inc': inc, + 'utente': utente, + 'data': data, + 'token': token, + } + + ) + self._videolessons = {} + + async def videolessons( + self, + force_update: bool = False + ) -> dict[str, "File"]: + """ + Get videolessons and cache the response. + + Cache will be overwrite only if `force_update` is `True` + """ + if self._videolessons and not force_update: + return self._videolessons + + coros = await self._get_videolessons() + self._videolessons = { + videolesson.properties["name"]: videolesson + for videolesson in await asyncio.gather(*coros) + } + + return self._videolessons + + async def _get_videolessons(self): + response = await session.get(self.vis) + page = bs4.BeautifulSoup(response.content, "html.parser") + + summary = page.find_all("ul", {"class": "lezioni"}) + lessons = summary.find_all("a") + dates = summary.find_all("span", {"class": "small"}) + lessons_arguments = summary.find_all("li", {"class": "argEspansi1"}) + + coros = [] + + for lesson, date, arguments in zip(lessons, dates, lessons_arguments): + # Name + name = lesson.text + + # Date + raw_date = date.text[4:] # date = "del YYYY-mm-dd" + date = datetime.datetime.strptime(raw_date, "%Y-%m-%d") + + # Arguments + arguments = [ + argument.text + for argument in arguments.find_all("a", {"class": "argoLink"}) + ] + + # Open the videolesson page to extract infos about the video file + url = urls.elearn/lesson['href'] + + coros.append(self._get_videolesson_info(url, name, date, arguments)) + + return coros + + async def _get_videolesson_info( + self, + url: urls.BaseURL, + name: str, + date: datetime.datetime, + arguments: List[str] + ) -> "File": + async with session.stream("GET", url) as stream: + page = bs4.BeautifulSoup(await stream.aread(), "html.parser") + + videohref = urls.elearn/page.find("a", text="Video")["href"] + + videoinfo = page.find_all('div', {'id':'tooltip1'}) + filename = videoinfo.find_all('td', {'class':'value'})[0] + + properties = { + "name": name, + "date": date, + "arguments": arguments, + **{ + name.text.strip().lower(): value.text + for name, value in [ + info.find_all("td") + for info in videoinfo.find_all("tr") + ] + }, + } + + return File(self, filename, videohref, properties=properties) + + class Folder: """ diff --git a/src/politodown/polito.py b/src/politodown/polito.py index e8d9659..e76720a 100644 --- a/src/politodown/polito.py +++ b/src/politodown/polito.py @@ -4,7 +4,7 @@ import httpx import bs4 -from .datatypes import Material, Videostore +from .datatypes import Material, Videostore, Videostore_old from . import session, urls async def get_material(year: int) -> dict[str, Material]: @@ -53,7 +53,7 @@ async def get_videostores(year: int) -> dict[str, Videostore]: page = bs4.BeautifulSoup(response.content, "html.parser") videostores = {} - data_regex = re.compile(r"sviluppo\.videolezioni\.vis\?cor=(\d+)") + data_regex = re.compile(r"(sviluppo\.videolezioni\.vis\?cor=(\d+))|(javascript:void\(null\);)") raw_videostores = page.find_all("a", {"onclick": re.compile(r"showDivVideoteca\('\w+'\)")}) videolessons_group = page.find_all("div", {"class": "policorpo"}) for videostore, raw_videolessons in zip(raw_videostores, videolessons_group): @@ -70,9 +70,16 @@ async def get_videostores(year: int) -> dict[str, Videostore]: continue videolesson_name = videolesson.text.strip() - cor, = data_regex.search(videolesson["href"]).groups() - videolessons[videolesson_name] = \ + if data_regex.match(videolesson["href"]).group(1): + cor = data_regex.search(videolesson["href"]).group(2) + videolessons[videolesson_name] = \ Videostore(year, videostore_name, videolesson_name, cor) + else: + inc_regex = re.compile(r"dokeosLez\(\'(\d+)\'\)") + inc, = inc_regex.match(videolesson["onclick"]).groups() + videolessons[videolesson_name] = \ + Videostore_old(year, videostore_name, videolesson_name, inc) + videostores[videostore_name] = videolessons diff --git a/src/politodown/urls.py b/src/politodown/urls.py index 9f9fb32..c020733 100644 --- a/src/politodown/urls.py +++ b/src/politodown/urls.py @@ -12,6 +12,8 @@ def __truediv__(self, url): IDP = BaseURL("https://idp.polito.it/") did = BaseURL("https://didattica.polito.it/") +elearn = BaseURL("https://elearning.polito.it/gadgets/video/") loginpage = IDP/"idp/x509mixed-login" login = IDP/"idp/Authn/X509Mixed/UserPasswordLogin" + From e6e3395a67c93b8601342e0398fab3015ce315b4 Mon Sep 17 00:00:00 2001 From: Andrea Barbagallo Date: Sat, 17 Sep 2022 16:59:56 +0200 Subject: [PATCH 2/3] Ottiene correttamente l'elenco di tutte le materie, non sono riuscito a testare il download. da capire se il get lesson all'interno della classe funziona correttamente --- src/politodown/datatypes.py | 2 +- src/politodown/polito.py | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/politodown/datatypes.py b/src/politodown/datatypes.py index c57276b..23ae11f 100644 --- a/src/politodown/datatypes.py +++ b/src/politodown/datatypes.py @@ -239,7 +239,7 @@ async def _get_videolessons(self): response = await session.get(self.vis) page = bs4.BeautifulSoup(response.content, "html.parser") - summary = page.find_all("ul", {"class": "lezioni"}) + summary = page.find_all("ul", {"class": "lezioni"})[0] lessons = summary.find_all("a") dates = summary.find_all("span", {"class": "small"}) lessons_arguments = summary.find_all("li", {"class": "argEspansi1"}) diff --git a/src/politodown/polito.py b/src/politodown/polito.py index e76720a..f9427ce 100644 --- a/src/politodown/polito.py +++ b/src/politodown/polito.py @@ -77,8 +77,18 @@ async def get_videostores(year: int) -> dict[str, Videostore]: else: inc_regex = re.compile(r"dokeosLez\(\'(\d+)\'\)") inc, = inc_regex.match(videolesson["onclick"]).groups() + data = await session.get( + httpx.URL( + urls.did/"pls/portal30/sviluppo.materiale.json_dokeos_par", + params={"inc": inc} + ) + ) + data.raise_for_status() + data_json = data.json() videolessons[videolesson_name] = \ - Videostore_old(year, videostore_name, videolesson_name, inc) + Videostore_old(year, videostore_name, videolesson_name, inc, data_json['utente'], data_json['data'], data_json['token'] + ) + videostores[videostore_name] = videolessons From 06665214aade33d921ee51422f0fb4b1da180130 Mon Sep 17 00:00:00 2001 From: Andrea Barbagallo Date: Sun, 18 Sep 2022 00:47:18 +0200 Subject: [PATCH 3/3] Correzione elearn come BaseURL --- src/politodown/datatypes.py | 6 +++--- src/politodown/urls.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/politodown/datatypes.py b/src/politodown/datatypes.py index 23ae11f..297c26b 100644 --- a/src/politodown/datatypes.py +++ b/src/politodown/datatypes.py @@ -204,7 +204,7 @@ def __init__( self.name = name self.category = category self.vis = httpx.URL( - urls.elearn/"template_video.php", + urls.elearn/"gadgets/video/template_video.php", params = { 'inc': inc, 'utente': utente, @@ -261,7 +261,7 @@ async def _get_videolessons(self): ] # Open the videolesson page to extract infos about the video file - url = urls.elearn/lesson['href'] + url = urls.elearn/"gadgets/video/"/lesson['href'] coros.append(self._get_videolesson_info(url, name, date, arguments)) @@ -277,7 +277,7 @@ async def _get_videolesson_info( async with session.stream("GET", url) as stream: page = bs4.BeautifulSoup(await stream.aread(), "html.parser") - videohref = urls.elearn/page.find("a", text="Video")["href"] + videohref = urls.elearn/"gadgets/video/"/page.find("a", text="Video")["href"] videoinfo = page.find_all('div', {'id':'tooltip1'}) filename = videoinfo.find_all('td', {'class':'value'})[0] diff --git a/src/politodown/urls.py b/src/politodown/urls.py index c020733..724bde8 100644 --- a/src/politodown/urls.py +++ b/src/politodown/urls.py @@ -12,7 +12,7 @@ def __truediv__(self, url): IDP = BaseURL("https://idp.polito.it/") did = BaseURL("https://didattica.polito.it/") -elearn = BaseURL("https://elearning.polito.it/gadgets/video/") +elearn = BaseURL("https://elearning.polito.it/") loginpage = IDP/"idp/x509mixed-login" login = IDP/"idp/Authn/X509Mixed/UserPasswordLogin"