Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP]: Integrazione del download delle videolezioni presenti nel vecchio portale elearning #1

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions src/politodown/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sre_compile
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this module isn't used anywhere.

Suggested change
import sre_compile

from typing import Optional, Callable, AsyncIterator, Union, Dict, List
import datetime
import asyncio
Expand Down Expand Up @@ -187,6 +188,116 @@ async def _get_videolesson_info(

return File(self, filename, videohref, properties=properties)

class Videostore_old(Videostore):
    """
    Videostore whose lessons are served by the old e-learning portal.

    The lesson index is scraped from `gadgets/video/template_video.php`
    on the e-learning host and the resulting files are cached per instance.
    """

    def __init__(
        self,
        year: int,
        category: str,
        name: str,
        inc: int,
        utente: str,
        data: str,
        token: str
    ):
        """
        Store the course identity and build the lesson-index URL.

        `inc`, `utente`, `data` and `token` are forwarded unchanged as the
        query parameters of the lesson-index page.
        """
        # NOTE(review): Videostore.__init__ is not called here, exactly as in
        # the original implementation; confirm the base class does not set up
        # any other state that the inherited methods rely on.
        self.year = year
        self.name = name
        self.category = category
        self.vis = httpx.URL(
            urls.elearn/"gadgets/video/template_video.php",
            params={
                'inc': inc,
                'utente': utente,
                'data': data,
                'token': token,
            }
        )
        # Cache filled lazily by `videolessons()`, keyed by lesson name.
        self._videolessons = {}

    async def videolessons(
        self,
        force_update: bool = False
    ) -> dict[str, "File"]:
        """
        Get videolessons and cache the response.

        The cache is overwritten only if `force_update` is `True`
        (an empty cache is always refreshed).
        """
        if self._videolessons and not force_update:
            return self._videolessons

        coros = await self._get_videolessons()
        # Fetch every lesson page concurrently.
        files = await asyncio.gather(*coros)
        self._videolessons = {
            videolesson.properties["name"]: videolesson
            for videolesson in files
        }

        return self._videolessons

    async def _get_videolessons(self):
        """
        Scrape the lesson index and prepare one fetch per lesson.

        Returns a list of not-yet-awaited `_get_videolesson_info` coroutines,
        one for each lesson listed in the index page.

        Raises `IndexError` when the page has no `<ul class="lezioni">`.
        """
        response = await session.get(self.vis)
        page = bs4.BeautifulSoup(response.content, "html.parser")

        summary = page.find_all("ul", {"class": "lezioni"})[0]
        lessons = summary.find_all("a")
        dates = summary.find_all("span", {"class": "small"})
        lessons_arguments = summary.find_all("li", {"class": "argEspansi1"})

        coros = []

        # The three lists are walked in lockstep; this assumes the page lists
        # one link, one date and one arguments item per lesson, in that order.
        for lesson, date_tag, arguments_tag in zip(
            lessons, dates, lessons_arguments
        ):
            # Name
            name = lesson.text

            # Date: the span reads "del YYYY-mm-dd", drop the "del " prefix
            date = datetime.datetime.strptime(date_tag.text[4:], "%Y-%m-%d")

            # Arguments
            arguments = [
                argument.text
                for argument in arguments_tag.find_all(
                    "a", {"class": "argoLink"}
                )
            ]

            # Open the videolesson page to extract infos about the video file
            url = urls.elearn/"gadgets/video/"/lesson['href']

            coros.append(
                self._get_videolesson_info(url, name, date, arguments)
            )

        return coros

    async def _get_videolesson_info(
        self,
        url: urls.BaseURL,
        name: str,
        date: datetime.datetime,
        arguments: List[str]
    ) -> "File":
        """
        Fetch a single lesson page and wrap its video in a `File`.

        `name`, `date` and `arguments` come from the lesson index and are
        stored in the file properties together with every label/value pair
        found in the `tooltip1` info box of the lesson page.
        """
        async with session.stream("GET", url) as stream:
            page = bs4.BeautifulSoup(await stream.aread(), "html.parser")

        videohref = (
            urls.elearn/"gadgets/video/"/page.find("a", text="Video")["href"]
        )

        # `find` (not `find_all`) is required here: a ResultSet has no
        # `find_all`, so the lookups below need a single Tag.
        videoinfo = page.find('div', {'id': 'tooltip1'})
        # Use the cell text rather than the Tag itself as the file name
        # (File presumably expects a plain string; verify against File).
        filename = videoinfo.find_all('td', {'class': 'value'})[0].text.strip()

        # Each row of the info box is expected to hold a label cell and a
        # value cell; rows with a different shape are skipped.
        info_rows = [row.find_all("td") for row in videoinfo.find_all("tr")]

        properties = {
            "name": name,
            "date": date,
            "arguments": arguments,
            **{
                cells[0].text.strip().lower(): cells[1].text
                for cells in info_rows
                if len(cells) == 2
            },
        }

        return File(self, filename, videohref, properties=properties)



class Folder:
"""
Expand Down
25 changes: 21 additions & 4 deletions src/politodown/polito.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import httpx
import bs4

from .datatypes import Material, Videostore
from .datatypes import Material, Videostore, Videostore_old
from . import session, urls

async def get_material(year: int) -> dict[str, Material]:
Expand Down Expand Up @@ -53,7 +53,7 @@ async def get_videostores(year: int) -> dict[str, Videostore]:
page = bs4.BeautifulSoup(response.content, "html.parser")
videostores = {}

data_regex = re.compile(r"sviluppo\.videolezioni\.vis\?cor=(\d+)")
data_regex = re.compile(r"(sviluppo\.videolezioni\.vis\?cor=(\d+))|(javascript:void\(null\);)")
raw_videostores = page.find_all("a", {"onclick": re.compile(r"showDivVideoteca\('\w+'\)")})
videolessons_group = page.find_all("div", {"class": "policorpo"})
for videostore, raw_videolessons in zip(raw_videostores, videolessons_group):
Expand All @@ -70,9 +70,26 @@ async def get_videostores(year: int) -> dict[str, Videostore]:
continue

videolesson_name = videolesson.text.strip()
cor, = data_regex.search(videolesson["href"]).groups()
videolessons[videolesson_name] = \
if data_regex.match(videolesson["href"]).group(1):
cor = data_regex.search(videolesson["href"]).group(2)
Comment on lines +73 to +74
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

data_regex.match(videolesson["href"]) is already used in line 65. What if we stored its result in a variable and reused it here?

videolessons[videolesson_name] = \
Videostore(year, videostore_name, videolesson_name, cor)
else:
inc_regex = re.compile(r"dokeosLez\(\'(\d+)\'\)")
inc, = inc_regex.match(videolesson["onclick"]).groups()
data = await session.get(
httpx.URL(
urls.did/"pls/portal30/sviluppo.materiale.json_dokeos_par",
params={"inc": inc}
)
)
data.raise_for_status()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Check whether it's safe to raise an error in this part of the code, or if it should just return instead.

data_json = data.json()
videolessons[videolesson_name] = \
Videostore_old(year, videostore_name, videolesson_name, inc, data_json['utente'], data_json['data'], data_json['token']
)



videostores[videostore_name] = videolessons

Expand Down
2 changes: 2 additions & 0 deletions src/politodown/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ def __truediv__(self, url):

# Base URLs of the PoliTo hosts the package talks to.
IDP = BaseURL("https://idp.polito.it/")
did = BaseURL("https://didattica.polito.it/")
# E-learning host; the old-portal video lesson pages are fetched from it.
elearn = BaseURL("https://elearning.polito.it/")

# Login endpoints, both built on top of the IDP host.
loginpage = IDP/"idp/x509mixed-login"
login = IDP/"idp/Authn/X509Mixed/UserPasswordLogin"