From 922623d01534a87cf1ab323951209934524088dc Mon Sep 17 00:00:00 2001 From: Peter Law Date: Sat, 14 Sep 2024 14:20:51 +0100 Subject: [PATCH] Extract common utils in rendering SRAWN This removes a lot of duplicate handling & validation of path names in favour of having a single source of this logic. It also relocates a couple of instances of similar but separate logic to being alongside each other within the utils file so that even though they can't share code, they are at least defined next to each other. Alongside doing this, we also fix some warnings from Python due to the regexes not being marked as raw strings (such that '\d' was previously trying to be an escape at the string parsing level, rather than a regex escape). --- scripts/render-feed.py | 18 +++------- scripts/render-indices.py | 38 +++++++++----------- scripts/render-mjml.py | 23 +++++------- scripts/srawn_utils.py | 76 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 106 insertions(+), 49 deletions(-) create mode 100644 scripts/srawn_utils.py diff --git a/scripts/render-feed.py b/scripts/render-feed.py index 3c557e5..3c6aac5 100755 --- a/scripts/render-feed.py +++ b/scripts/render-feed.py @@ -7,6 +7,7 @@ from feedgenerator import DefaultFeed import mistune +import srawn_utils working_dir = Path('.') feed = DefaultFeed( @@ -18,26 +19,17 @@ md = mistune.create_markdown() -for md_path in sorted(working_dir.glob("SR20*/*.md")): - filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem) - if not filename_match: - exit(f"{md_path.stem} does not match format. Run the linter.") - date, issue = filename_match.groups() - - folder_match = re.match("^(SR20\d{2})$", md_path.parent.name) - if not folder_match: - exit(f"{md_path.parent.name} does not match format. Run the linter.") - sryear, = folder_match.groups() - +for issue in srawn_utils.get_all_issues(working_dir): + md_path = issue.path link = f"https://studentrobotics.org/srawn/{md_path.parent.stem}/{md_path.stem}.html" content = md(md_path.read_text()) feed.add_item( - title=f"{sryear} Issue {issue}", + title=issue.title, link=link, description=content, unique_id=link, - pubdate=datetime.date.fromisoformat(date), + pubdate=issue.date, content=content ) diff --git a/scripts/render-indices.py b/scripts/render-indices.py index bc2c8d1..db6ce60 100755 --- a/scripts/render-indices.py +++ b/scripts/render-indices.py @@ -4,6 +4,8 @@ import jinja2 +import srawn_utils + # cspell:disable-next-line templateLoader = jinja2.FileSystemLoader(searchpath="./templates") templateEnv = jinja2.Environment(loader=templateLoader) @@ -13,29 +15,23 @@ working_dir = Path('.') html_dir = working_dir / "out/html" -years = sorted(working_dir.glob("SR20*"), reverse=True) +years = srawn_utils.get_years(working_dir) # render main index with html_dir.joinpath("index.html").open("w") as fh: fh.write(archive_template.render(years=years)) -# render year indices -for year in years: - year_html_path = html_dir / year - year_path = working_dir / year - issues = sorted(year_path.glob("*.md")) - issue_data = [] - for md_path in issues: - filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem) - if not filename_match: - exit(f"{md_path.stem} does not match format. Run the linter.") - date, issue = filename_match.groups() - - folder_match = re.match("^(SR20\d{2})$", md_path.parent.name) - if not folder_match: - exit(f"{md_path.parent.name} does not match format. Run the linter.") - sryear, = folder_match.groups() - issue_data.append((md_path.stem, f"{date}: {sryear} Issue {issue}")) - - with year_html_path.joinpath("index.html").open("w") as fh: - fh.write(year_template.render(year=year, issues=issue_data)) +with srawn_utils.exit_on_invalid(): + # render year indices + for year in years: + year_html_path = html_dir / year + year_path = working_dir / year + issue_data = [] + for issue in srawn_utils.get_year_issues(year_path): + issue_data.append(( + issue.path.stem, + f"{issue.date_text}: {issue.title}", + )) + + with year_html_path.joinpath("index.html").open("w") as fh: + fh.write(year_template.render(year=year, issues=issue_data)) diff --git a/scripts/render-mjml.py b/scripts/render-mjml.py index 5bbdd7b..74add93 100755 --- a/scripts/render-mjml.py +++ b/scripts/render-mjml.py @@ -8,6 +8,8 @@ import jinja2 import mistune +import srawn_utils + FONTS: Dict[int, Tuple[str, int, Optional[int]]] = { 0: ("Open Sans", 20, 25), # Paragraph 1: ("Open Sans", 35, None), @@ -62,24 +64,15 @@ def render_paragraph(self, text: str, *, font_size: int, font_family: str, line_ if not md_path.is_file(): exit(f"{md_path} is not a file") - filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem) - if not filename_match: - exit(f"{md_path.stem} does not match format. Run the linter.") - date, issue = filename_match.groups() - - folder_match = re.match("^(SR20\d{2})$", md_path.parent.name) - if not folder_match: - exit(f"{md_path.parent.name} does not match format. Run the linter.") - sryear, = folder_match.groups() - + with srawn_utils.exit_on_invalid(): + parsed_issue_path = srawn_utils.parse_path(md_path) with md_path.open("r") as fh: raw_markdown = fh.read() # Remove title from old issues. This prevents duplicate titles in the archive # without changing the original files. - raw_markdown = re.sub("^# SR\(A\)WN \d{4} [—-]+ \d+$", "", raw_markdown, flags=re.MULTILINE) - + raw_markdown = re.sub(r"^# SR\(A\)WN \d{4} [—-]+ \d+$", "", raw_markdown, flags=re.MULTILINE) md = mistune.create_markdown(renderer=MJMLRenderer()) content = md(raw_markdown) @@ -90,9 +83,9 @@ def render_paragraph(self, text: str, *, font_size: int, font_family: str, line_ template = templateEnv.get_template("newsletter.mjml.j2") output = template.render( - date=date, - sryear=sryear, - issue=issue, + date=parsed_issue_path.date_text, + sryear=parsed_issue_path.sryear, + issue=parsed_issue_path.issue_number, content=content, ) print(output) diff --git a/scripts/srawn_utils.py b/scripts/srawn_utils.py new file mode 100644 index 0000000..8a1d5ac --- /dev/null +++ b/scripts/srawn_utils.py @@ -0,0 +1,76 @@ +import re +import contextlib +import dataclasses,datetime +from collections.abc import Iterator +from pathlib import Path + + +class InvalidPath(ValueError): + pass + + +@dataclasses.dataclass(frozen=True) +class ParsedIssuePath: + path: Path + date: datetime.date + issue_number: int + sryear: str + + @property + def title(self) -> str: + return f"{self.sryear} Issue {self.issue_number}" + + @property + def date_text(self) -> str: + return self.date.isoformat() + + +def parse_path(path: Path) -> ParsedIssuePath: + filename_match = re.match( + r"^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", + path.stem, + ) + if not filename_match: + raise InvalidPath( + f"{path.stem!r} does not match format. Run the linter.", + ) + date, issue_number = filename_match.groups() + + folder_match = re.match( + r"^(SR20\d{2})$", + path.parent.name, + ) + if not folder_match: + raise InvalidPath( + f"{path.parent.name!r} does not match format. Run the linter.", + ) + sryear, = folder_match.groups() + + return ParsedIssuePath( + path, + datetime.date.fromisoformat(date), + int(issue_number), + sryear, + ) + + +@contextlib.contextmanager +def exit_on_invalid() -> Iterator[None]: + try: + yield + except InvalidPath as e: + exit(str(e)) + + +def get_years(root: Path) -> list[Path]: + return sorted(root.glob("SR20*"), reverse=True) + + +def get_year_issues(year_root: Path) -> list[ParsedIssuePath]: + paths = sorted(year_root.glob("*.md")) + return [parse_path(x) for x in paths] + + +def get_all_issues(root: Path) -> list[ParsedIssuePath]: + paths = sorted(root.glob("SR20*/*.md")) + return [parse_path(x) for x in paths]