Extract common utils in rendering SRAWN

This removes a lot of duplicated handling and validation of path names in favour of a single source for this logic. It also moves a couple of instances of similar but separate logic so that they sit alongside each other within the utils file; even though they can't share code, they are at least defined next to each other. Alongside this, we also fix some Python warnings caused by the regexes not being marked as raw strings (so '\d' was previously being treated as an escape sequence at the string-parsing level, rather than as a regex escape).
srobo · Sep 14, 2024 · 922623d · 922623d
1 parent e7f9d86
commit 922623d
Show file tree

Hide file tree

Showing 4 changed files with 106 additions and 49 deletions.
diff --git a/scripts/render-feed.py b/scripts/render-feed.py
@@ -7,6 +7,7 @@
 from feedgenerator import DefaultFeed
 import mistune
 
+import srawn_utils
 
 working_dir = Path('.')
 feed = DefaultFeed(
@@ -18,26 +19,17 @@
 md = mistune.create_markdown()
 
 
-for md_path in sorted(working_dir.glob("SR20*/*.md")):
-    filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem)
-    if not filename_match:
-        exit(f"{md_path.stem} does not match format. Run the linter.")
-    date, issue = filename_match.groups()
-
-    folder_match = re.match("^(SR20\d{2})$", md_path.parent.name)
-    if not folder_match:
-        exit(f"{md_path.parent.name} does not match format. Run the linter.")
-    sryear, = folder_match.groups()
-
+for issue in srawn_utils.get_all_issues(working_dir):
+    md_path = issue.path
     link = f"https://studentrobotics.org/srawn/{md_path.parent.stem}/{md_path.stem}.html"
     content = md(md_path.read_text())
 
     feed.add_item(
-        title=f"{sryear} Issue {issue}",
+        title=issue.title,
         link=link,
         description=content,
         unique_id=link,
-        pubdate=datetime.date.fromisoformat(date),
+        pubdate=issue.date,
         content=content
     )
 

diff --git a/scripts/render-indices.py b/scripts/render-indices.py
@@ -4,6 +4,8 @@
 
 import jinja2
 
+import srawn_utils
+
 # cspell:disable-next-line
 templateLoader = jinja2.FileSystemLoader(searchpath="./templates")
 templateEnv = jinja2.Environment(loader=templateLoader)
@@ -13,29 +15,23 @@
 working_dir = Path('.')
 html_dir = working_dir / "out/html"
 
-years = sorted(working_dir.glob("SR20*"), reverse=True)
+years = srawn_utils.get_years(working_dir)
 
 # render main index
 with html_dir.joinpath("index.html").open("w") as fh:
     fh.write(archive_template.render(years=years))
 
-# render year indices
-for year in years:
-    year_html_path = html_dir / year
-    year_path = working_dir / year
-    issues = sorted(year_path.glob("*.md"))
-    issue_data = []
-    for md_path in issues:
-        filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem)
-        if not filename_match:
-            exit(f"{md_path.stem} does not match format. Run the linter.")
-        date, issue = filename_match.groups()
-
-        folder_match = re.match("^(SR20\d{2})$", md_path.parent.name)
-        if not folder_match:
-            exit(f"{md_path.parent.name} does not match format. Run the linter.")
-        sryear, = folder_match.groups()
-        issue_data.append((md_path.stem, f"{date}: {sryear} Issue {issue}"))
-
-    with year_html_path.joinpath("index.html").open("w") as fh:
-        fh.write(year_template.render(year=year, issues=issue_data))
+with srawn_utils.exit_on_invalid():
+    # render year indices
+    for year in years:
+        year_html_path = html_dir / year
+        year_path = working_dir / year
+        issue_data = []
+        for issue in srawn_utils.get_year_issues(year_path):
+            issue_data.append((
+                issue.path.stem,
+                f"{issue.date_text}: {issue.title}",
+            ))
+
+        with year_html_path.joinpath("index.html").open("w") as fh:
+            fh.write(year_template.render(year=year, issues=issue_data))
diff --git a/scripts/render-mjml.py b/scripts/render-mjml.py
@@ -8,6 +8,8 @@
 import jinja2
 import mistune
 
+import srawn_utils
+
 FONTS: Dict[int, Tuple[str, int, Optional[int]]] = {
     0: ("Open Sans", 20, 25),  # Paragraph
     1: ("Open Sans", 35, None),
@@ -62,24 +64,15 @@ def render_paragraph(self, text: str, *, font_size: int, font_family: str, line_
     if not md_path.is_file():
         exit(f"{md_path} is not a file")
 
-    filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem)
-    if not filename_match:
-        exit(f"{md_path.stem} does not match format. Run the linter.")
-    date, issue = filename_match.groups()
-
-    folder_match = re.match("^(SR20\d{2})$", md_path.parent.name)
-    if not folder_match:
-        exit(f"{md_path.parent.name} does not match format. Run the linter.")
-    sryear, = folder_match.groups()
-
+    with srawn_utils.exit_on_invalid():
+        parsed_issue_path = srawn_utils.parse_path(md_path)
 
     with md_path.open("r") as fh:
         raw_markdown = fh.read()
 
     # Remove title from old issues. This prevents duplicate titles in the archive
     # without changing the original files.
-    raw_markdown = re.sub("^# SR\(A\)WN \d{4} [—-]+ \d+$", "", raw_markdown, flags=re.MULTILINE)
-
+    raw_markdown = re.sub(r"^# SR\(A\)WN \d{4} [—-]+ \d+$", "", raw_markdown, flags=re.MULTILINE)
 
     md = mistune.create_markdown(renderer=MJMLRenderer())
     content = md(raw_markdown)
@@ -90,9 +83,9 @@ def render_paragraph(self, text: str, *, font_size: int, font_family: str, line_
     template = templateEnv.get_template("newsletter.mjml.j2")
 
     output = template.render(
-        date=date,
-        sryear=sryear,
-        issue=issue,
+        date=parsed_issue_path.date_text,
+        sryear=parsed_issue_path.sryear,
+        issue=parsed_issue_path.issue_number,
         content=content,
     )
     print(output)
diff --git a/scripts/srawn_utils.py b/scripts/srawn_utils.py
@@ -0,0 +1,76 @@
+import re
+import contextlib
+import dataclasses,datetime
+from collections.abc import Iterator
+from pathlib import Path
+
+
+class InvalidPath(ValueError):
+    pass
+
+
+@dataclasses.dataclass(frozen=True)
+class ParsedIssuePath:
+    path: Path
+    date: datetime.date
+    issue_number: int
+    sryear: str
+
+    @property
+    def title(self) -> str:
+        return f"{self.sryear} Issue {self.issue_number}"
+
+    @property
+    def date_text(self) -> str:
+        return self.date.isoformat()
+
+
+def parse_path(path: Path) -> ParsedIssuePath:
+    filename_match = re.match(
+        r"^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$",
+        path.stem,
+    )
+    if not filename_match:
+        raise InvalidPath(
+            f"{path.stem!r} does not match format. Run the linter.",
+        )
+    date, issue_number = filename_match.groups()
+
+    folder_match = re.match(
+        r"^(SR20\d{2})$",
+        path.parent.name,
+    )
+    if not folder_match:
+        raise InvalidPath(
+            f"{path.parent.name!r} does not match format. Run the linter.",
+        )
+    sryear, = folder_match.groups()
+
+    return ParsedIssuePath(
+        path,
+        datetime.date.fromisoformat(date),
+        int(issue_number),
+        sryear,
+    )
+
+
+@contextlib.contextmanager
+def exit_on_invalid() -> Iterator[None]:
+    try:
+        yield
+    except InvalidPath as e:
+        exit(str(e))
+
+
+def get_years(root: Path) -> list[Path]:
+    return sorted(root.glob("SR20*"), reverse=True)
+
+
+def get_year_issues(year_root: Path) -> list[ParsedIssuePath]:
+    paths = sorted(year_root.glob("*.md"))
+    return [parse_path(x) for x in paths]
+
+
+def get_all_issues(root: Path) -> list[ParsedIssuePath]:
+    paths = sorted(root.glob("SR20*/*.md"))
+    return [parse_path(x) for x in paths]