From 922623d01534a87cf1ab323951209934524088dc Mon Sep 17 00:00:00 2001
From: Peter Law <PeterJCLaw@gmail.com>
Date: Sat, 14 Sep 2024 14:20:51 +0100
Subject: [PATCH] Extract common utils in rendering SRAWN

This removes a lot of duplicate handling & validation of path names
in favour of having a single source of this logic. It also relocates
a couple of instances of similar but separate logic so that they sit
alongside each other within the utils file; even though they can't
share code, they are at least defined next to each other.

Alongside doing this, we also fix some warnings from Python caused by
the regexes not being marked as raw strings (such that '\d' was
previously treated as an escape at the string parsing level, rather
than as a regex escape).
---
 scripts/render-feed.py    | 18 +++-------
 scripts/render-indices.py | 38 +++++++++-----------
 scripts/render-mjml.py    | 23 +++++-------
 scripts/srawn_utils.py    | 76 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 106 insertions(+), 49 deletions(-)
 create mode 100644 scripts/srawn_utils.py

diff --git a/scripts/render-feed.py b/scripts/render-feed.py
index 3c557e5..3c6aac5 100755
--- a/scripts/render-feed.py
+++ b/scripts/render-feed.py
@@ -7,6 +7,7 @@
 from feedgenerator import DefaultFeed
 import mistune
 
+import srawn_utils
 
 working_dir = Path('.')
 feed = DefaultFeed(
@@ -18,26 +19,22 @@
 md = mistune.create_markdown()
 
 
-for md_path in sorted(working_dir.glob("SR20*/*.md")):
-    filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem)
-    if not filename_match:
-        exit(f"{md_path.stem} does not match format. Run the linter.")
-    date, issue = filename_match.groups()
-
-    folder_match = re.match("^(SR20\d{2})$", md_path.parent.name)
-    if not folder_match:
-        exit(f"{md_path.parent.name} does not match format. Run the linter.")
-    sryear, = folder_match.groups()
-
+# Parse every issue path up front so that a malformed name exits with its
+# message (as the other render scripts do) instead of an uncaught traceback.
+with srawn_utils.exit_on_invalid():
+    issues = srawn_utils.get_all_issues(working_dir)
+
+for issue in issues:
+    md_path = issue.path
     link = f"https://studentrobotics.org/srawn/{md_path.parent.stem}/{md_path.stem}.html"
     content = md(md_path.read_text())
 
     feed.add_item(
-        title=f"{sryear} Issue {issue}",
+        title=issue.title,
         link=link,
         description=content,
         unique_id=link,
-        pubdate=datetime.date.fromisoformat(date),
+        pubdate=issue.date,
         content=content
     )
 
diff --git a/scripts/render-indices.py b/scripts/render-indices.py
index bc2c8d1..db6ce60 100755
--- a/scripts/render-indices.py
+++ b/scripts/render-indices.py
@@ -4,6 +4,8 @@
 
 import jinja2
 
+import srawn_utils
+
 # cspell:disable-next-line
 templateLoader = jinja2.FileSystemLoader(searchpath="./templates")
 templateEnv = jinja2.Environment(loader=templateLoader)
@@ -13,29 +15,23 @@
 working_dir = Path('.')
 html_dir = working_dir / "out/html"
 
-years = sorted(working_dir.glob("SR20*"), reverse=True)
+years = srawn_utils.get_years(working_dir)
 
 # render main index
 with html_dir.joinpath("index.html").open("w") as fh:
     fh.write(archive_template.render(years=years))
 
-# render year indices
-for year in years:
-    year_html_path = html_dir / year
-    year_path = working_dir / year
-    issues = sorted(year_path.glob("*.md"))
-    issue_data = []
-    for md_path in issues:
-        filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem)
-        if not filename_match:
-            exit(f"{md_path.stem} does not match format. Run the linter.")
-        date, issue = filename_match.groups()
-
-        folder_match = re.match("^(SR20\d{2})$", md_path.parent.name)
-        if not folder_match:
-            exit(f"{md_path.parent.name} does not match format. Run the linter.")
-        sryear, = folder_match.groups()
-        issue_data.append((md_path.stem, f"{date}: {sryear} Issue {issue}"))
-
-    with year_html_path.joinpath("index.html").open("w") as fh:
-        fh.write(year_template.render(year=year, issues=issue_data))
+# Render one index page per year. Any malformed issue path found on the way
+# ends the run with its error message.
+with srawn_utils.exit_on_invalid():
+    for year in years:
+        year_html_path = html_dir / year
+        year_path = working_dir / year
+        # Each entry pairs the issue's file stem with its display label.
+        issue_data = [
+            (issue.path.stem, f"{issue.date_text}: {issue.title}")
+            for issue in srawn_utils.get_year_issues(year_path)
+        ]
+
+        with year_html_path.joinpath("index.html").open("w") as fh:
+            fh.write(year_template.render(year=year, issues=issue_data))
diff --git a/scripts/render-mjml.py b/scripts/render-mjml.py
index 5bbdd7b..74add93 100755
--- a/scripts/render-mjml.py
+++ b/scripts/render-mjml.py
@@ -8,6 +8,8 @@
 import jinja2
 import mistune
 
+import srawn_utils
+
 FONTS: Dict[int, Tuple[str, int, Optional[int]]] = {
     0: ("Open Sans", 20, 25),  # Paragraph
     1: ("Open Sans", 35, None),
@@ -62,24 +64,15 @@ def render_paragraph(self, text: str, *, font_size: int, font_family: str, line_
     if not md_path.is_file():
         exit(f"{md_path} is not a file")
 
-    filename_match = re.match("^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", md_path.stem)
-    if not filename_match:
-        exit(f"{md_path.stem} does not match format. Run the linter.")
-    date, issue = filename_match.groups()
-
-    folder_match = re.match("^(SR20\d{2})$", md_path.parent.name)
-    if not folder_match:
-        exit(f"{md_path.parent.name} does not match format. Run the linter.")
-    sryear, = folder_match.groups()
-
+    with srawn_utils.exit_on_invalid():
+        parsed_issue_path = srawn_utils.parse_path(md_path)
 
     with md_path.open("r") as fh:
         raw_markdown = fh.read()
 
     # Remove title from old issues. This prevents duplicate titles in the archive
     # without changing the original files.
-    raw_markdown = re.sub("^# SR\(A\)WN \d{4} [—-]+ \d+$", "", raw_markdown, flags=re.MULTILINE)
-
+    raw_markdown = re.sub(r"^# SR\(A\)WN \d{4} [—-]+ \d+$", "", raw_markdown, flags=re.MULTILINE)
 
     md = mistune.create_markdown(renderer=MJMLRenderer())
     content = md(raw_markdown)
@@ -90,9 +83,9 @@ def render_paragraph(self, text: str, *, font_size: int, font_family: str, line_
     template = templateEnv.get_template("newsletter.mjml.j2")
 
     output = template.render(
-        date=date,
-        sryear=sryear,
-        issue=issue,
+        date=parsed_issue_path.date_text,
+        sryear=parsed_issue_path.sryear,
+        issue=f"{parsed_issue_path.issue_number:02d}",  # two digits, as the template received before
         content=content,
     )
     print(output)
diff --git a/scripts/srawn_utils.py b/scripts/srawn_utils.py
new file mode 100644
index 0000000..8a1d5ac
--- /dev/null
+++ b/scripts/srawn_utils.py
@@ -0,0 +1,76 @@
+import re
+import contextlib
+import dataclasses,datetime
+from collections.abc import Iterator
+from pathlib import Path
+
+
+class InvalidPath(ValueError):
+    """Raised when an issue path does not follow the expected naming format."""
+
+
+@dataclasses.dataclass(frozen=True)
+class ParsedIssuePath:  # immutable record of the parts parsed out of one issue path
+    path: Path  # the path the other fields were parsed from
+    date: datetime.date  # date taken from the start of the filename
+    issue_number: int  # two-digit issue number from the filename, as an int
+    sryear: str  # name of the containing folder, of the form "SR20YY"
+
+    @property
+    def title(self) -> str:
+        return f"{self.sryear} Issue {self.issue_number:02d}"  # zero-padded, e.g. "Issue 01"
+
+    @property
+    def date_text(self) -> str:
+        return self.date.isoformat()  # YYYY-MM-DD, the same text as in the filename
+
+
+def parse_path(path: Path) -> ParsedIssuePath:
+    """
+    Parse an issue path: stem ``20YY-MM-DD-srawn-NN`` inside a ``SR20YY`` folder.
+
+    Only ``path.stem`` and ``path.parent.name`` are inspected. Raises
+    ``InvalidPath`` if either does not match the expected format, or if the
+    date in the filename is not a real calendar date.
+    """
+    # The filename carries the date and the two-digit issue number.
+    filename_match = re.match(r"^(20\d{2}-\d{2}-\d{2})-srawn-(\d{2})$", path.stem)
+    if not filename_match:
+        raise InvalidPath(f"{path.stem!r} does not match format. Run the linter.")
+    date_text, issue_number = filename_match.groups()
+
+    # The containing folder names the SR year.
+    folder_match = re.match(r"^(SR20\d{2})$", path.parent.name)
+    if not folder_match:
+        raise InvalidPath(f"{path.parent.name!r} does not match format. Run the linter.")
+    sryear, = folder_match.groups()
+
+    try:
+        date = datetime.date.fromisoformat(date_text)
+    except ValueError as e:
+        # The regex only checks the shape, so e.g. "2024-02-30" still gets here.
+        raise InvalidPath(f"{path.stem!r} does not contain a valid date: {e}") from e
+
+    return ParsedIssuePath(path, date, int(issue_number), sryear)
+
+
+@contextlib.contextmanager
+def exit_on_invalid() -> Iterator[None]:  # use as ``with exit_on_invalid(): ...``
+    try:
+        yield  # the body of the ``with`` block runs here
+    except InvalidPath as e:
+        exit(str(e))  # end the script with the error's message; other exceptions propagate
+
+
+def get_years(root: Path) -> list[Path]:  # every entry directly under root matching "SR20*"
+    return sorted(root.glob("SR20*"), reverse=True)  # reverse-sorted, so later years come first
+
+
+def get_year_issues(year_root: Path) -> list[ParsedIssuePath]:
+    """Parse every ``*.md`` file directly inside ``year_root``, in sorted path order."""
+    return [parse_path(md_path) for md_path in sorted(year_root.glob("*.md"))]
+
+
+def get_all_issues(root: Path) -> list[ParsedIssuePath]:
+    """Parse every file matching ``SR20*/*.md`` under ``root``, in sorted path order."""
+    return list(map(parse_path, sorted(root.glob("SR20*/*.md"))))