Skip to content

Commit

Permalink
Replace IDs & IPs in test result URLs (#288)
Browse files Browse the repository at this point in the history
* Replace IDs & IPs in test result URLs

* Simplify `unquote_url`

* Expand match group explanations

* Fix comment typo
  • Loading branch information
pederhan authored Jul 29, 2024
1 parent 18454c3 commit dc11694
Showing 1 changed file with 36 additions and 0 deletions.
36 changes: 36 additions & 0 deletions ci/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import re
import sys
import urllib.parse
from enum import StrEnum
from itertools import zip_longest
from typing import Any, Iterable
Expand Down Expand Up @@ -32,6 +33,16 @@
# Matches 2-8 groups of 1-4 hex digits joined by `:` or `::`, between word boundaries.
# NOTE(review): a colon-separated MAC address also satisfies this pattern, so the
# MAC replacement presumably has to run before this one - confirm in the caller.
ipv6_pattern = re.compile(r"\b([0-9a-fA-F]{1,4}::?){1,7}[0-9a-fA-F]{1,4}\b")
# Matches six `:`-separated pairs of lowercase hex digits; uppercase hex is not matched.
mac_pattern = re.compile(r"\b([0-9a-f]{2}:){5}[0-9a-f]{2}\b")

# Pattern matching a `"url"` key whose string value starts with `/api/v1/`,
# up to and including the first following `"`.
api_v1_pattern = re.compile(r'"url":\s*"/api/v1/.*?"')

# Pattern matching `/api/v1/` URLs whose final component is a number (an ID).
# Defines 4 capture groups so the number alone can be swapped for a placeholder:
#   1: everything before the separator (`"url": "/api/v1/...`)
#   2: the separator directly preceding the number (`/` or `=`)
#   3: the number itself
#   4: the closing double quote
# URLs containing `<IPv4>` or `<IPv6>` after `/api/v1/` are not matched.
# Both the lookahead and the lazy wildcard use `[^"]` instead of `.` so that
# neither can run past the URL's closing quote. With `.`, single-line JSON let
# a placeholder in a *later* field veto the match, and let a trailing number in
# a later field be replaced instead of one in the URL.
api_v1_pattern_with_number = re.compile(
    r'("url":\s*"/api/v1/(?![^"]*<(?:IPv6|IPv4)>)[^"]*?)([/=])(\d+)(")'
)


class DiffError(Exception):
"""Base class for diff errors."""
Expand All @@ -48,6 +59,20 @@ def __init__(self, expected: int, result: int) -> None: # noqa: D107
)


def unquote_url(match: re.Match[str]) -> str:
    """Decode percent-escapes in the text matched for a /api/v1/ URL.

    Meant to be used as a ``re.sub`` replacement callback: the entire
    matched text is passed through ``urllib.parse.unquote`` and returned.
    """
    matched_text = match[0]
    return urllib.parse.unquote(matched_text)


def replace_url_id(match: re.Match[str]) -> str:
    """Swap the trailing number (ID) of a matched URL for the `<ID>` placeholder.

    The match is expected to expose these capture groups:
      1 - the text before the separator (`"url": "/api/...`)
      2 - the separator (`/` or `=`)
      3 - the number being replaced (dropped from the result)
      4 - the closing double quote
    """
    before, separator, closing_quote = match.group(1, 2, 4)
    return "{}{}<ID>{}".format(before, separator, closing_quote)


def group_objects(json_file_path: str) -> list[dict[str, Any]]:
"""Group objects in a JSON file by a specific criterion.
Expand All @@ -56,12 +81,23 @@ def group_objects(json_file_path: str) -> list[dict[str, Any]]:
"""
with open(json_file_path, "r") as f:
s = f.read()
# Replace all URL encoded text in /api/v1/ URLs with unquoted text
# This lets us replace it down the line with our normal IPv{4,6} and MAC placeholders
# Must be done _before_ all other replacements
s = api_v1_pattern.sub(unquote_url, s)

# Replace all non-deterministic values with placeholders
s = timestamp_pattern.sub("<TIME>", s)
s = datetime_str_pattern.sub("<TIME>", s)
s = serial_pattern.sub("Serial: <NUMBER>", s)
s = mac_pattern.sub("<macaddress>", s)
s = ipv4_pattern.sub("<IPv4>", s)
s = ipv6_pattern.sub("<IPv6>", s)

# Replace all IDs in URLs with a placeholder
# Must be done _after_ all other replacements
s = api_v1_pattern_with_number.sub(replace_url_id, s)

s = re.sub(
r"\s+", " ", s
) # replace all whitespace with one space, so the diff doesn't complain about different lengths
Expand Down

0 comments on commit dc11694

Please sign in to comment.