Skip to content

Commit

Permalink
Suppress "Compile called before Add" in re2.Filter
Browse files Browse the repository at this point in the history
When compiling an empty set, ``FilteredRE2::Compile`` logs a warning
to stderr which can not be suppressed (google/re2#485).

Replace `re2.Filter` by a null object if the corresponding matchers
list is empty: not only do we need to skip `Filter.Compile` to
suppress the warning message, we need to skip `Filter.Match` or the
program will segfault (google/re2#484). Using a null object seems
safer and more reliable than adding conditionals, even if it requires
more code and reindenting half the file.

Doing this also seems safer than my first instinct of trying to use
low-level fd redirection: fd redirection suffers from race
conditions[^thread] and could suffer from other cross-platform
compatibility issues (e.g. does every python-supported OS have stderr
on fd 2 and correctly supports dup, dup2, and close?)

[^thread]: AFAIK CPython does not provide a python-level GIL-pin
    feature (even less so with the GILectomy plans), so we have no way
    to prevent context-switching and any message sent to stderr by
    sibling threads would be lost
  • Loading branch information
masklinn committed Mar 16, 2024
1 parent b45380d commit f6c786e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 19 deletions.
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ ignore = [
]

[tool.ruff.lint.isort]
classes = ["LRU", "OS"]
classes = ["OS"]
known-first-party = ["ua_parser"]
combine-as-imports = true

[tool.mypy]
Expand Down
50 changes: 32 additions & 18 deletions src/ua_parser/re2.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
)


class DummyFilter:
def Match(self, _: str) -> None:
pass


class Resolver:
ua: re2.Filter
user_agent_matchers: List[Matcher[UserAgent]]
Expand All @@ -30,26 +35,35 @@ def __init__(
) -> None:
self.user_agent_matchers, self.os_matchers, self.device_matchers = matchers

self.ua = re2.Filter()
for u in self.user_agent_matchers:
self.ua.Add(u.pattern)
self.ua.Compile()
if self.user_agent_matchers:
self.ua = re2.Filter()
for u in self.user_agent_matchers:
self.ua.Add(u.pattern)
self.ua.Compile()
else:
self.ua = DummyFilter()

self.os = re2.Filter()
for o in self.os_matchers:
self.os.Add(o.pattern)
self.os.Compile()
if self.os_matchers:
self.os = re2.Filter()
for o in self.os_matchers:
self.os.Add(o.pattern)
self.os.Compile()
else:
self.os = DummyFilter()

self.devices = re2.Filter()
for d in self.device_matchers:
# Prepend the i global flag if IGNORECASE is set. Assumes
# no pattern uses global flags, but since they're not
# supported in JS that seems safe.
if d.flags & re.IGNORECASE:
self.devices.Add("(?i)" + d.pattern)
else:
self.devices.Add(d.pattern)
self.devices.Compile()
if self.device_matchers:
self.devices = re2.Filter()
for d in self.device_matchers:
# Prepend the i global flag if IGNORECASE is set. Assumes
# no pattern uses global flags, but since they're not
# supported in JS that seems safe.
if d.flags & re.IGNORECASE:
self.devices.Add("(?i)" + d.pattern)
else:
self.devices.Add(d.pattern)
self.devices.Compile()
else:
self.devices = DummyFilter()

def __call__(self, ua: str, domains: Domain, /) -> PartialParseResult:
user_agent = os = device = None
Expand Down
13 changes: 13 additions & 0 deletions tests/test_re2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pytest # type: ignore

from ua_parser import Domain, PartialParseResult

re2 = pytest.importorskip("ua_parser.re2")


def test_empty(capfd: pytest.CaptureFixture[str]) -> None:
r = re2.Resolver(([], [], []))
assert r("", Domain.ALL) == PartialParseResult(Domain.ALL, None, None, None, "")
out, err = capfd.readouterr()
assert out == ""
assert err == ""

0 comments on commit f6c786e

Please sign in to comment.