Add support for threadlocal caches
Doesn't seem super useful for now, as the GIL means a cache lock
doesn't get contended much. However, it might find utility with the
free-threaded interpreter eventually. Anyway, it's not huge and not
very complex, although the contextvars API is not great for lazy
initialisation.

Still, even though the initialisation looks like it could lead to
redundant inits (similar to the clearing cache, which can get
multi-cleared), it should be safe: different threads hitting `cache`
concurrently will each hit their own lookup failure, initialise their
own local cache, and set their own contextvar.

For a var to get double-initialised, the same thread would have to be
concurrent with itself, which is not possible.
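
To illustrate that argument (this sketch is not part of the commit):
each thread starts with its own contextvars context, so every thread
misses the lookup once, builds its own cache, and sets a value only it
can see. A plain dict stands in for whatever cache the factory would
build.

    import threading
    from contextvars import ContextVar

    cv: ContextVar[dict] = ContextVar("local-cache")
    created = []

    def worker() -> None:
        c = cv.get(None)
        if c is None:       # first access from this thread: lookup failure
            c = {}
            cv.set(c)       # visible only in this thread's context
        created.append(c)

    threads = [threading.Thread(target=worker) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    # four threads, four distinct caches, no cache initialised twice
    assert len({id(c) for c in created}) == 4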

Fixes #180
masklinn committed Feb 28, 2024
1 parent 670fdf6 commit c0abcbb
Showing 2 changed files with 35 additions and 4 deletions.
7 changes: 4 additions & 3 deletions src/ua_parser/__main__.py
@@ -22,7 +22,7 @@
     PartialParseResult,
     Resolver,
 )
-from .caching import Cache
+from .caching import Cache, Local
 from .loaders import load_builtins, load_yaml
 from .re2 import Resolver as Re2Resolver
 from .user_agent_parser import Parse
@@ -243,11 +243,12 @@ def run_threaded(args: argparse.Namespace) -> None:
     basic = BasicResolver(load_builtins())
     resolvers: List[Tuple[str, Resolver]] = [
         ("clearing", CachingResolver(basic, Clearing(CACHESIZE))),
-        ("LRU", CachingResolver(basic, Locking(LRU(CACHESIZE)))),
+        ("locking-lru", CachingResolver(basic, Locking(LRU(CACHESIZE)))),
+        ("local-lru", CachingResolver(basic, Local(lambda: LRU(CACHESIZE)))),
         ("re2", Re2Resolver(load_builtins())),
     ]
     for name, resolver in resolvers:
-        print(f"{name:10}: ", end="", flush=True)
+        print(f"{name:11}: ", end="", flush=True)
         # randomize the dataset for each thread, predictably, to
         # simulate distributed load (not great but better than
         # nothing, and probably better than reusing the exact same
32 changes: 31 additions & 1 deletion src/ua_parser/caching.py
@@ -1,7 +1,8 @@
 import abc
 import threading
 from collections import OrderedDict
-from typing import Dict, Optional, Protocol
+from contextvars import ContextVar
+from typing import Callable, Dict, Optional, Protocol
 
 from .core import Domain, PartialParseResult, Resolver

@@ -122,6 +123,35 @@ def __setitem__(self, key: str, value: PartialParseResult) -> None:
         self.cache[key] = value
 
 
+class Local:
+    """Thread local cache decorator. Takes a cache factory and lazily
+    instantiates a cache for each thread it's accessed from.
+    This means the cache capacity and memory consumption is
+    figuratively multiplied by however many threads the cache is used
+    from, but those threads don't share their caching.
+    """
+
+    def __init__(self, factory: Callable[[], Cache]) -> None:
+        self.cv: ContextVar[Cache] = ContextVar("local-cache")
+        self.factory = factory
+
+    @property
+    def cache(self) -> Cache:
+        c = self.cv.get(None)
+        if c is None:
+            c = self.factory()
+            self.cv.set(c)
+        return c
+
+    def __getitem__(self, key: str) -> Optional[PartialParseResult]:
+        return self.cache[key]
+
+    def __setitem__(self, key: str, value: PartialParseResult) -> None:
+        self.cache[key] = value
+
+
 class CachingResolver:
     """A wrapping parser which takes an underlying concrete :class:`Cache`
     for the actual caching and cache strategy.
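
Outside the benchmark, wiring the decorator up mirrors the `local-lru`
entry in __main__.py above. A minimal usage sketch, assuming
BasicResolver and load_builtins are importable as used in __main__.py
(those imports are not shown in this hunk):

    from ua_parser.basic import Resolver as BasicResolver  # assumed import path
    from ua_parser.caching import LRU, CachingResolver, Local
    from ua_parser.loaders import load_builtins

    # Each thread resolving through `cached` lazily gets its own 200-entry
    # LRU on first use; threads neither share entries nor contend on a lock.
    cached = CachingResolver(BasicResolver(load_builtins()), Local(lambda: LRU(200)))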
