From f0a27548b047d0cd2116d477ef6b827fd2fe1895 Mon Sep 17 00:00:00 2001 From: masklinn Date: Sat, 16 Mar 2024 20:03:12 +0100 Subject: [PATCH] add caching and resolver guides Fixes #183 --- doc/guides.rst | 148 ++++++++++++++++++++++++++++++++++++--- src/ua_parser/caching.py | 4 +- 2 files changed, 139 insertions(+), 13 deletions(-) diff --git a/doc/guides.rst b/doc/guides.rst index 6d28692..2e88092 100644 --- a/doc/guides.rst +++ b/doc/guides.rst @@ -72,20 +72,146 @@ in the process which is both the advantage and risk device=None, string='foo') -Cache Customisation -=================== +Cache And Other Advanced Parser Customisation +============================================= -.. todo:: +While loading custom rulesets has built-in support, other forms of +parser customisations don't and require manually instantiating and +composing :class:`~ua_parser.Resolver` objects. - - how to build a custom resolver stack and wrap it in a parser - - minor discussion of caches - - maybe link to an advanced document about the specifics of - individual caches and their memory consumption? +The most basic such customisation is simply configuring caching away +from the default setup. + +As an example, in the default configuration, if |re2|_ is available the +RE2-based resolver is not cached; a user might consider the memory +investment worth it and want to reconfigure the stack for a cached +base. + +The process is uncomplicated as the APIs are designed to compose +together. 
+ +The first step is to instantiate a base resolver, configured with +the relevant :class:`Matchers` data:: + + import ua_parser.loaders + import ua_parser.re2 + base = ua_parser.re2.Resolver( + ua_parser.loaders.load_lazy_builtins()) + +The next step is to instantiate the cache [#cache]_ suitably +configured:: + + cache = ua_parser.Cache(1000) + +And compose the base resolver and cache together:: + + resolver = ua_parser.caching.CachingResolver( + base, + cache + ) + +Finally, for convenience a :class:`ua_parser.Parser` can be wrapped +around the resolver, and that can either be used as-is, or set as the +global parser for all the library users to use this new configuration +from here on:: + + ua_parser.parser = ua_parser.Parser(resolver) + +.. note:: + + To be honest, aside from configuring the presence, algorithm, and + size of caches, there currently isn't much to compose that's built + in. The only remaining member of the cast is + :class:`~ua_parser.caching.Local`, which is also caching-related, + and serves to use thread-local caches rather than a shared cache. Writing Custom Resolvers ======================== -.. todo:: - - - explanation of the resolver protocol - - maybe a fanout resolver as demo? +It is unclear if there would be any fun or profit to it, but an +express goal of the new API is to allow writing and composing +resolvers. So what is a resolver? + +:class:`~ua_parser.Resolver` is a structural :py:class:`typing.Protocol` for +implementation convenience (nothing to inherit, and not even a class +to write). Here it is in full:: + + class Resolver(Protocol): + @abc.abstractmethod + def __call__(self, ua: str, domain: Domain, /) -> PartialResult: + ... + +So a :class:`~ua_parser.Resolver` is just a callable which takes a +string and a :class:`~ua_parser.Domain`, and returns a +:class:`~ua_parser.PartialResult`. 
+ +For our first resolver, let's say that we have an API and a mobile +application, and as we expect the mobile application to be the main +caller we want to special-case it. We could do it in many ways, but the +way we're doing it is a bespoke :class:`~ua_parser.Resolver` which +matches the application's user agent and performs trivial parsing:: + + def foo_resolver(ua: str, domain: Domain, /) -> PartialResult: + if not ua.startswith('fooapp/'): + # not our application, match failure + return PartialResult(domain, None, None, None, ua) + + # we've defined our UA as $appname/$version/$user-token + app, version, user = ua.split('/', 2) + major, minor = version.split('.') + return PartialResult( + domain, + UserAgent(app, major, minor), + None, + Device(user), + ua, + ) + +This resolver is not hugely interesting as it resolves a very limited +number of user agent strings and fails everything else, although it +does demonstrate two important requirements of the protocol: + +- If a domain is requested, it must be returned, even if ``None`` + (signaling a matching failure). +- If it's efficient, there is nothing wrong with returning data for + domains which were not requested; at worst they will be ignored. + +For a more interesting resolver, we can write a *fallback* resolver: +it's a higher-order resolver which tries to call multiple +sub-resolvers in sequence until the UA is resolved. 
This means we +could then use something like:: + + Parser(FallbackResolver([ + foo_resolver, + re2.Resolver(load_lazy_builtins()), + ])) + +to prioritise cheap resolving of our application while still resolving +third-party user agents:: + + class FallbackResolver: + def __init__(self, resolvers: List[Resolver]) -> None: + self.resolvers = resolvers + + def __call__(self, ua: str, domain: Domain, /) -> PartialResult: + if domain: + for resolver in self.resolvers: + r = resolver(ua, domain) + # if any value is non-none the resolver found a match + if r.user_agent is not None \ + or r.os is not None \ + or r.device is not None: + return r + + # if no resolver found a match (or nothing was requested), + # resolve to failure + return PartialResult(domain, None, None, None, ua) + +.. [#cache] If it has been written yet, see :doc:`advanced/caches` for + way too much information you probably don't care about if you just + want to parse user agent strings. + + The tldr is that a bigger cache increases hit rates, which decreases costs + but uses more memory, and while really easy to write in Python an + :class:`~ua_parser.caching.Lru` is a pretty bad cache all things + considered. diff --git a/src/ua_parser/caching.py b/src/ua_parser/caching.py index b22321c..ae7eff6 100644 --- a/src/ua_parser/caching.py +++ b/src/ua_parser/caching.py @@ -292,8 +292,8 @@ class CachingResolver: """ - def __init__(self, parser: Resolver, cache: Cache): - self.parser: Resolver = parser + def __init__(self, resolver: Resolver, cache: Cache): + self.parser: Resolver = resolver self.cache: Cache = cache def __call__(self, ua: str, domains: Domain, /) -> PartialResult: