From da13b484d609644dba1917aad142fb5da193519c Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Thu, 15 Jun 2023 17:05:30 -0500 Subject: [PATCH 01/17] Move all scope parsing to 'experimental' Scope parsing is now exposed via `Scope.parse`, which is only available in `globus_sdk.experimental.scope_parser` as a method of parsing scopes. The scope parsing passes through two IRs -- first a tree, then a graph -- which reject cycles, normalize any optional/non-optional redundancies, and ensure that the tree is built in the same way that it will be during authorization flows in Globus Auth. This replaces our "shadow" implementation of MutableScope, which had been left in `scopes.scope_definition` under the name `Scope` but was never exported or documented. All of the features of `Scope` under the old location are now integrated into `globus_sdk.experimental.scope_parser.Scope`. Scope parsing is tested independently of other scope tests, moving and expanding these tests into `tests/unit/experimental/`. --- docs/experimental/index.rst | 1 + docs/experimental/scope_parser.rst | 35 ++ src/globus_sdk/_types.py | 6 - .../experimental/scope_parser/__init__.py | 8 + .../experimental/scope_parser/parser.py | 268 +++++++++++++++ .../scope_parser/scope_definition.py | 222 ++++++++++++ src/globus_sdk/scopes/scope_definition.py | 318 +----------------- tests/unit/experimental/test_scope_parser.py | 264 +++++++++++++++ tests/unit/test_scopes.py | 291 +--------------- 9 files changed, 814 insertions(+), 599 deletions(-) create mode 100644 docs/experimental/scope_parser.rst create mode 100644 src/globus_sdk/experimental/scope_parser/__init__.py create mode 100644 src/globus_sdk/experimental/scope_parser/parser.py create mode 100644 src/globus_sdk/experimental/scope_parser/scope_definition.py create mode 100644 tests/unit/experimental/test_scope_parser.py diff --git a/docs/experimental/index.rst b/docs/experimental/index.rst index 96f3b1176..a6b584afc 100644 --- a/docs/experimental/index.rst +++ b/docs/experimental/index.rst @@ -18,3 +18,4 @@ Globus SDK Experimental Components :caption: Contents :maxdepth: 1 + scope_parser diff --git a/docs/experimental/scope_parser.rst b/docs/experimental/scope_parser.rst new file mode 100644 index 000000000..34b5d6642 --- /dev/null +++ b/docs/experimental/scope_parser.rst @@ -0,0 +1,35 @@ +.. _experimental_scope_parser: + +Scope Parser +============ + +.. currentmodule:: globus_sdk.experimental.scope_parser + +This component defines a Scope object and exposes use of a parser which can +parse scope strings into trees of Scope objects. +It should be regarded as very similar to the existing ``MutableScope`` object in +``globus_sdk.scopes``. Key differences from ``MutableScope``: + +- ``Scope.parse`` is available, for parsing scopes from strings +- The Globus SDK does not support using ``Scope`` in all of the locations where + ``MutableScope`` is supported -- Scope objects must be stringified for use +- ``Scope`` objects define a ``_contains__`` method, allowing one to check if one scope + properly contains another + +.. warning:: + + This module is experimental due to the limitations of scope strings as a + representation of consent trees. The scope trees produced by parsing do + not necessarily describe real consent structures in Globus Auth. + +Reference +--------- + +.. autoclass:: Scope + :members: + :member-order: bysource + :special-members: + +.. autoclass:: ScopeParseError + +.. autoclass:: ScopeCycleError diff --git a/src/globus_sdk/_types.py b/src/globus_sdk/_types.py index 8939d08ac..968e7c1be 100644 --- a/src/globus_sdk/_types.py +++ b/src/globus_sdk/_types.py @@ -12,7 +12,6 @@ if t.TYPE_CHECKING: from globus_sdk.scopes import MutableScope - from globus_sdk.scopes.scope_definition import Scope # these types are aliases meant for internal use IntLike = t.Union[int, str] @@ -21,15 +20,10 @@ ScopeCollectionType = t.Union[ str, - "Scope", "MutableScope", t.Iterable[str], - t.Iterable["Scope"], t.Iterable["MutableScope"], - t.Iterable[t.Union[str, "Scope", "MutableScope"]], - t.Iterable[t.Union[str, "Scope"]], t.Iterable[t.Union[str, "MutableScope"]], - t.Iterable[t.Union["Scope", "MutableScope"]], ] diff --git a/src/globus_sdk/experimental/scope_parser/__init__.py b/src/globus_sdk/experimental/scope_parser/__init__.py new file mode 100644 index 000000000..be1fd854c --- /dev/null +++ b/src/globus_sdk/experimental/scope_parser/__init__.py @@ -0,0 +1,8 @@ +from .parser import ScopeCycleError, ScopeParseError +from .scope_definition import Scope + +__all__ = ( + "Scope", + "ScopeParseError", + "ScopeCycleError", +) diff --git a/src/globus_sdk/experimental/scope_parser/parser.py b/src/globus_sdk/experimental/scope_parser/parser.py new file mode 100644 index 000000000..597d13bbb --- /dev/null +++ b/src/globus_sdk/experimental/scope_parser/parser.py @@ -0,0 +1,268 @@ +from __future__ import annotations + +import enum +import typing as t +from collections import defaultdict, deque + + +class ScopeParseError(ValueError): + """The error raised if scope parsing fails.""" + + +class ScopeCycleError(ScopeParseError): + """The error raised if scope parsing discovers a cycle.""" + + +class ParseTokenType(enum.Enum): + # a string like 'urn:globus:auth:scopes:transfer.api.globus.org:all' + scope_string = enum.auto() + # the optional marker, '*' + opt_marker = enum.auto() + # '[' and ']' + lbracket = enum.auto() + rbracket = enum.auto() + + +class ParseToken: + def __init__(self, value: str, token_type: ParseTokenType) -> None: + self.value = value + self.token_type = token_type + + +def _tokenize(scope_string: str) -> list[ParseToken]: + tokens: list[ParseToken] = [] + current_token: list[str] = [] + for idx, c in enumerate(scope_string): + try: + peek: str | None = scope_string[idx + 1] + except IndexError: + peek = None + + if c in "[]* ": + if current_token: + tokens.append( + ParseToken("".join(current_token), ParseTokenType.scope_string) + ) + current_token = [] + + if c == "*": + if peek == " ": + raise ScopeParseError("'*' must not be followed by a space") + tokens.append(ParseToken(c, ParseTokenType.opt_marker)) + elif c == "[": + tokens.append(ParseToken(c, ParseTokenType.lbracket)) + elif c == "]": + if peek is not None and peek not in (" ", "]"): + raise ScopeParseError("']' may only be followed by a space or ']'") + tokens.append(ParseToken(c, ParseTokenType.rbracket)) + elif c == " ": + if peek == "[": + raise ScopeParseError("'[' cannot have a preceding space") + else: + raise NotImplementedError + else: + current_token.append(c) + if current_token: + tokens.append(ParseToken("".join(current_token), ParseTokenType.scope_string)) + return tokens + + +def _parse_tokens(tokens: list[ParseToken]) -> list[ScopeTreeNode]: + # value to return + ret: list[ScopeTreeNode] = [] + # track whether or not the current scope is optional (has a preceding *) + current_optional = False + # keep a stack of "parents", each time we enter a `[` context, push the last scope + # and each time we exit via a `]`, pop from the stack + parents: list[ScopeTreeNode] = [] + # track the current (or, by similar terminology, "last") complete scope seen + current_scope: ScopeTreeNode | None = None + + for idx in range(len(tokens)): + token = tokens[idx] + try: + peek: ParseToken | None = tokens[idx + 1] + except IndexError: + peek = None + + if token.token_type == ParseTokenType.opt_marker: + current_optional = True + if peek is None: + raise ScopeParseError("ended in optional marker") + if peek.token_type != ParseTokenType.scope_string: + raise ScopeParseError( + "a scope string must always follow an optional marker" + ) + + elif token.token_type == ParseTokenType.lbracket: + if peek is None: + raise ScopeParseError("ended in left bracket") + if peek.token_type == ParseTokenType.rbracket: + raise ScopeParseError("found empty brackets") + if peek.token_type == ParseTokenType.lbracket: + raise ScopeParseError("found double left-bracket") + if not current_scope: + raise ScopeParseError("found '[' without a preceding scope string") + + parents.append(current_scope) + elif token.token_type == ParseTokenType.rbracket: + if not parents: + raise ScopeParseError("found ']' with no matching '[' preceding it") + parents.pop() + else: + current_scope = ScopeTreeNode(token.value, optional=current_optional) + current_optional = False + if parents: + parents[-1].dependencies.append(current_scope) + else: + ret.append(current_scope) + if parents: + raise ScopeParseError("unclosed brackets, missing ']'") + + return ret + + +class ScopeTreeNode: + """ + This is an intermediate representation for scope parsing. + """ + + def __init__( + self, + scope_string: str, + *, + optional: bool, + ) -> None: + self.scope_string = scope_string + self.optional = optional + self.dependencies: list[ScopeTreeNode] = [] + + def add_dependency(self, subtree: ScopeTreeNode) -> None: + self.dependencies.append(subtree) + + def __repr__(self) -> str: + parts: list[str] = [f"'{self.scope_string}'"] + if self.optional: + parts.append("optional=True") + if self.dependencies: + parts.append(f"dependencies={self.dependencies!r}") + return "ScopeTreeNode(" + ", ".join(parts) + ")" + + @staticmethod + def parse(scope_string: str) -> list[ScopeTreeNode]: + tokens = _tokenize(scope_string) + return _parse_tokens(tokens) + + +class ScopeGraph: + def __init__(self) -> None: + self.top_level_scopes: set[tuple[str, bool]] = set() + self.nodes: set[str] = set() + self.edges: set[tuple[str, str, bool]] = set() + self._nodes_to_outbound_edges: dict[ + str, set[tuple[str, str, bool]] + ] = defaultdict(set) + + @property + def dependencies(self) -> set[tuple[str, str, bool]]: + return self.edges + + def get_child_edges(self, node: str) -> set[tuple[str, str, bool]]: + return set(self._nodes_to_outbound_edges[node]) + + def add_edge(self, src: str, dest: str, optional: bool) -> None: + self.edges.add((src, dest, optional)) + self._nodes_to_outbound_edges[src].add((src, dest, optional)) + + def _normalize_optionals(self) -> None: + to_remove: set[tuple[str, str, bool]] = set() + for edge in self.edges: + src, dest, optional = edge + if not optional: + continue + alter_ego = (src, dest, not optional) + if alter_ego in self.edges: + to_remove.add(edge) + self.edges = self.edges - to_remove + for edge in to_remove: + src, _, _ = edge + self._nodes_to_outbound_edges[src].remove(edge) + + def _check_cycles(self) -> None: + nodes_to_check = set(self.nodes) + seen_nodes = set() + new_edges_to_visit: set[tuple[str, str, bool]] = set() + while nodes_to_check: + start = nodes_to_check.pop() + seen_nodes.add(start) + edges_to_visit = self._nodes_to_outbound_edges[start] + while edges_to_visit: + new_edges_to_visit.clear() + for _source, dest, _optional in edges_to_visit: + if dest in seen_nodes: + raise ScopeCycleError( + "scopes contain a cyclic dependency on " + f"{dest} ({list(seen_nodes)})" + ) + seen_nodes.add(dest) + new_edges_to_visit |= self._nodes_to_outbound_edges[dest] + edges_to_visit = new_edges_to_visit + nodes_to_check -= seen_nodes + seen_nodes.clear() + + def __str__(self) -> str: + lines = ["digraph scopes {", ' rankdir="LR";', ""] + for node, optional in self.top_level_scopes: + lines.append(f" {'*' if optional else ''}{node}") + lines.append("") + + # do two passes to put all non-optional edges first + for source, dest, optional in self.edges: + if optional: + continue + lines.append(f" {source} -> {dest};") + for source, dest, optional in self.edges: + if not optional: + continue + lines.append(f' {source} -> {dest} [ label = "optional" ];') + lines.append("") + lines.append("}") + return "\n".join(lines) + + +def _convert_trees(trees: list[ScopeTreeNode]) -> ScopeGraph: + graph = ScopeGraph() + node_queue: t.Deque[ScopeTreeNode] = deque() + + for tree_node in trees: + node_queue.append(tree_node) + graph.top_level_scopes.add((tree_node.scope_string, tree_node.optional)) + + while node_queue: + tree_node = node_queue.pop() + scope_string = tree_node.scope_string + graph.nodes.add(scope_string) + for dep in tree_node.dependencies: + node_queue.append(dep) + graph.add_edge(scope_string, dep.scope_string, dep.optional) + + return graph + + +def parse_scope_graph(scopes: str) -> ScopeGraph: + trees = ScopeTreeNode.parse(scopes) + graph = _convert_trees(trees) + graph._normalize_optionals() + graph._check_cycles() + return graph + + +if __name__ == "__main__": + import sys + + graph = parse_scope_graph(sys.argv[1]) + print( + "top level scopes:", + ", ".join([name for name, _optional in graph.top_level_scopes]), + ) + print(graph) diff --git a/src/globus_sdk/experimental/scope_parser/scope_definition.py b/src/globus_sdk/experimental/scope_parser/scope_definition.py new file mode 100644 index 000000000..5d1033371 --- /dev/null +++ b/src/globus_sdk/experimental/scope_parser/scope_definition.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +import typing as t +import warnings + +from .parser import parse_scope_graph + + +class Scope: + """ + A scope object is a representation of a scope which allows modifications to be + made. In particular, it supports handling scope dependencies via + ``add_dependency``. + + `str(Scope(...))` produces a valid scope string for use in various methods. + + :param scope_string: The string which will be used as the basis for this Scope + :type scope_string: str + :param optional: The scope may be marked as optional. This means that the scope can + be declined by the user without declining consent for other scopes + :type optional: bool + """ + + def __init__( + self, + scope_string: str, + *, + optional: bool = False, + dependencies: list[Scope] | None = None, + ) -> None: + if any(c in scope_string for c in "[]* "): + raise ValueError( + "Scope instances may not contain the special characters '[]* '. " + "Use either Scope.deserialize or Scope.parse instead" + ) + self.scope_string = scope_string + self.optional = optional + self.dependencies: list[Scope] = [] if dependencies is None else dependencies + + @staticmethod + def parse(scope_string: str) -> list[Scope]: + """ + Parse an arbitrary scope string to a list of scopes. + + Zero or more than one scope may be returned, as in the case of an empty string + or space-delimited scopes. + + .. warning:: + + Parsing passes through an intermediary representation which treats scopes + as a graph. This ensures that the behavior of parses matches the treatment + of scope strings in Globus Auth authorization flows. + However, this also means that the parsing does not allow for strings which + represent consent trees with structures in which the same scope appears in + multiple parts of the tree. + """ + scope_graph = parse_scope_graph(scope_string) + + # initialize BFS traversals (one per root node) and copy that data + # to setup the result data + bfs_additions: list[Scope] = [ + Scope(s, optional=optional) for s, optional in scope_graph.top_level_scopes + ] + results: list[Scope] = list(bfs_additions) + + while bfs_additions: + current_scope = bfs_additions.pop(0) + edges = scope_graph.get_child_edges(current_scope.scope_string) + for _, dest, optional in edges: + dest_scope = Scope(dest, optional=optional) + current_scope.add_dependency(dest_scope) + bfs_additions.append(dest_scope) + + return results + + @classmethod + def deserialize(cls, scope_string: str) -> Scope: + """ + Deserialize a scope string to a scope object. + + This is the special case of parsing in which exactly one scope must be returned + by the parse. + """ + data = Scope.parse(scope_string) + if len(data) != 1: + raise ValueError( + "Deserializing a scope from string did not get exactly one scope. " + f"Instead got data={data}" + ) + return data[0] + + def serialize(self) -> str: + base_scope = ("*" if self.optional else "") + self.scope_string + if not self.dependencies: + return base_scope + return ( + base_scope + "[" + " ".join(c.serialize() for c in self.dependencies) + "]" + ) + + def add_dependency( + self, scope: str | Scope, *, optional: bool | None = None + ) -> Scope: + """ + Add a scope dependency. The dependent scope relationship will be stored in the + Scope and will be evident in its string representation. + + :param scope: The scope upon which the current scope depends + :type scope: str + :param optional: Mark the dependency an optional one. By default it is not. An + optional scope dependency can be declined by the user without declining + consent for the primary scope + :type optional: bool, optional + """ + if optional is not None: + if isinstance(scope, Scope): + raise ValueError( + "cannot use optional=... with a Scope object as the argument to " + "add_dependency" + ) + warnings.warn( + "Passing 'optional' to add_dependency is deprecated. " + "Construct an optional Scope object instead.", + DeprecationWarning, + stacklevel=2, + ) + scopeobj = Scope(scope, optional=optional) + else: + if isinstance(scope, str): + scopeobj = Scope.deserialize(scope) + else: + scopeobj = scope + self.dependencies.append(scopeobj) + return self + + def __repr__(self) -> str: + parts: list[str] = [f"'{self.scope_string}'"] + if self.optional: + parts.append("optional=True") + if self.dependencies: + parts.append(f"dependencies={self.dependencies!r}") + return "Scope(" + ", ".join(parts) + ")" + + def __str__(self) -> str: + return self.serialize() + + def __contains__(self, other: t.Any) -> bool: + """ + .. warning:: + + The ``__contains__`` method is a non-authoritative convenience for comparing + parsed scopes. Although the essence and intent of the check is summarized + below, there is no guarantee that it correctly reflects the permissions of a + token or tokens. The structure of the data for a given consent in Globus + Auth is not perfectly reflected in the parse tree. + + ``in`` and ``not in`` are defined as permission coverage checks + + ``scope1 in scope2`` means that a token scoped for + ``scope2`` has all of the permissions of a token scoped for ``scope1``. + + A scope is covered by another scope if + + - the top level strings match + - the optional-ness matches OR only the covered scope is optional + - the dependencies of the covered scope are all covered by dependencies of + the covering scope + + Therefore, the following are true: + + .. code-block:: pycon + + >>> s = lambda x: Scope.deserialize(x) # define this for brevity below + # self inclusion works, including when optional + >>> s("foo") in s("foo") + >>> s("*foo") in s("*foo") + # an optional scope is covered by a non-optional one, but not the reverse + >>> s("*foo") in s("foo") + >>> s("foo") not in s("*foo") + # dependencies have the expected meanings + >>> s("foo") in s("foo[bar]") + >>> s("foo[bar]") not in s("foo") + >>> s("foo[bar]") in s("foo[bar[baz]]") + # dependencies are not transitive and obey "optionalness" matching + >>> s("foo[bar]") not in s("foo[fizz[bar]]") + >>> s("foo[bar]") not in s("foo[*bar]") + """ + # scopes can only contain other scopes + if not isinstance(other, Scope): + return False + + # top-level scope must match + if self.scope_string != other.scope_string: + return False + + # between self.optional and other.optional, there are four possibilities, + # of which three are acceptable and one is not + # both optional and neither optional are okay, + # 'self' being non-optional and 'other' being optional is okay + # the failing case is 'other in self' when 'self' is optional and 'other' is not + # + # self.optional | other.optional | (other in self) is possible + # --------------|----------------|---------------------------- + # true | true | true + # false | false | true + # false | true | true + # true | false | false + # + # so only check for that one case + if self.optional and not other.optional: + return False + + # dependencies must all be contained -- search for a contrary example + for other_dep in other.dependencies: + for dep in self.dependencies: + if other_dep in dep: + break + # reminder: the else branch of a for-else means that the break was never hit + else: + return False + + # all criteria were met -- True! + return True diff --git a/src/globus_sdk/scopes/scope_definition.py b/src/globus_sdk/scopes/scope_definition.py index d2fe374c8..694b93d53 100644 --- a/src/globus_sdk/scopes/scope_definition.py +++ b/src/globus_sdk/scopes/scope_definition.py @@ -5,22 +5,15 @@ """ from __future__ import annotations -import enum import typing as t import warnings from globus_sdk._types import ScopeCollectionType -class ScopeParseError(ValueError): - pass - - def _iter_scope_collection(obj: ScopeCollectionType) -> t.Iterator[str]: if isinstance(obj, str): yield obj - elif isinstance(obj, Scope): - yield str(obj) elif isinstance(obj, MutableScope): yield str(obj) else: @@ -28,316 +21,7 @@ def _iter_scope_collection(obj: ScopeCollectionType) -> t.Iterator[str]: yield str(item) -class ParseTokenType(enum.Enum): - # a string like 'urn:globus:auth:scopes:transfer.api.globus.org:all' - scope_string = enum.auto() - # the optional marker, '*' - opt_marker = enum.auto() - # '[' and ']' - lbracket = enum.auto() - rbracket = enum.auto() - - -class ParseToken: - def __init__(self, value: str, token_type: ParseTokenType) -> None: - self.value = value - self.token_type = token_type - - -def _tokenize(scope_string: str) -> list[ParseToken]: - tokens: list[ParseToken] = [] - current_token: list[str] = [] - for idx, c in enumerate(scope_string): - try: - peek: str | None = scope_string[idx + 1] - except IndexError: - peek = None - - if c in "[]* ": - if current_token: - tokens.append( - ParseToken("".join(current_token), ParseTokenType.scope_string) - ) - current_token = [] - - if c == "*": - if peek == " ": - raise ScopeParseError("'*' must not be followed by a space") - tokens.append(ParseToken(c, ParseTokenType.opt_marker)) - elif c == "[": - tokens.append(ParseToken(c, ParseTokenType.lbracket)) - elif c == "]": - if peek is not None and peek not in (" ", "]"): - raise ScopeParseError("']' may only be followed by a space or ']'") - tokens.append(ParseToken(c, ParseTokenType.rbracket)) - elif c == " ": - if peek == "[": - raise ScopeParseError("'[' cannot have a preceding space") - else: - raise NotImplementedError - else: - current_token.append(c) - if current_token: - tokens.append(ParseToken("".join(current_token), ParseTokenType.scope_string)) - return tokens - - -def _parse_tokens(tokens: list[ParseToken]) -> list[Scope]: - # value to return - ret: list[Scope] = [] - # track whether or not the current scope is optional (has a preceding *) - current_optional = False - # keep a stack of "parents", each time we enter a `[` context, push the last scope - # and each time we exit via a `]`, pop from the stack - parents: list[Scope] = [] - # track the current (or, by similar terminology, "last") complete scope seen - current_scope: Scope | None = None - - for idx in range(len(tokens)): - token = tokens[idx] - try: - peek: ParseToken | None = tokens[idx + 1] - except IndexError: - peek = None - - if token.token_type == ParseTokenType.opt_marker: - current_optional = True - if peek is None: - raise ScopeParseError("ended in optional marker") - if peek.token_type != ParseTokenType.scope_string: - raise ScopeParseError( - "a scope string must always follow an optional marker" - ) - - elif token.token_type == ParseTokenType.lbracket: - if peek is None: - raise ScopeParseError("ended in left bracket") - if peek.token_type == ParseTokenType.rbracket: - raise ScopeParseError("found empty brackets") - if peek.token_type == ParseTokenType.lbracket: - raise ScopeParseError("found double left-bracket") - if not current_scope: - raise ScopeParseError("found '[' without a preceding scope string") - - parents.append(current_scope) - elif token.token_type == ParseTokenType.rbracket: - if not parents: - raise ScopeParseError("found ']' with no matching '[' preceding it") - parents.pop() - else: - current_scope = Scope(token.value, optional=current_optional) - current_optional = False - if parents: - parents[-1].dependencies.append(current_scope) - else: - ret.append(current_scope) - if parents: - raise ScopeParseError("unclosed brackets, missing ']'") - - return ret - - -class Scope: - """ - A scope object is a representation of a scope which allows modifications to be - made. In particular, it supports handling scope dependencies via - ``add_dependency``. - - `str(Scope(...))` produces a valid scope string for use in various methods. - - :param scope_string: The string which will be used as the basis for this Scope - :type scope_string: str - :param optional: The scope may be marked as optional. This means that the scope can - be declined by the user without declining consent for other scopes - :type optional: bool - """ - - def __init__( - self, - scope_string: str, - *, - optional: bool = False, - dependencies: list[Scope] | None = None, - ) -> None: - if any(c in scope_string for c in "[]* "): - raise ValueError( - "Scope instances may not contain the special characters '[]* '. " - "Use either Scope.deserialize or Scope.parse instead" - ) - self.scope_string = scope_string - self.optional = optional - self.dependencies: list[Scope] = [] if dependencies is None else dependencies - - @staticmethod - def parse(scope_string: str) -> list[Scope]: - """ - Parse an arbitrary scope string to a list of scopes. - - Zero or more than one scope may be returned, as in the case of an empty string - or space-delimited scopes. - """ - tokens = _tokenize(scope_string) - return _parse_tokens(tokens) - - @classmethod - def deserialize(cls, scope_string: str) -> Scope: - """ - Deserialize a scope string to a scope object. - - This is the special case of parsing in which exactly one scope must be returned - by the parse. - """ - data = Scope.parse(scope_string) - if len(data) != 1: - raise ValueError( - "Deserializing a scope from string did not get exactly one scope. " - f"Instead got data={data}" - ) - return data[0] - - def serialize(self) -> str: - base_scope = ("*" if self.optional else "") + self.scope_string - if not self.dependencies: - return base_scope - return ( - base_scope + "[" + " ".join(c.serialize() for c in self.dependencies) + "]" - ) - - def add_dependency( - self, scope: str | Scope, *, optional: bool | None = None - ) -> Scope: - """ - Add a scope dependency. The dependent scope relationship will be stored in the - Scope and will be evident in its string representation. - - :param scope: The scope upon which the current scope depends - :type scope: str - :param optional: Mark the dependency an optional one. By default it is not. An - optional scope dependency can be declined by the user without declining - consent for the primary scope - :type optional: bool, optional - """ - if optional is not None: - if isinstance(scope, Scope): - raise ValueError( - "cannot use optional=... with a Scope object as the argument to " - "add_dependency" - ) - warnings.warn( - "Passing 'optional' to add_dependency is deprecated. " - "Construct an optional Scope object instead.", - DeprecationWarning, - stacklevel=2, - ) - scopeobj = Scope(scope, optional=optional) - else: - if isinstance(scope, str): - scopeobj = Scope.deserialize(scope) - else: - scopeobj = scope - self.dependencies.append(scopeobj) - return self - - def __repr__(self) -> str: - parts: list[str] = [f"'{self.scope_string}'"] - if self.optional: - parts.append("optional=True") - if self.dependencies: - parts.append(f"dependencies={self.dependencies!r}") - return "Scope(" + ", ".join(parts) + ")" - - def __str__(self) -> str: - return self.serialize() - - def _contains(self, other: t.Any) -> bool: - """ - .. warning:: - - The ``_contains`` method is a non-authoritative convenience for comparing - parsed scopes. Although the essence and intent of the check is summarized - below, there is no guarantee that it correctly reflects the permissions of a - token or tokens. The structure of the data for a given consent in Globus - Auth is not perfectly reflected in the parse tree. - - ``in`` and ``not in`` are defined as permission coverage checks - - ``scope1 in scope2`` means that a token scoped for - ``scope2`` has all of the permissions of a token scoped for ``scope1``. - - A scope is covered by another scope if - - - the top level strings match - - the optional-ness matches OR only the covered scope is optional - - the dependencies of the covered scope are all covered by dependencies of - the covering scope - - Therefore, the following are true: - - .. code-block:: pycon - - >>> s = lambda x: Scope.deserialize(x) # define this for brevity below - # self inclusion works, including when optional - >>> s("foo")._contains(s("foo")) - >>> s("*foo")._contains(s("*foo")) - # an optional scope is covered by a non-optional one, but not the reverse - >>> not s("foo")._contains(s("*foo")) - >>> s("*foo")._contains(s("foo")) - # dependencies have the expected meanings - >>> s("foo")._contains(s("foo[bar]")) - >>> not s("foo[bar]")._contains(s("foo")) - >>> s("foo[bar]")._contains(s("foo[bar[baz]]")) - # dependencies are not transitive and obey "optionalness" matching - >>> not s("foo[bar]")._contains(s("foo[fizz[bar]]")) - >>> not s("foo[bar]")._contains(s("foo[*bar]")) - """ - # scopes can only contain other scopes - if not isinstance(other, Scope): - return False - - # top-level scope must match - if self.scope_string != other.scope_string: - return False - - # between self.optional and other.optional, there are four possibilities, - # of which three are acceptable and one is not - # both optional and neither optional are okay, - # 'self' being non-optional and 'other' being optional is okay - # the failing case is 'other in self' when 'self' is optional and 'other' is not - # - # self.optional | other.optional | (other in self) is possible - # --------------|----------------|---------------------------- - # true | true | true - # false | false | true - # false | true | true - # true | false | false - # - # so only check for that one case - if self.optional and not other.optional: - return False - - # dependencies must all be contained -- search for a contrary example - for other_dep in other.dependencies: - for dep in self.dependencies: - if dep._contains(other_dep): - break - # reminder: the else branch of a for-else means that the break was never hit - else: - return False - - # all criteria were met -- True! - return True - - @staticmethod - def scopes2str(obj: ScopeCollectionType) -> str: - """ - Given a scope string, a collection of scope strings, a Scope object, a - collection of Scope objects, or a mixed collection of strings and - Scopes, convert to a string which can be used in a request. - """ - return " ".join(_iter_scope_collection(obj)) - - -# TODO: convert MutableScope into an alias of Scope +# TODO: rename MutableScope to Scope class MutableScope: """ A scope object is a representation of a scope which allows modifications to be diff --git a/tests/unit/experimental/test_scope_parser.py b/tests/unit/experimental/test_scope_parser.py new file mode 100644 index 000000000..d5da7be89 --- /dev/null +++ b/tests/unit/experimental/test_scope_parser.py @@ -0,0 +1,264 @@ +import pytest + +from globus_sdk.experimental.scope_parser import Scope, ScopeParseError + + +def test_scope_str_and_repr_simple(): + s = Scope("simple") + assert str(s) == "simple" + assert repr(s) == "Scope('simple')" + + +def test_scope_str_and_repr_optional(): + s = Scope("simple", optional=True) + assert str(s) == "*simple" + assert repr(s) == "Scope('simple', optional=True)" + + +def test_scope_str_and_repr_with_dependencies(): + s = Scope("top") + s.add_dependency("foo") + assert str(s) == "top[foo]" + s.add_dependency("bar") + assert str(s) == "top[foo bar]" + assert repr(s) == "Scope('top', dependencies=[Scope('foo'), Scope('bar')])" + + +def test_add_dependency_warns_on_optional_but_still_has_good_str_and_repr(): + s = Scope("top") + # this should warn, the use of `optional=...` rather than adding a Scope object + # when optional dependencies are wanted is deprecated + with pytest.warns(DeprecationWarning): + s.add_dependency("foo", optional=True) + + # confirm the str representation and repr for good measure + assert str(s) == "top[*foo]" + assert repr(s) == "Scope('top', dependencies=[Scope('foo', optional=True)])" + + +@pytest.mark.parametrize("optional_arg", (True, False)) +def test_add_dependency_fails_if_optional_is_combined_with_scope(optional_arg): + s = Scope("top") + s2 = Scope("bottom") + with pytest.raises(ValueError): + s.add_dependency(s2, optional=optional_arg) + + +def test_scope_str_nested(): + top = Scope("top") + mid = Scope("mid") + bottom = Scope("bottom") + mid.add_dependency(bottom) + top.add_dependency(mid) + assert str(bottom) == "bottom" + assert str(mid) == "mid[bottom]" + assert str(top) == "top[mid[bottom]]" + + +def test_add_dependency_parses_scope_with_optional_marker(): + s = Scope("top") + s.add_dependency("*subscope") + assert str(s) == "top[*subscope]" + assert repr(s) == "Scope('top', dependencies=[Scope('subscope', optional=True)])" + + +def test_scope_parsing_allows_empty_string(): + scopes = Scope.parse("") + assert scopes == [] + + +@pytest.mark.parametrize( + "scope_string1,scope_string2", + [ + ("foo ", "foo"), + (" foo", "foo"), + ("foo[ bar]", "foo[bar]"), + ], +) +def test_scope_parsing_ignores_non_semantic_whitespace(scope_string1, scope_string2): + list1 = Scope.parse(scope_string1) + list2 = Scope.parse(scope_string2) + assert len(list1) == len(list2) == 1 + s1, s2 = list1[0], list2[0] + # Scope.__eq__ is not defined, so equivalence checking is manual (and somewhat error + # prone) for now + assert s1.scope_string == s2.scope_string + assert s1.optional == s2.optional + for i in range(len(s1.dependencies)): + assert s1.dependencies[i].scope_string == s2.dependencies[i].scope_string + assert s1.dependencies[i].optional == s2.dependencies[i].optional + + +@pytest.mark.parametrize( + "scopestring", + [ + # ending in '*' + "foo*", + "foo *", + # '*' followed by '[] ' + "foo*[bar]", + "foo *[bar]", + "foo [bar*]", + "foo * ", + "* foo", + # empty brackets + "foo[]", + # starting with open bracket + "[foo]", + # double brackets + "foo[[bar]]", + # unbalanced open brackets + "foo[", + "foo[bar", + # unbalanced close brackets + "foo]", + "foo bar]", + "foo[bar]]", + "foo[bar] baz]", + # space before brackets + "foo [bar]", + # missing space before next scope string after ']' + "foo[bar]baz", + ], +) +def test_scope_parsing_rejects_bad_inputs(scopestring): + with pytest.raises(ScopeParseError): + Scope.parse(scopestring) + + +@pytest.mark.parametrize( + "scopestring", + ("foo", "*foo", "foo[bar]", "foo[*bar]", "foo bar", "foo[bar[baz]]"), +) +def test_scope_parsing_accepts_valid_inputs(scopestring): + # test *only* that parsing does not error and returns a non-empty list of scopes + scopes = Scope.parse(scopestring) + assert isinstance(scopes, list) + assert len(scopes) > 0 + assert isinstance(scopes[0], Scope) + + +def test_scope_deserialize_simple(): + scope = Scope.deserialize("foo") + assert str(scope) == "foo" + + +def test_scope_deserialize_with_dependencies(): + # oh, while we're here, let's also check that our whitespace insensitivity works + scope = Scope.deserialize("foo[ bar *baz ]") + assert str(scope) in ("foo[bar *baz]", "foo[*baz bar]") + + +def test_scope_deserialize_fails_on_empty(): + with pytest.raises(ValueError): + Scope.deserialize(" ") + + +def test_scope_deserialize_fails_on_multiple_top_level_scopes(): + with pytest.raises(ValueError): + Scope.deserialize("foo bar") + + +@pytest.mark.parametrize("scope_str", ("*foo", "foo[bar]", "foo[", "foo]", "foo bar")) +def test_scope_init_forbids_special_chars(scope_str): + with pytest.raises(ValueError): + Scope(scope_str) + + +def test_scope_contains_requires_scope_objects(): + s = Scope("foo") + assert "foo" not in s + + +@pytest.mark.parametrize( + "contained, containing, expect", + [ + # string matching, including optional on both sides + ("foo", "foo", True), # identity + ("*foo", "*foo", True), # identity + ("foo", "bar", False), + ("foo", "*bar", False), + ("*foo", "bar", False), + # optional-ness is one-way when mismatched + ("foo", "*foo", False), + ("*foo", "foo", True), + # dependency matching is also one-way when mismatched + ("foo[bar]", "foo[bar]", True), # identity + ("foo[bar]", "foo", False), + ("foo", "foo[bar]", True), + ("foo", "foo[bar[baz]]", True), + ("foo[bar]", "foo[bar[baz]]", True), + ("foo[bar[baz]]", "foo[bar[baz]]", True), # identity + # and the combination of dependencies with optional also works + ("foo[*bar]", "foo[bar[baz]]", True), + ("foo[*bar]", "foo[*bar[baz]]", True), + ("foo[bar]", "foo[bar[*baz]]", True), + ("foo[*bar]", "foo[bar[*baz]]", True), + ], +) +def test_scope_contains_simple_cases(contained, containing, expect): + outer_s = Scope.deserialize(containing) + inner_s = Scope.deserialize(contained) + assert (inner_s in outer_s) == expect + + +@pytest.mark.parametrize( + "contained, containing, expect", + [ + # "simple" cases for multiple dependencies + ("foo[bar baz]", "foo[bar[baz] baz]", True), + ("foo[bar baz]", "foo[bar[baz]]", False), + ("foo[baz bar]", "foo[bar[baz] baz]", True), + ("foo[bar baz]", "foo[bar[baz] baz buzz]", True), + # these scenarios will mirror some "realistic" usage + ( + "timer[transfer_ap[transfer]]", + "timer[transfer_ap[transfer[*foo/data_access]]]", + True, + ), + ( + "timer[transfer_ap[transfer[*foo/data_access]]]", + "timer[transfer_ap[transfer[*foo/data_access *bar/data_access]]]", + True, + ), + ( + "timer[transfer_ap[transfer[*bar *foo]]]", + "timer[transfer_ap[transfer[*foo *bar *baz]]]", + True, + ), + ( + "timer[transfer_ap[transfer[*foo/data_access]]]", + "timer[transfer_ap[transfer]]", + False, + ), + ( + "timer[transfer_ap[transfer[*foo/data_access *bar/data_access]]]", + "timer[transfer_ap[transfer[*foo/data_access]]]", + False, + ), + ( + "timer[transfer_ap[transfer[foo/data_access]]]", + "timer[transfer_ap[transfer[*foo/data_access]]]", + False, + ), + ( + "timer[transfer_ap[transfer[*foo/data_access]]]", + "timer[transfer_ap[transfer[foo/data_access]]]", + True, + ), + ( + "timer[transfer_ap[*transfer[*foo/data_access]]]", + "timer[transfer_ap[transfer[foo/data_access]]]", + True, + ), + ( + "timer[transfer_ap[transfer[*foo/data_access]]]", + "timer[transfer_ap[*transfer[foo/data_access]]]", + False, + ), + ], +) +def test_scope_contains_complex_usages(contained, containing, expect): + outer_s = Scope.deserialize(containing) + inner_s = Scope.deserialize(contained) + assert (inner_s in outer_s) == expect diff --git a/tests/unit/test_scopes.py b/tests/unit/test_scopes.py index d059e494c..ce2d0ce11 100644 --- a/tests/unit/test_scopes.py +++ b/tests/unit/test_scopes.py @@ -3,7 +3,6 @@ import pytest from globus_sdk.scopes import FlowsScopes, MutableScope, ScopeBuilder -from globus_sdk.scopes.scope_definition import Scope, ScopeParseError def test_url_scope_string(): @@ -80,42 +79,32 @@ def test_sb_allowed_inputs_types(): assert list_sb.do_a_thing == scope_1_urn -@pytest.mark.parametrize("scope_class", (Scope, MutableScope)) -def test_scope_str_and_repr_simple(scope_class): - classname = "Scope" if scope_class == Scope else "MutableScope" - s = scope_class("simple") +def test_scope_str_and_repr_simple(): + s = MutableScope("simple") assert str(s) == "simple" - assert repr(s) == f"{classname}('simple')" + assert repr(s) == "MutableScope('simple')" -@pytest.mark.parametrize("scope_class", (Scope, MutableScope)) -def test_scope_str_and_repr_optional(scope_class): - classname = "Scope" if scope_class == Scope else "MutableScope" - s = scope_class("simple", optional=True) +def test_scope_str_and_repr_optional(): + s = MutableScope("simple", optional=True) assert str(s) == "*simple" - assert repr(s) == f"{classname}('simple', optional=True)" + assert repr(s) == "MutableScope('simple', optional=True)" -@pytest.mark.parametrize("scope_class", (Scope, MutableScope)) -def test_scope_str_and_repr_with_dependencies(scope_class): - classname = "Scope" if scope_class == Scope else "MutableScope" - - s = scope_class("top") +def test_scope_str_and_repr_with_dependencies(): + s = MutableScope("top") s.add_dependency("foo") assert str(s) == "top[foo]" s.add_dependency("bar") assert str(s) == "top[foo bar]" assert ( - repr(s) - == f"{classname}('top', dependencies=[{classname}('foo'), {classname}('bar')])" + repr(s) == "MutableScope('top', " + "dependencies=[MutableScope('foo'), MutableScope('bar')])" ) -@pytest.mark.parametrize("scope_class", (Scope, MutableScope)) -def test_add_dependency_warns_on_optional_but_still_has_good_str_and_repr(scope_class): - classname = "Scope" if scope_class == Scope else "MutableScope" - - s = scope_class("top") +def test_add_dependency_warns_on_optional_but_still_has_good_str_and_repr(): + s = MutableScope("top") # this should warn, the use of `optional=...` rather than adding a Scope object # when optional dependencies are wanted is deprecated with pytest.warns(DeprecationWarning): @@ -125,46 +114,10 @@ def test_add_dependency_warns_on_optional_but_still_has_good_str_and_repr(scope_ assert str(s) == "top[*foo]" assert ( repr(s) - == f"{classname}('top', dependencies=[{classname}('foo', optional=True)])" + == "MutableScope('top', dependencies=[MutableScope('foo', optional=True)])" ) -@pytest.mark.parametrize("optional_arg", (True, False)) -def test_add_dependency_fails_if_optional_is_combined_with_scope(optional_arg): - s = Scope("top") - s2 = Scope("bottom") - with pytest.raises(ValueError): - s.add_dependency(s2, optional=optional_arg) - - -def test_scope_str_nested(): - top = Scope("top") - mid = Scope("mid") - bottom = Scope("bottom") - mid.add_dependency(bottom) - top.add_dependency(mid) - assert str(bottom) == "bottom" - assert str(mid) == "mid[bottom]" - assert str(top) == "top[mid[bottom]]" - - -def test_scope_collection_to_str(): - foo = Scope("foo") - bar = Scope("bar") - baz = "baz" - assert Scope.scopes2str(foo) == "foo" - assert Scope.scopes2str([foo, bar]) == "foo bar" - assert Scope.scopes2str(baz) == "baz" - assert Scope.scopes2str([foo, baz]) == "foo baz" - - -def test_add_dependency_parses_scope_with_optional_marker(): - s = Scope("top") - s.add_dependency("*subscope") - assert str(s) == "top[*subscope]" - assert repr(s) == "Scope('top', dependencies=[Scope('subscope', optional=True)])" - - def test_scopebuilder_make_mutable_produces_same_strings(): sb = ScopeBuilder(str(uuid.UUID(int=0)), known_scopes="foo", known_url_scopes="bar") assert str(sb.make_mutable("foo")) == sb.foo @@ -184,221 +137,7 @@ def test_flows_scopes_creation(): ) -def test_scope_parsing_allows_empty_string(): - scopes = Scope.parse("") - assert scopes == [] - - -@pytest.mark.parametrize( - "scope_string1,scope_string2", - [ - ("foo ", "foo"), - (" foo", "foo"), - ("foo[ bar]", "foo[bar]"), - ], -) -def test_scope_parsing_ignores_non_semantic_whitespace(scope_string1, scope_string2): - list1 = Scope.parse(scope_string1) - list2 = Scope.parse(scope_string2) - assert len(list1) == len(list2) == 1 - s1, s2 = list1[0], list2[0] - # Scope.__eq__ is not defined, so equivalence checking is manual (and somewhat error - # prone) for now - assert s1.scope_string == s2.scope_string - assert s1.optional == s2.optional - for i in range(len(s1.dependencies)): - assert s1.dependencies[i].scope_string == s2.dependencies[i].scope_string - assert s1.dependencies[i].optional == s2.dependencies[i].optional - - -@pytest.mark.parametrize( - "scopestring", - [ - # ending in '*' - "foo*", - "foo *", - # '*' followed by '[] ' - "foo*[bar]", - "foo *[bar]", - "foo [bar*]", - "foo * ", - "* foo", - # empty brackets - "foo[]", - # starting with open bracket - "[foo]", - # double brackets - "foo[[bar]]", - # unbalanced open brackets - "foo[", - "foo[bar", - # unbalanced close brackets - "foo]", - "foo bar]", - "foo[bar]]", - "foo[bar] baz]", - # space before brackets - "foo [bar]", - # missing space before next scope string after ']' - "foo[bar]baz", - ], -) -def test_scope_parsing_rejects_bad_inputs(scopestring): - with pytest.raises(ScopeParseError): - Scope.parse(scopestring) - - -@pytest.mark.parametrize( - "scopestring", - ("foo", "*foo", "foo[bar]", "foo[*bar]", "foo bar", "foo[bar[baz]]"), -) -def test_scope_parsing_accepts_valid_inputs(scopestring): - # test *only* that parsing does not error and returns a non-empty list of scopes - scopes = Scope.parse(scopestring) - assert isinstance(scopes, list) - assert len(scopes) > 0 - assert isinstance(scopes[0], Scope) - - -@pytest.mark.parametrize("rs_name", (str(uuid.UUID(int=0)), "example.globus.org")) -@pytest.mark.parametrize("scope_format", ("urn", "url")) -def test_scope_parsing_can_consume_scopebuilder_results(rs_name, scope_format): - sb = ScopeBuilder(rs_name) - if scope_format == "urn": - scope_string = sb.urn_scope_string("foo") - expect_string = f"urn:globus:auth:scope:{rs_name}:foo" - elif scope_format == "url": - scope_string = sb.url_scope_string("foo") - expect_string = f"https://auth.globus.org/scopes/{rs_name}/foo" - else: - raise NotImplementedError - - scope = Scope.deserialize(scope_string) - assert str(scope) == expect_string - - -def test_scope_deserialize_simple(): - scope = Scope.deserialize("foo") - assert str(scope) == "foo" - - -def test_scope_deserialize_with_dependencies(): - # oh, while we're here, let's also check that our whitespace insensitivity works - scope = Scope.deserialize("foo[ bar *baz ]") - assert str(scope) == "foo[bar *baz]" - - -def test_scope_deserialize_fails_on_empty(): - with pytest.raises(ValueError): - Scope.deserialize(" ") - - -def test_scope_deserialize_fails_on_multiple_top_level_scopes(): - with pytest.raises(ValueError): - Scope.deserialize("foo bar") - - @pytest.mark.parametrize("scope_str", ("*foo", "foo[bar]", "foo[", "foo]", "foo bar")) -@pytest.mark.parametrize("scope_class", (Scope, MutableScope)) -def test_scope_init_forbids_special_chars(scope_str, scope_class): +def test_scope_init_forbids_special_chars(scope_str): with pytest.raises(ValueError): - scope_class(scope_str) - - -def test_scope_contains_requires_scope_objects(): - s = Scope("foo") - assert not s._contains("foo") - - -@pytest.mark.parametrize( - "contained, containing, expect", - [ - # string matching, including optional on both sides - ("foo", "foo", True), # identity - ("*foo", "*foo", True), # identity - ("foo", "bar", False), - ("foo", "*bar", False), - ("*foo", "bar", False), - # optional-ness is one-way when mismatched - ("foo", "*foo", False), - ("*foo", "foo", True), - # dependency matching is also one-way when mismatched - ("foo[bar]", "foo[bar]", True), # identity - ("foo[bar]", "foo", False), - ("foo", "foo[bar]", True), - ("foo", "foo[bar[baz]]", True), - ("foo[bar]", "foo[bar[baz]]", True), - ("foo[bar[baz]]", "foo[bar[baz]]", True), # identity - # and the combination of dependencies with optional also works - ("foo[*bar]", "foo[bar[baz]]", True), - ("foo[*bar]", "foo[*bar[baz]]", True), - ("foo[bar]", "foo[bar[*baz]]", True), - ("foo[*bar]", "foo[bar[*baz]]", True), - ], -) -def test_scope_contains_simple_cases(contained, containing, expect): - outer_s = Scope.deserialize(containing) - inner_s = Scope.deserialize(contained) - assert outer_s._contains(inner_s) == expect - - -@pytest.mark.parametrize( - "contained, containing, expect", - [ - # "simple" cases for multiple dependencies - ("foo[bar baz]", "foo[bar[baz] baz]", True), - ("foo[bar baz]", "foo[bar[baz]]", False), - ("foo[baz bar]", "foo[bar[baz] baz]", True), - ("foo[bar baz]", "foo[bar[baz] baz buzz]", True), - # these scenarios will mirror some "realistic" usage - ( - "timer[transfer_ap[transfer]]", - "timer[transfer_ap[transfer[*foo/data_access]]]", - True, - ), - ( - "timer[transfer_ap[transfer[*foo/data_access]]]", - "timer[transfer_ap[transfer[*foo/data_access *bar/data_access]]]", - True, - ), - ( - "timer[transfer_ap[transfer[*bar *foo]]]", - "timer[transfer_ap[transfer[*foo *bar *baz]]]", - True, - ), - ( - "timer[transfer_ap[transfer[*foo/data_access]]]", - "timer[transfer_ap[transfer]]", - False, - ), - ( - "timer[transfer_ap[transfer[*foo/data_access *bar/data_access]]]", - "timer[transfer_ap[transfer[*foo/data_access]]]", - False, - ), - ( - "timer[transfer_ap[transfer[foo/data_access]]]", - "timer[transfer_ap[transfer[*foo/data_access]]]", - False, - ), - ( - "timer[transfer_ap[transfer[*foo/data_access]]]", - "timer[transfer_ap[transfer[foo/data_access]]]", - True, - ), - ( - "timer[transfer_ap[*transfer[*foo/data_access]]]", - "timer[transfer_ap[transfer[foo/data_access]]]", - True, - ), - ( - "timer[transfer_ap[transfer[*foo/data_access]]]", - "timer[transfer_ap[*transfer[foo/data_access]]]", - False, - ), - ], -) -def test_scope_contains_complex_usages(contained, containing, expect): - outer_s = Scope.deserialize(containing) - inner_s = Scope.deserialize(contained) - assert outer_s._contains(inner_s) == expect + MutableScope(scope_str) From 9a9f41a0c7a1b1ee685b6cb8fcd0464f34279558 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Wed, 19 Jul 2023 16:45:01 -0500 Subject: [PATCH 02/17] Rename a variable to avoid spurious pylint failure pylint flagged `graph` (used earlier in a function) as a redefinition of a name from outer scope. This happens because the block under the `if __name__ == "__main__"` check is technically in module scope. Therefore, at analysis-time, there is a module-scoped variable named `graph` and we "need" to avoid redefinition. This could be tucked into a `def _main()` to "hide" the name from module scope, but a rename is equally simple to dodge the overly-aggressive pylint check. --- src/globus_sdk/experimental/scope_parser/parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/parser.py b/src/globus_sdk/experimental/scope_parser/parser.py index 597d13bbb..1fdb6fdf9 100644 --- a/src/globus_sdk/experimental/scope_parser/parser.py +++ b/src/globus_sdk/experimental/scope_parser/parser.py @@ -260,9 +260,9 @@ def parse_scope_graph(scopes: str) -> ScopeGraph: if __name__ == "__main__": import sys - graph = parse_scope_graph(sys.argv[1]) + parsed_graph = parse_scope_graph(sys.argv[1]) print( "top level scopes:", - ", ".join([name for name, _optional in graph.top_level_scopes]), + ", ".join([name for name, _optional in parsed_graph.top_level_scopes]), ) - print(graph) + print(parsed_graph) From f296e4f14800dc2ea361219b35aa8c680de3c1ee Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Wed, 19 Jul 2023 16:48:16 -0500 Subject: [PATCH 03/17] Add changelog for the scope parser addition --- .../20230719_164657_sirosen_add_experimental_scope_parsing.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst diff --git a/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst b/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst new file mode 100644 index 000000000..37eeea504 --- /dev/null +++ b/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst @@ -0,0 +1,3 @@ +* Introduce an experimental Globus AUth scope parser in + ``globus_sdk.experimental.scope_parser``. + Full usage is documented in the SDK Experimental docs. (:pr:`NUMBER`) From 023667018e666239508e201b5aa3a3dc732aef86 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Mon, 24 Jul 2023 09:28:03 -0500 Subject: [PATCH 04/17] Apply suggestions from code review Co-authored-by: Kurt McKee --- .../20230719_164657_sirosen_add_experimental_scope_parsing.rst | 2 +- docs/experimental/scope_parser.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst b/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst index 37eeea504..2e3b2c042 100644 --- a/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst +++ b/changelog.d/20230719_164657_sirosen_add_experimental_scope_parsing.rst @@ -1,3 +1,3 @@ -* Introduce an experimental Globus AUth scope parser in +* Introduce an experimental Globus Auth scope parser in ``globus_sdk.experimental.scope_parser``. Full usage is documented in the SDK Experimental docs. (:pr:`NUMBER`) diff --git a/docs/experimental/scope_parser.rst b/docs/experimental/scope_parser.rst index 34b5d6642..d37a1f864 100644 --- a/docs/experimental/scope_parser.rst +++ b/docs/experimental/scope_parser.rst @@ -13,7 +13,7 @@ It should be regarded as very similar to the existing ``MutableScope`` object in - ``Scope.parse`` is available, for parsing scopes from strings - The Globus SDK does not support using ``Scope`` in all of the locations where ``MutableScope`` is supported -- Scope objects must be stringified for use -- ``Scope`` objects define a ``_contains__`` method, allowing one to check if one scope +- ``Scope`` objects define a ``__contains__`` method, allowing one to check if one scope properly contains another .. warning:: From e7f2c2dff859ea55bcbeba5721ba31c378fa63bb Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 28 Jul 2023 17:05:12 -0500 Subject: [PATCH 05/17] Make more of scope parsing private In order to better indicate that the parser is an implementation detail, move several components into a `_parser.py` module. The `errors` are separately declared and exposed to better represent that they are part of the public interface of the package. --- src/globus_sdk/experimental/scope_parser/__init__.py | 2 +- .../scope_parser/{parser.py => _parser.py} | 10 ++-------- src/globus_sdk/experimental/scope_parser/errors.py | 6 ++++++ .../experimental/scope_parser/scope_definition.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) rename src/globus_sdk/experimental/scope_parser/{parser.py => _parser.py} (97%) create mode 100644 src/globus_sdk/experimental/scope_parser/errors.py diff --git a/src/globus_sdk/experimental/scope_parser/__init__.py b/src/globus_sdk/experimental/scope_parser/__init__.py index be1fd854c..b9fd8ca6f 100644 --- a/src/globus_sdk/experimental/scope_parser/__init__.py +++ b/src/globus_sdk/experimental/scope_parser/__init__.py @@ -1,4 +1,4 @@ -from .parser import ScopeCycleError, ScopeParseError +from .errors import ScopeCycleError, ScopeParseError from .scope_definition import Scope __all__ = ( diff --git a/src/globus_sdk/experimental/scope_parser/parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py similarity index 97% rename from src/globus_sdk/experimental/scope_parser/parser.py rename to src/globus_sdk/experimental/scope_parser/_parser.py index 1fdb6fdf9..21cb68c7c 100644 --- a/src/globus_sdk/experimental/scope_parser/parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -4,13 +4,7 @@ import typing as t from collections import defaultdict, deque - -class ScopeParseError(ValueError): - """The error raised if scope parsing fails.""" - - -class ScopeCycleError(ScopeParseError): - """The error raised if scope parsing discovers a cycle.""" +from .errors import ScopeCycleError, ScopeParseError class ParseTokenType(enum.Enum): @@ -113,7 +107,7 @@ def _parse_tokens(tokens: list[ParseToken]) -> list[ScopeTreeNode]: current_scope = ScopeTreeNode(token.value, optional=current_optional) current_optional = False if parents: - parents[-1].dependencies.append(current_scope) + parents[-1].add_dependency(current_scope) else: ret.append(current_scope) if parents: diff --git a/src/globus_sdk/experimental/scope_parser/errors.py b/src/globus_sdk/experimental/scope_parser/errors.py new file mode 100644 index 000000000..9d0249ec0 --- /dev/null +++ b/src/globus_sdk/experimental/scope_parser/errors.py @@ -0,0 +1,6 @@ +class ScopeParseError(ValueError): + """The error raised if scope parsing fails.""" + + +class ScopeCycleError(ScopeParseError): + """The error raised if scope parsing discovers a cycle.""" diff --git a/src/globus_sdk/experimental/scope_parser/scope_definition.py b/src/globus_sdk/experimental/scope_parser/scope_definition.py index 5d1033371..4056fbcab 100644 --- a/src/globus_sdk/experimental/scope_parser/scope_definition.py +++ b/src/globus_sdk/experimental/scope_parser/scope_definition.py @@ -3,7 +3,7 @@ import typing as t import warnings -from .parser import parse_scope_graph +from ._parser import parse_scope_graph class Scope: From fe3394449327730916aca7777de4bc4001fc5706 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 28 Jul 2023 17:12:09 -0500 Subject: [PATCH 06/17] Remove unused attr from experimental ScopeGraph The 'dependencies' property was never used. Remove it. --- src/globus_sdk/experimental/scope_parser/_parser.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index 21cb68c7c..9a3a807d5 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -157,10 +157,6 @@ def __init__(self) -> None: str, set[tuple[str, str, bool]] ] = defaultdict(set) - @property - def dependencies(self) -> set[tuple[str, str, bool]]: - return self.edges - def get_child_edges(self, node: str) -> set[tuple[str, str, bool]]: return set(self._nodes_to_outbound_edges[node]) From a7c72812ce1cc59296946d960aac1f9e69bf5efa Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Fri, 28 Jul 2023 19:14:00 -0500 Subject: [PATCH 07/17] Add more tests and rewrite cycle detection In the experimental scope graph component, the cycle detection code was not correct. The simplicity of the approach used did not properly account for a number of scenarios like `foo[bar[baz] baz]`, which would be incorrectly detected as a cycle. These errors were masked by a secondary flaw, an incorrect call to `set.clear()` which wiped out important information. Rather than continuing to pursue this approach, for a first draft of this component simply use a well-understood recursive cycle detection algorithm which operates linearly in the size of the graph. The implementation can and should be unrolled to be iterative in the future. --- .pre-commit-config.yaml | 1 + .../experimental/scope_parser/_parser.py | 100 ++++++++++++++---- tests/unit/experimental/test_scope_parser.py | 31 +++++- ...ope_parser_intermediate_representations.py | 79 ++++++++++++++ 4 files changed, 190 insertions(+), 21 deletions(-) create mode 100644 tests/unit/experimental/test_scope_parser_intermediate_representations.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e9310de26..43392d2fc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,6 +42,7 @@ repos: rev: v2.2.4 hooks: - id: codespell + args: ["--ignore-regex", "https://[^\\s]*"] # custom local hooks - repo: local diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index 9a3a807d5..fba6237c2 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -179,26 +179,86 @@ def _normalize_optionals(self) -> None: self._nodes_to_outbound_edges[src].remove(edge) def _check_cycles(self) -> None: - nodes_to_check = set(self.nodes) - seen_nodes = set() - new_edges_to_visit: set[tuple[str, str, bool]] = set() - while nodes_to_check: - start = nodes_to_check.pop() - seen_nodes.add(start) - edges_to_visit = self._nodes_to_outbound_edges[start] - while edges_to_visit: - new_edges_to_visit.clear() - for _source, dest, _optional in edges_to_visit: - if dest in seen_nodes: - raise ScopeCycleError( - "scopes contain a cyclic dependency on " - f"{dest} ({list(seen_nodes)})" - ) - seen_nodes.add(dest) - new_edges_to_visit |= self._nodes_to_outbound_edges[dest] - edges_to_visit = new_edges_to_visit - nodes_to_check -= seen_nodes - seen_nodes.clear() + # check for two invariants: + # 1. no strongly connected components of size > 1 + # 2. no self-loops + # + # for (1) we use Tarjan's algorithm + # thorough comments for the algorithm follow because it is subtle and nontrivial + # however, we do not seek to explain *why* the algorithm works here + # + # see also: + # Wikipedia + # https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm + # + # Original Paper (1972) + # https://citeseerx.ist.psu.edu/doc/10.1.1.327.8418 + # + # the following four variables are for Tarjan's + # - index: the index of the current node + # - stack: a stack of nodes + # - node_indices: a map from each node to its index, an incrementing integer + # which is assigned to each node as it is discovered + # - node_lowlink: a map from each node to its lowlink, the smallest index of any + # node reachable from this node on the stack + index = 0 + stack: list[str] = [] + node_indices: dict[str, int] = {} + node_lowlink: dict[str, int] = {} + + # the strongconnect method yields strongly connected components (as an iterator) + def strongconnect(node: str) -> t.Iterator[list[str]]: + nonlocal index + nonlocal stack + node_indices[node] = index + node_lowlink[node] = index + index += 1 + stack.append(node) + + # for all children of the current node... + for _, dest, _ in self.get_child_edges(node): + # if the child has not been visited, visit it (recursively) + # effectively, this proceeds down a DFS incrementing the index + # assigned to newly discovered nodes + if dest not in node_indices: + yield from strongconnect(dest) + # after uncovering the strongly connected components which are + # reachable from the dest, update the lowlink of the current node + # if dest found a lower lowlink + # this would indicate that dest or its descendants point back "up" + # the stack to 'node' or one of its ancestors + node_lowlink[node] = min(node_lowlink[node], node_lowlink[dest]) + # otherwise, but only if dest is on the stack, update the lowlink of + # 'node' if necessary + if dest in stack: + node_lowlink[node] = min(node_lowlink[node], node_indices[dest]) + + # if the current node was discovered to be a root node (because it was not + # connected to any node with a lower index) then pop the stack until we + # reach the current node, and that will be a strongly connected component + if node_lowlink[node] == node_indices[node]: + component: list[str] = [] + while stack[-1] != node: + top = stack.pop() + component.append(top) + component.append(stack.pop()) + yield component + + # first, check for self-loops to eliminate this case + for src, dest, _ in self.edges: + if src == dest: + raise ScopeCycleError(f"A self-loop was detected: {src}->{dest}") + + # now, detect strongly connected components + components: list[list[str]] = [] + for node in self.nodes: + if node in node_indices: + continue + components.extend(strongconnect(node)) + + for component in components: + if len(component) > 1: + raise ScopeCycleError(f"A cycle was detected: {component}") def __str__(self) -> str: lines = ["digraph scopes {", ' rankdir="LR";', ""] diff --git a/tests/unit/experimental/test_scope_parser.py b/tests/unit/experimental/test_scope_parser.py index d5da7be89..8709c60cf 100644 --- a/tests/unit/experimental/test_scope_parser.py +++ b/tests/unit/experimental/test_scope_parser.py @@ -1,6 +1,6 @@ import pytest -from globus_sdk.experimental.scope_parser import Scope, ScopeParseError +from globus_sdk.experimental.scope_parser import Scope, ScopeCycleError, ScopeParseError def test_scope_str_and_repr_simple(): @@ -126,6 +126,22 @@ def test_scope_parsing_rejects_bad_inputs(scopestring): Scope.parse(scopestring) +@pytest.mark.parametrize( + "scopestring", + [ + "foo[foo]", + "foo[*foo]", + "foo[bar[foo]]", + "foo[bar[baz[bar]]]", + "foo[bar[*baz[bar]]]", + "foo[bar[*baz[*bar]]]", + ], +) +def test_scope_parsing_catches_and_rejects_cycles(scopestring): + with pytest.raises(ScopeCycleError): + Scope.parse(scopestring) + + @pytest.mark.parametrize( "scopestring", ("foo", "*foo", "foo[bar]", "foo[*bar]", "foo bar", "foo[bar[baz]]"), @@ -262,3 +278,16 @@ def test_scope_contains_complex_usages(contained, containing, expect): outer_s = Scope.deserialize(containing) inner_s = Scope.deserialize(contained) assert (inner_s in outer_s) == expect + + +@pytest.mark.parametrize( + "original, reserialized", + [ + ("foo[bar *bar]", {"foo[bar]"}), + ("foo[*bar bar]", {"foo[bar]"}), + ("foo[bar[baz]] bar[*baz]", {"foo[bar[baz]]", "bar[baz]"}), + ("foo[bar[*baz]] bar[baz]", {"foo[bar[baz]]", "bar[baz]"}), + ], +) +def test_scope_parsing_normalizes_optionals(original, reserialized): + assert {s.serialize() for s in Scope.parse(original)} == reserialized diff --git a/tests/unit/experimental/test_scope_parser_intermediate_representations.py b/tests/unit/experimental/test_scope_parser_intermediate_representations.py new file mode 100644 index 000000000..0aaf722f5 --- /dev/null +++ b/tests/unit/experimental/test_scope_parser_intermediate_representations.py @@ -0,0 +1,79 @@ +from globus_sdk.experimental.scope_parser._parser import ( + ScopeTreeNode, + parse_scope_graph, +) + + +def test_graph_str_single_node(): + g = parse_scope_graph("foo") + assert ( + str(g) + == """\ +digraph scopes { + rankdir="LR"; + + foo + + +}""" + ) + + +def test_graph_str_single_optional_node(): + g = parse_scope_graph("*foo") + assert ( + str(g) + == """\ +digraph scopes { + rankdir="LR"; + + *foo + + +}""" + ) + + +def test_graph_str_single_dependency(): + g = parse_scope_graph("foo[bar]") + assert ( + str(g) + == """\ +digraph scopes { + rankdir="LR"; + + foo + + foo -> bar; + +}""" + ) + + +def test_graph_str_optional_dependency(): + g = parse_scope_graph("foo[bar[*baz]]") + assert ( + str(g) + == """\ +digraph scopes { + rankdir="LR"; + + foo + + foo -> bar; + bar -> baz [ label = "optional" ]; + +}""" + ) + + +def test_treenode_repr(): + t = ScopeTreeNode("foo", optional=False) + assert repr(t) == "ScopeTreeNode('foo')" + + t = ScopeTreeNode("foo", optional=True) + assert repr(t) == "ScopeTreeNode('foo', optional=True)" + + t = ScopeTreeNode("foo", optional=False) + t.dependencies = [ScopeTreeNode("bar", optional=False)] + assert (repr(t)) == "ScopeTreeNode('foo', dependencies=[ScopeTreeNode('bar')])" From a751e0e54de581d965051c4fc34d37b0349fffee Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Sat, 29 Jul 2023 11:26:19 -0500 Subject: [PATCH 08/17] Revert cycle detection to a simpler implementation Our goal is still correctness, but the use of Tarjan's strongly connected components algorithm makes the ScopeGraph class significantly more complex in pursuit of efficiency. Worse, the prior implementation was recursive, which carries significant costs in python. Revert to a simpler iterative cycle detector which is adjusted to be certain to be correct but somewhat inefficient. --- .../experimental/scope_parser/_parser.py | 105 +++++------------- 1 file changed, 26 insertions(+), 79 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index fba6237c2..ab543c66b 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -179,86 +179,33 @@ def _normalize_optionals(self) -> None: self._nodes_to_outbound_edges[src].remove(edge) def _check_cycles(self) -> None: - # check for two invariants: - # 1. no strongly connected components of size > 1 - # 2. no self-loops + # this is an *extremely* simple cycle detection check which takes + # quadratic time in the size of the graph (O(V^2+V*E)) + # however, for many small graph sizes, which is typical for scope construction, + # the speed of the quadratic but non-recursive approach used here is superior to + # the speed of an O(V+E) recursive approach (e.g. Tarjan's algorithm) # - # for (1) we use Tarjan's algorithm - # thorough comments for the algorithm follow because it is subtle and nontrivial - # however, we do not seek to explain *why* the algorithm works here - # - # see also: - # Wikipedia - # https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm - # - # Original Paper (1972) - # https://citeseerx.ist.psu.edu/doc/10.1.1.327.8418 - # - # the following four variables are for Tarjan's - # - index: the index of the current node - # - stack: a stack of nodes - # - node_indices: a map from each node to its index, an incrementing integer - # which is assigned to each node as it is discovered - # - node_lowlink: a map from each node to its lowlink, the smallest index of any - # node reachable from this node on the stack - index = 0 - stack: list[str] = [] - node_indices: dict[str, int] = {} - node_lowlink: dict[str, int] = {} - - # the strongconnect method yields strongly connected components (as an iterator) - def strongconnect(node: str) -> t.Iterator[list[str]]: - nonlocal index - nonlocal stack - node_indices[node] = index - node_lowlink[node] = index - index += 1 - stack.append(node) - - # for all children of the current node... - for _, dest, _ in self.get_child_edges(node): - # if the child has not been visited, visit it (recursively) - # effectively, this proceeds down a DFS incrementing the index - # assigned to newly discovered nodes - if dest not in node_indices: - yield from strongconnect(dest) - # after uncovering the strongly connected components which are - # reachable from the dest, update the lowlink of the current node - # if dest found a lower lowlink - # this would indicate that dest or its descendants point back "up" - # the stack to 'node' or one of its ancestors - node_lowlink[node] = min(node_lowlink[node], node_lowlink[dest]) - # otherwise, but only if dest is on the stack, update the lowlink of - # 'node' if necessary - if dest in stack: - node_lowlink[node] = min(node_lowlink[node], node_indices[dest]) - - # if the current node was discovered to be a root node (because it was not - # connected to any node with a lower index) then pop the stack until we - # reach the current node, and that will be a strongly connected component - if node_lowlink[node] == node_indices[node]: - component: list[str] = [] - while stack[-1] != node: - top = stack.pop() - component.append(top) - component.append(stack.pop()) - yield component - - # first, check for self-loops to eliminate this case - for src, dest, _ in self.edges: - if src == dest: - raise ScopeCycleError(f"A self-loop was detected: {src}->{dest}") - - # now, detect strongly connected components - components: list[list[str]] = [] - for node in self.nodes: - if node in node_indices: - continue - components.extend(strongconnect(node)) - - for component in components: - if len(component) > 1: - raise ScopeCycleError(f"A cycle was detected: {component}") + # it could be improved in the future by writing a more elaborate O(V+E) + # algorithm unrolled into an iterative implementation + nodes_to_check = set(self.nodes) + + while nodes_to_check: + start = nodes_to_check.pop() + seen_nodes: set[str] = {start} + + edges_to_visit = self.get_child_edges(start) + while edges_to_visit: + new_edges_to_visit: set[tuple[str, str, bool]] = set() + + for _source, dest, _optional in edges_to_visit: + if dest == start: + raise ScopeCycleError( + "scopes contain a cyclic dependency on " + f"{start} ({list(seen_nodes)})" + ) + seen_nodes.add(dest) + new_edges_to_visit |= self.get_child_edges(dest) + edges_to_visit = new_edges_to_visit def __str__(self) -> str: lines = ["digraph scopes {", ' rankdir="LR";', ""] From b3f0e8d218d2908c654665d1bbb85a68f3b2f267 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Sat, 29 Jul 2023 12:21:59 -0500 Subject: [PATCH 09/17] Refine the scope graph cycle detector Make cycle detection much more efficient in time (but inefficient in space) by employing a BFS graph traversal which tracks the ancestry paths through the graph which have been discovered. If a back-edge is ever found, that means that there is a cycle. --- .../experimental/scope_parser/_parser.py | 66 ++++++++++++------- tests/unit/experimental/test_scope_parser.py | 28 ++++++++ 2 files changed, 69 insertions(+), 25 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index ab543c66b..f935c2e4c 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -179,33 +179,49 @@ def _normalize_optionals(self) -> None: self._nodes_to_outbound_edges[src].remove(edge) def _check_cycles(self) -> None: - # this is an *extremely* simple cycle detection check which takes - # quadratic time in the size of the graph (O(V^2+V*E)) - # however, for many small graph sizes, which is typical for scope construction, - # the speed of the quadratic but non-recursive approach used here is superior to - # the speed of an O(V+E) recursive approach (e.g. Tarjan's algorithm) - # - # it could be improved in the future by writing a more elaborate O(V+E) - # algorithm unrolled into an iterative implementation - nodes_to_check = set(self.nodes) - - while nodes_to_check: - start = nodes_to_check.pop() - seen_nodes: set[str] = {start} - - edges_to_visit = self.get_child_edges(start) - while edges_to_visit: - new_edges_to_visit: set[tuple[str, str, bool]] = set() - - for _source, dest, _optional in edges_to_visit: - if dest == start: + # explore the graph using an iterative Breadth-First Search + # as we explore the graph, keep track of paths of ancestry being explored + # if we ever find a back-edge along one of those paths of ancestry, that + # means that there must be a cycle + + # start from the top-level nodes (which we know to be the roots of this + # forest-shaped graph) + # we will track this as the set of paths to continue to branch and explore + paths_to_explore: list[list[str]] = [ + [node] for node, _ in self.top_level_scopes + ] + + while paths_to_explore: + # the "new_paths_to_explore" tracks the set of paths which we will evaluate + # in the next iteration, starting with nothing + # if no out-edges are found from the terminal nodes of any path, we will + # stop exploring the graph + # but otherwise, we will continue to rebuild this until the entire graph + # has been traversed + new_paths_to_explore: list[list[str]] = [] + + for path in paths_to_explore: + # get out-edges from the last node in the path + terminus = path[-1] + children = self.get_child_edges(terminus) + + # if the node was a leaf, no children, we are done exploring this path + if not children: + continue + + # for each child edge, do two basic things: + # - check if we found a back-edge (cycle!) + # - create a new path to explore, with the child node as its current + # terminus + for edge in children: + _, dest, _ = edge + if dest in path: raise ScopeCycleError( - "scopes contain a cyclic dependency on " - f"{start} ({list(seen_nodes)})" + f"A cycle was found involving '{dest}' " + f"(path: {path[path.index(dest):]})" ) - seen_nodes.add(dest) - new_edges_to_visit |= self.get_child_edges(dest) - edges_to_visit = new_edges_to_visit + new_paths_to_explore.append(list(path) + [dest]) + paths_to_explore = new_paths_to_explore def __str__(self) -> str: lines = ["digraph scopes {", ' rankdir="LR";', ""] diff --git a/tests/unit/experimental/test_scope_parser.py b/tests/unit/experimental/test_scope_parser.py index 8709c60cf..8dcd9147e 100644 --- a/tests/unit/experimental/test_scope_parser.py +++ b/tests/unit/experimental/test_scope_parser.py @@ -1,3 +1,5 @@ +import time + import pytest from globus_sdk.experimental.scope_parser import Scope, ScopeCycleError, ScopeParseError @@ -142,6 +144,32 @@ def test_scope_parsing_catches_and_rejects_cycles(scopestring): Scope.parse(scopestring) +def test_scope_parsing_catches_and_rejects_very_large_cycles_quickly(): + """ + WARNING: this test is hardware speed dependent and could fail on slow systems. + + This test creates a very long cycle and validates that it can be caught in a + small timeframe of < 100ms. + Observed times on a test system were <20ms, and in CI were <60ms. + + Although checking the speed in this way is not ideal, we want to avoid high + time-complexity in the cycle detection. This test offers good protection against any + major performance regression. + """ + scope_string = "" + for i in range(1000): + scope_string += f"foo{i}[" + scope_string += " foo10" + for _ in range(1000): + scope_string += "]" + + t0 = time.time() + with pytest.raises(ScopeCycleError): + Scope.parse(scope_string) + t1 = time.time() + assert t1 - t0 < 0.1 + + @pytest.mark.parametrize( "scopestring", ("foo", "*foo", "foo[bar]", "foo[*bar]", "foo bar", "foo[bar[baz]]"), From d8759e2005d8033ed720b0cfa2ff0620707692ca Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Mon, 31 Jul 2023 14:58:33 -0500 Subject: [PATCH 10/17] Make ScopeGraph __str__ tests less fragile These tests were sensitive to the exact whitespace generated by __str__, which is a cosmetic (non-semantic) detail. It is relatively simple to remove blank lines, which makes the tests more robust to any refinements we make in the future. --- ...ope_parser_intermediate_representations.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/tests/unit/experimental/test_scope_parser_intermediate_representations.py b/tests/unit/experimental/test_scope_parser_intermediate_representations.py index 0aaf722f5..45bef8c5a 100644 --- a/tests/unit/experimental/test_scope_parser_intermediate_representations.py +++ b/tests/unit/experimental/test_scope_parser_intermediate_representations.py @@ -6,63 +6,55 @@ def test_graph_str_single_node(): g = parse_scope_graph("foo") + clean_str = _blank_lines_removed(str(g)) assert ( - str(g) + clean_str == """\ digraph scopes { rankdir="LR"; - foo - - }""" ) def test_graph_str_single_optional_node(): g = parse_scope_graph("*foo") + clean_str = _blank_lines_removed(str(g)) assert ( - str(g) + clean_str == """\ digraph scopes { rankdir="LR"; - *foo - - }""" ) def test_graph_str_single_dependency(): g = parse_scope_graph("foo[bar]") + clean_str = _blank_lines_removed(str(g)) assert ( - str(g) + clean_str == """\ digraph scopes { rankdir="LR"; - foo - foo -> bar; - }""" ) def test_graph_str_optional_dependency(): g = parse_scope_graph("foo[bar[*baz]]") + clean_str = _blank_lines_removed(str(g)) assert ( - str(g) + clean_str == """\ digraph scopes { rankdir="LR"; - foo - foo -> bar; bar -> baz [ label = "optional" ]; - }""" ) @@ -77,3 +69,7 @@ def test_treenode_repr(): t = ScopeTreeNode("foo", optional=False) t.dependencies = [ScopeTreeNode("bar", optional=False)] assert (repr(t)) == "ScopeTreeNode('foo', dependencies=[ScopeTreeNode('bar')])" + + +def _blank_lines_removed(s: str) -> str: + return "\n".join(line for line in s.split("\n") if line.strip() != "") From 72bdb45d619d4bc54f0f6e219211fffe9c8cde01 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Tue, 1 Aug 2023 15:34:38 -0500 Subject: [PATCH 11/17] Tamp down overly aggressive experimental warning Experimental scope parsing had a vague warning which did not properly announce the limitations of the scope parsing interface. The replacement note instead clarifies only what a parse represents and notes that there is no external callout for validation. --- docs/experimental/scope_parser.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/experimental/scope_parser.rst b/docs/experimental/scope_parser.rst index d37a1f864..a4af34fbb 100644 --- a/docs/experimental/scope_parser.rst +++ b/docs/experimental/scope_parser.rst @@ -5,8 +5,8 @@ Scope Parser .. currentmodule:: globus_sdk.experimental.scope_parser -This component defines a Scope object and exposes use of a parser which can -parse scope strings into trees of Scope objects. +This component defines a ``Scope`` object and exposes use of a parser which can +parse scope strings into trees of ``Scope`` objects. It should be regarded as very similar to the existing ``MutableScope`` object in ``globus_sdk.scopes``. Key differences from ``MutableScope``: @@ -16,11 +16,11 @@ It should be regarded as very similar to the existing ``MutableScope`` object in - ``Scope`` objects define a ``__contains__`` method, allowing one to check if one scope properly contains another -.. warning:: +.. note:: - This module is experimental due to the limitations of scope strings as a - representation of consent trees. The scope trees produced by parsing do - not necessarily describe real consent structures in Globus Auth. + The scope trees produced by parsing represent prospective consent structures as + would be produced by a single authentication flow in Auth. No external APIs (e.g., + the Get Consents API) are used to validate or mutate the data. Reference --------- From 0d61d2fc8ee80829647cc92ca756312c40b4ea8c Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Tue, 1 Aug 2023 17:00:58 -0500 Subject: [PATCH 12/17] Optimize ScopeGraph cycle detection There are three changes here which make cycle detection faster. 1. Use DFS rather than BFS, which will find cycles faster in some cases and is less memory intensive for large graphs 2. Represent paths using `(frozenset, str)` tuples, since we don't care about ordering at all. This reduces the information in any given "path" from the prior implementation to a bag of nodes and the current terminus. Order is lost, but frozenset construction and membership checking is faster than the list equivalents 3. Rename such that `get_child_edges` becomes `adjacency_matrix`, converting a function call to an attribute lookup and skipping an unnecessary set construction step --- .../experimental/scope_parser/_parser.py | 73 ++++++++----------- .../scope_parser/scope_definition.py | 2 +- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index f935c2e4c..6426bc998 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -153,16 +153,11 @@ def __init__(self) -> None: self.top_level_scopes: set[tuple[str, bool]] = set() self.nodes: set[str] = set() self.edges: set[tuple[str, str, bool]] = set() - self._nodes_to_outbound_edges: dict[ - str, set[tuple[str, str, bool]] - ] = defaultdict(set) - - def get_child_edges(self, node: str) -> set[tuple[str, str, bool]]: - return set(self._nodes_to_outbound_edges[node]) + self.adjacency_matrix: dict[str, set[tuple[str, str, bool]]] = defaultdict(set) def add_edge(self, src: str, dest: str, optional: bool) -> None: self.edges.add((src, dest, optional)) - self._nodes_to_outbound_edges[src].add((src, dest, optional)) + self.adjacency_matrix[src].add((src, dest, optional)) def _normalize_optionals(self) -> None: to_remove: set[tuple[str, str, bool]] = set() @@ -176,52 +171,46 @@ def _normalize_optionals(self) -> None: self.edges = self.edges - to_remove for edge in to_remove: src, _, _ = edge - self._nodes_to_outbound_edges[src].remove(edge) + self.adjacency_matrix[src].remove(edge) def _check_cycles(self) -> None: - # explore the graph using an iterative Breadth-First Search + # explore the graph using an iterative Depth-First Search # as we explore the graph, keep track of paths of ancestry being explored # if we ever find a back-edge along one of those paths of ancestry, that # means that there must be a cycle # start from the top-level nodes (which we know to be the roots of this # forest-shaped graph) - # we will track this as the set of paths to continue to branch and explore - paths_to_explore: list[list[str]] = [ - [node] for node, _ in self.top_level_scopes + # we will track this as the set of paths to continue to branch and explore in a + # stack and pop from it until it is empty, thus implementing DFS + # + # conceptually, the paths could be implemented as `list[str]`, which would + # preserve the order in which we encountered each node. Using frozenset is a + # micro-optimization which makes checks faster, since we only care to detect + # *that* there was a cycle, not what the shape of that cycle was + paths_to_explore: list[tuple[frozenset[str], str]] = [ + (frozenset((node,)), node) for node, _ in self.top_level_scopes ] while paths_to_explore: - # the "new_paths_to_explore" tracks the set of paths which we will evaluate - # in the next iteration, starting with nothing - # if no out-edges are found from the terminal nodes of any path, we will - # stop exploring the graph - # but otherwise, we will continue to rebuild this until the entire graph - # has been traversed - new_paths_to_explore: list[list[str]] = [] - - for path in paths_to_explore: - # get out-edges from the last node in the path - terminus = path[-1] - children = self.get_child_edges(terminus) - - # if the node was a leaf, no children, we are done exploring this path - if not children: - continue - - # for each child edge, do two basic things: - # - check if we found a back-edge (cycle!) - # - create a new path to explore, with the child node as its current - # terminus - for edge in children: - _, dest, _ = edge - if dest in path: - raise ScopeCycleError( - f"A cycle was found involving '{dest}' " - f"(path: {path[path.index(dest):]})" - ) - new_paths_to_explore.append(list(path) + [dest]) - paths_to_explore = new_paths_to_explore + path, terminus = paths_to_explore.pop() + + # get out-edges from the last node in the path + children = self.adjacency_matrix[terminus] + + # if the node was a leaf, no children, we are done exploring this path + if not children: + continue + + # for each child edge, do two basic things: + # - check if we found a back-edge (cycle!) + # - create a new path to explore, with the child node as its current + # terminus + for edge in children: + _, dest, _ = edge + if dest in path: + raise ScopeCycleError(f"A cycle was found involving '{dest}'") + paths_to_explore.append((path.union((dest,)), dest)) def __str__(self) -> str: lines = ["digraph scopes {", ' rankdir="LR";', ""] diff --git a/src/globus_sdk/experimental/scope_parser/scope_definition.py b/src/globus_sdk/experimental/scope_parser/scope_definition.py index 4056fbcab..505cff280 100644 --- a/src/globus_sdk/experimental/scope_parser/scope_definition.py +++ b/src/globus_sdk/experimental/scope_parser/scope_definition.py @@ -65,7 +65,7 @@ def parse(scope_string: str) -> list[Scope]: while bfs_additions: current_scope = bfs_additions.pop(0) - edges = scope_graph.get_child_edges(current_scope.scope_string) + edges = scope_graph.adjacency_matrix[current_scope.scope_string] for _, dest, optional in edges: dest_scope = Scope(dest, optional=optional) current_scope.add_dependency(dest_scope) From 32a4b48ddf2b07d924518289d4c205e6f1cd0a8b Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Wed, 2 Aug 2023 12:05:53 -0500 Subject: [PATCH 13/17] Minor cleanup to experimental scope parsing 1. Use a dataclass for parsed tokens for a cleaner, more declarative style 2. Move the test script into a `__main__.py` module to allow `-m` execution for testing purposes In (2), this now also includes some timing tests which reflect some of the work which was done in order to tune and optimize the cycle detector. --- .../experimental/scope_parser/__main__.py | 62 +++++++++++++++++++ .../experimental/scope_parser/_parser.py | 25 +++----- 2 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 src/globus_sdk/experimental/scope_parser/__main__.py diff --git a/src/globus_sdk/experimental/scope_parser/__main__.py b/src/globus_sdk/experimental/scope_parser/__main__.py new file mode 100644 index 000000000..6437dfc4e --- /dev/null +++ b/src/globus_sdk/experimental/scope_parser/__main__.py @@ -0,0 +1,62 @@ +import sys +import timeit +import warnings + +from ._parser import parse_scope_graph + +warnings.warn( + "This is a test module for scope parsing. It is not intended for public use.", + stacklevel=1, +) + + +def timeit_test() -> None: + for scope_depth, num_iterations in ( + (10, 1000), + (100, 1000), + (1000, 1000), + (2000, 100), + (3000, 100), + (4000, 100), + (5000, 100), + ): + setup = f"""\ +from globus_sdk.experimental.scope_parser import Scope +big_scope = "" +for i in range({scope_depth}): + big_scope += f"foo{{i}}[" +big_scope += "bar" +for _ in range({scope_depth}): + big_scope += "]" +""" + + time_taken = timeit.timeit( + "Scope.deserialize(big_scope)", setup=setup, number=num_iterations + ) + average = time_taken / num_iterations + print(f"{num_iterations} runs, {scope_depth} depth, avg time taken: {average}") + + +def parse_test(scope_string: str) -> None: + parsed_graph = parse_scope_graph(sys.argv[1]) + print( + "top level scopes:", + ", ".join([name for name, _optional in parsed_graph.top_level_scopes]), + ) + print(parsed_graph) + + +def main(): + if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"): + print("This script supports two usage patterns:") + print(" python -m globus_sdk.experimental.scope_parser SCOPE_STRING") + print(" python -m globus_sdk.experimental.scope_parser --timeit") + sys.exit(0) + + if sys.argv[1] == "--timeit": + timeit_test() + else: + parse_test(sys.argv[1]) + + +main() diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index 6426bc998..6860b4b86 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -1,5 +1,6 @@ from __future__ import annotations +import dataclasses import enum import typing as t from collections import defaultdict, deque @@ -17,10 +18,10 @@ class ParseTokenType(enum.Enum): rbracket = enum.auto() +@dataclasses.dataclass class ParseToken: - def __init__(self, value: str, token_type: ParseTokenType) -> None: - self.value = value - self.token_type = token_type + value: str + token_type: ParseTokenType def _tokenize(scope_string: str) -> list[ParseToken]: @@ -117,10 +118,9 @@ def _parse_tokens(tokens: list[ParseToken]) -> list[ScopeTreeNode]: class ScopeTreeNode: - """ - This is an intermediate representation for scope parsing. - """ - + # + # This is an intermediate representation for scope parsing. + # def __init__( self, scope_string: str, @@ -257,14 +257,3 @@ def parse_scope_graph(scopes: str) -> ScopeGraph: graph._normalize_optionals() graph._check_cycles() return graph - - -if __name__ == "__main__": - import sys - - parsed_graph = parse_scope_graph(sys.argv[1]) - print( - "top level scopes:", - ", ".join([name for name, _optional in parsed_graph.top_level_scopes]), - ) - print(parsed_graph) From ca0dc6e52068105f5ff1e2ba89088eb6e6b28af4 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Thu, 3 Aug 2023 10:35:54 -0500 Subject: [PATCH 14/17] Avoid list appends and joins to construct token strings --- .../experimental/scope_parser/_parser.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index 6860b4b86..f4044f6e6 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -26,7 +26,7 @@ class ParseToken: def _tokenize(scope_string: str) -> list[ParseToken]: tokens: list[ParseToken] = [] - current_token: list[str] = [] + start = 0 for idx, c in enumerate(scope_string): try: peek: str | None = scope_string[idx + 1] @@ -34,12 +34,12 @@ def _tokenize(scope_string: str) -> list[ParseToken]: peek = None if c in "[]* ": - if current_token: + if start != idx: tokens.append( - ParseToken("".join(current_token), ParseTokenType.scope_string) + ParseToken(scope_string[start:idx], ParseTokenType.scope_string) ) - current_token = [] + start = idx + 1 if c == "*": if peek == " ": raise ScopeParseError("'*' must not be followed by a space") @@ -55,10 +55,9 @@ def _tokenize(scope_string: str) -> list[ParseToken]: raise ScopeParseError("'[' cannot have a preceding space") else: raise NotImplementedError - else: - current_token.append(c) - if current_token: - tokens.append(ParseToken("".join(current_token), ParseTokenType.scope_string)) + remainder = scope_string[start:].strip() + if remainder: + tokens.append(ParseToken(remainder, ParseTokenType.scope_string)) return tokens From aa05cc19fdc01c052b5e666692de8f235b50ecb2 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Thu, 3 Aug 2023 11:18:19 -0500 Subject: [PATCH 15/17] Avoid dataclasses and enums in favor of speed --- .../experimental/scope_parser/_parser.py | 55 +++++++------------ 1 file changed, 19 insertions(+), 36 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index f4044f6e6..55b0a5e70 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -1,31 +1,16 @@ from __future__ import annotations -import dataclasses -import enum import typing as t from collections import defaultdict, deque from .errors import ScopeCycleError, ScopeParseError +SPECIAL_CHARACTERS = set("[]* ") +SPECIAL_TOKENS = set("[]*") -class ParseTokenType(enum.Enum): - # a string like 'urn:globus:auth:scopes:transfer.api.globus.org:all' - scope_string = enum.auto() - # the optional marker, '*' - opt_marker = enum.auto() - # '[' and ']' - lbracket = enum.auto() - rbracket = enum.auto() - -@dataclasses.dataclass -class ParseToken: - value: str - token_type: ParseTokenType - - -def _tokenize(scope_string: str) -> list[ParseToken]: - tokens: list[ParseToken] = [] +def _tokenize(scope_string: str) -> list[str]: + tokens: list[str] = [] start = 0 for idx, c in enumerate(scope_string): try: @@ -33,23 +18,21 @@ def _tokenize(scope_string: str) -> list[ParseToken]: except IndexError: peek = None - if c in "[]* ": + if c in SPECIAL_CHARACTERS: if start != idx: - tokens.append( - ParseToken(scope_string[start:idx], ParseTokenType.scope_string) - ) + tokens.append(scope_string[start:idx]) start = idx + 1 if c == "*": if peek == " ": raise ScopeParseError("'*' must not be followed by a space") - tokens.append(ParseToken(c, ParseTokenType.opt_marker)) + tokens.append(c) elif c == "[": - tokens.append(ParseToken(c, ParseTokenType.lbracket)) + tokens.append(c) elif c == "]": if peek is not None and peek not in (" ", "]"): raise ScopeParseError("']' may only be followed by a space or ']'") - tokens.append(ParseToken(c, ParseTokenType.rbracket)) + tokens.append(c) elif c == " ": if peek == "[": raise ScopeParseError("'[' cannot have a preceding space") @@ -57,11 +40,11 @@ def _tokenize(scope_string: str) -> list[ParseToken]: raise NotImplementedError remainder = scope_string[start:].strip() if remainder: - tokens.append(ParseToken(remainder, ParseTokenType.scope_string)) + tokens.append(remainder) return tokens -def _parse_tokens(tokens: list[ParseToken]) -> list[ScopeTreeNode]: +def _parse_tokens(tokens: list[str]) -> list[ScopeTreeNode]: # value to return ret: list[ScopeTreeNode] = [] # track whether or not the current scope is optional (has a preceding *) @@ -75,36 +58,36 @@ def _parse_tokens(tokens: list[ParseToken]) -> list[ScopeTreeNode]: for idx in range(len(tokens)): token = tokens[idx] try: - peek: ParseToken | None = tokens[idx + 1] + peek: str | None = tokens[idx + 1] except IndexError: peek = None - if token.token_type == ParseTokenType.opt_marker: + if token == "*": current_optional = True if peek is None: raise ScopeParseError("ended in optional marker") - if peek.token_type != ParseTokenType.scope_string: + if peek in SPECIAL_TOKENS: raise ScopeParseError( "a scope string must always follow an optional marker" ) - elif token.token_type == ParseTokenType.lbracket: + elif token == "[": if peek is None: raise ScopeParseError("ended in left bracket") - if peek.token_type == ParseTokenType.rbracket: + if peek == "]": raise ScopeParseError("found empty brackets") - if peek.token_type == ParseTokenType.lbracket: + if peek == "[": raise ScopeParseError("found double left-bracket") if not current_scope: raise ScopeParseError("found '[' without a preceding scope string") parents.append(current_scope) - elif token.token_type == ParseTokenType.rbracket: + elif token == "]": if not parents: raise ScopeParseError("found ']' with no matching '[' preceding it") parents.pop() else: - current_scope = ScopeTreeNode(token.value, optional=current_optional) + current_scope = ScopeTreeNode(token, optional=current_optional) current_optional = False if parents: parents[-1].add_dependency(current_scope) From 6d6ad891efda377e58c01a060e9e5832bc386ce6 Mon Sep 17 00:00:00 2001 From: Kurt McKee Date: Thu, 3 Aug 2023 11:53:05 -0500 Subject: [PATCH 16/17] Use sets, which appear to be faster --- src/globus_sdk/experimental/scope_parser/_parser.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/_parser.py b/src/globus_sdk/experimental/scope_parser/_parser.py index 55b0a5e70..f9e8cc1ff 100644 --- a/src/globus_sdk/experimental/scope_parser/_parser.py +++ b/src/globus_sdk/experimental/scope_parser/_parser.py @@ -167,11 +167,11 @@ def _check_cycles(self) -> None: # stack and pop from it until it is empty, thus implementing DFS # # conceptually, the paths could be implemented as `list[str]`, which would - # preserve the order in which we encountered each node. Using frozenset is a + # preserve the order in which we encountered each node. Using a set is a # micro-optimization which makes checks faster, since we only care to detect # *that* there was a cycle, not what the shape of that cycle was - paths_to_explore: list[tuple[frozenset[str], str]] = [ - (frozenset((node,)), node) for node, _ in self.top_level_scopes + paths_to_explore: list[tuple[set[str], str]] = [ + ({node}, node) for node, _ in self.top_level_scopes ] while paths_to_explore: From 928b774bc52092c14b566fa561fa2f5c5a14afe1 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Thu, 3 Aug 2023 12:59:20 -0500 Subject: [PATCH 17/17] Cleanup scope parsing test module - add missing annotation - fix parameter usage in the non-timing test - test for wide scopes as well as deep ones for timing - report more precise timing stats, including `min` Some of the timing data manipulation was informed by reading the `timeit` module source and recommendations. --- .../experimental/scope_parser/__main__.py | 73 ++++++++++++++----- 1 file changed, 55 insertions(+), 18 deletions(-) diff --git a/src/globus_sdk/experimental/scope_parser/__main__.py b/src/globus_sdk/experimental/scope_parser/__main__.py index 6437dfc4e..a21740a81 100644 --- a/src/globus_sdk/experimental/scope_parser/__main__.py +++ b/src/globus_sdk/experimental/scope_parser/__main__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import sys import timeit import warnings @@ -11,34 +13,68 @@ def timeit_test() -> None: - for scope_depth, num_iterations in ( - (10, 1000), - (100, 1000), - (1000, 1000), - (2000, 100), - (3000, 100), - (4000, 100), - (5000, 100), + for size, num_iterations, style in ( + (10, 1000, "deep"), + (100, 1000, "deep"), + (1000, 1000, "deep"), + (2000, 100, "deep"), + (3000, 100, "deep"), + (4000, 100, "deep"), + (5000, 100, "deep"), + (5000, 1000, "wide"), + (10000, 1000, "wide"), ): - setup = f"""\ + if style == "deep": + setup = f"""\ from globus_sdk.experimental.scope_parser import Scope big_scope = "" -for i in range({scope_depth}): +for i in range({size}): big_scope += f"foo{{i}}[" big_scope += "bar" -for _ in range({scope_depth}): +for _ in range({size}): big_scope += "]" """ + elif style == "wide": + setup = f"""\ +from globus_sdk.experimental.scope_parser import Scope +big_scope = "" +for i in range({size}): + big_scope += f"foo{{i}} " +""" + else: + raise NotImplementedError + + timer = timeit.Timer("Scope.parse(big_scope)", setup=setup) + + raw_timings = timer.repeat(repeat=5, number=num_iterations) + best, worst, average, variance = _stats(raw_timings) + if style == "deep": + print(f"{num_iterations} runs on a deep scope, depth={size}") + elif style == "wide": + print(f"{num_iterations} runs on a wide scope, width={size}") + else: + raise NotImplementedError + print(f" best={best} worst={worst} average={average} variance={variance}") + print(f" normalized best={best / num_iterations}") + print() + print("The most informative stat over these timings is the min timing (best).") + print("Normed best is best/iterations.") + print( + "Max timing (worst) and dispersion (variance vis-a-vis average) indicate " + "how consistent the results are, but are not a report of speed." + ) + - time_taken = timeit.timeit( - "Scope.deserialize(big_scope)", setup=setup, number=num_iterations - ) - average = time_taken / num_iterations - print(f"{num_iterations} runs, {scope_depth} depth, avg time taken: {average}") +def _stats(timing_data: list[float]) -> tuple[float, float, float, float]: + best = min(timing_data) + worst = max(timing_data) + average = sum(timing_data) / len(timing_data) + variance = sum((x - average) ** 2 for x in timing_data) / len(timing_data) + return best, worst, average, variance def parse_test(scope_string: str) -> None: - parsed_graph = parse_scope_graph(sys.argv[1]) + parsed_graph = parse_scope_graph(scope_string) print( "top level scopes:", ", ".join([name for name, _optional in parsed_graph.top_level_scopes]), @@ -46,13 +82,14 @@ def parse_test(scope_string: str) -> None: print(parsed_graph) -def main(): +def main() -> None: if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help"): print("This script supports two usage patterns:") print(" python -m globus_sdk.experimental.scope_parser SCOPE_STRING") print(" python -m globus_sdk.experimental.scope_parser --timeit") sys.exit(0) + print() if sys.argv[1] == "--timeit": timeit_test() else: