From d95f332828444792b91118492aa9f3f6b5dfed60 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 21:57:25 +0100 Subject: [PATCH 01/92] perf: Replace `_use_referencing_library()` with a constant Every call was identical as it was based on an existing constant `jsonschema_version_str` --- tools/schemapi/schemapi.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index b6907ec8f..9d81ccd78 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -81,7 +81,9 @@ # class-level _class_is_valid_at_instantiation attribute to False DEBUG_MODE: bool = True -jsonschema_version_str = importlib_version("jsonschema") + +_USING_REFERENCING: Final[bool] = Version(importlib_version("jsonschema")) >= Version("4.18") # fmt: off +"""In version 4.18.0, the ``jsonschema`` package deprecated RefResolver in favor of the ``referencing`` library.""" def enable_debug_mode() -> None: @@ -191,7 +193,7 @@ def _get_errors_from_spec( if hasattr(validator_cls, "FORMAT_CHECKER"): validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER - if _use_referencing_library(): + if _USING_REFERENCING: schema = _prepare_references_in_schema(schema) validator_kwargs["registry"] = _get_referencing_registry( rootschema or schema, json_schema_draft_url @@ -538,7 +540,7 @@ def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" - if _use_referencing_library(): + if _USING_REFERENCING: registry = _get_referencing_registry(rootschema or schema) # Using a different variable name to show that this is not the # jsonschema.RefResolver but instead a Resolver from the referencing From f4a4e0e4b0be55ade5e1602cfb65010767739ddf Mon Sep 17 00:00:00 2001 From: dangotbanned 
<125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 22:00:54 +0100 Subject: [PATCH 02/92] docs: Move `_get_errors_from_spec` comment into docstring I'm going to do this a lot. Docstrings can be collapsed in all editors and can benefit from markdown. Everything here is already private, so using long comments has no benefit --- tools/schemapi/schemapi.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 9d81ccd78..bbef37d99 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -171,20 +171,25 @@ def _get_errors_from_spec( rootschema: dict[str, Any] | None = None, ) -> ValidationErrorList: """ - Uses the relevant jsonschema validator to validate the passed in spec against the schema using the rootschema to resolve references. + Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. - The schema and rootschema themselves are not validated but instead considered as valid. - """ - # We don't use jsonschema.validate as this would validate the schema itself. - # Instead, we pass the schema directly to the validator class. This is done for - # two reasons: The schema comes from Vega-Lite and is not based on the user - # input, therefore there is no need to validate it in the first place. Furthermore, - # the "uri-reference" format checker fails for some of the references as URIs in - # "$ref" are not encoded, - # e.g. '#/definitions/ValueDefWithCondition' would be a valid $ref in a Vega-Lite schema but - # it is not a valid URI reference due to the characters such as '<'. + ``schema`` and ``rootschema`` are not validated but instead considered as valid. + + We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. + Instead, we pass the ``schema`` directly to the validator class. + + This is done for two reasons: + 1. 
The schema comes from Vega-Lite and is not based on the user + input, therefore there is no need to validate it in the first place. + 2. The "uri-reference" format checker fails for some of the + references as URIs in "$ref" are not encoded, e.g.: + + '#/definitions/ValueDefWithCondition' + + would be a valid $ref in a Vega-Lite schema but it is not a valid + URI reference due to the characters such as '<'. + """ json_schema_draft_url = _get_json_schema_draft_url(rootschema or schema) validator_cls = jsonschema.validators.validator_for( {"$schema": json_schema_draft_url} From 6dfe61bf8bc6404963813230a01b05f950cbfa13 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 22:02:51 +0100 Subject: [PATCH 03/92] fix(typing): Resolve `jsonschema` incomplete stubs issue `typeshed` disagrees with `jsonschema`, this is just enforcing what `jsonschema` says is true --- tools/schemapi/schemapi.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index bbef37d99..1d734ddc1 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -42,6 +42,7 @@ if TYPE_CHECKING: from typing import ClassVar + from jsonschema.protocols import Validator from referencing import Registry from altair.typing import ChartType @@ -191,8 +192,9 @@ def _get_errors_from_spec( URI reference due to the characters such as '<'. 
""" json_schema_draft_url = _get_json_schema_draft_url(rootschema or schema) - validator_cls = jsonschema.validators.validator_for( - {"$schema": json_schema_draft_url} + validator_cls: type[Validator] = cast( + "type[Validator]", + jsonschema.validators.validator_for({"$schema": json_schema_draft_url}), ) validator_kwargs: dict[str, Any] = {} if hasattr(validator_cls, "FORMAT_CHECKER"): From e9a4beb02034c0730e6737db37aa1758b2233f9e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 22:03:58 +0100 Subject: [PATCH 04/92] refactor: Reuse `None` in ternary expression --- tools/schemapi/schemapi.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 1d734ddc1..b1f3192ab 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -205,13 +205,10 @@ def _get_errors_from_spec( validator_kwargs["registry"] = _get_referencing_registry( rootschema or schema, json_schema_draft_url ) - else: # No resolver is necessary if the schema is already the full schema validator_kwargs["resolver"] = ( - jsonschema.RefResolver.from_schema(rootschema) - if rootschema is not None - else None + jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) validator = validator_cls(schema, **validator_kwargs) From c53487645685e9e377fb53d99e94c58a9af37471 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 22:07:46 +0100 Subject: [PATCH 05/92] perf: Replace `_prepare_references_in_schema` Produces the same result, but skips the upfront `deepcopy`. No longer modifying the copy inplace, new objects are created inside the iterator. 
--- tools/schemapi/schemapi.py | 60 ++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index b1f3192ab..e528dcacd 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1,7 +1,6 @@ from __future__ import annotations import contextlib -import copy import inspect import json import sys @@ -201,7 +200,7 @@ def _get_errors_from_spec( validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER if _USING_REFERENCING: - schema = _prepare_references_in_schema(schema) + schema = _prepare_references(schema) validator_kwargs["registry"] = _get_referencing_registry( rootschema or schema, json_schema_draft_url ) @@ -220,44 +219,35 @@ def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: return schema.get("$schema", _DEFAULT_JSON_SCHEMA_DRAFT_URL) -def _use_referencing_library() -> bool: - """In version 4.18.0, the jsonschema package deprecated RefResolver in favor of the referencing library.""" - return Version(jsonschema_version_str) >= Version("4.18") +def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: + """ + Return a deep copy of ``schema`` w/ replaced uri(s). + All encountered ``dict | list``(s) will be reconstructed + w/ ``_VEGA_LITE_ROOT_URI`` in front of all nested``$ref`` values. -def _prepare_references_in_schema(schema: dict[str, Any]) -> dict[str, Any]: - # Create a copy so that $ref is not modified in the original schema in case - # that it would still reference a dictionary which might be attached to - # an Altair class _schema attribute - schema = copy.deepcopy(schema) + Notes + ----- + ``copy.deepcopy`` is not needed as the iterator yields new objects. + """ + return dict(_rec_refs(schema)) - def _prepare_refs(d: dict[str, Any]) -> dict[str, Any]: - """ - Add _VEGA_LITE_ROOT_URI in front of all $ref values. - This function recursively iterates through the whole dictionary. 
+def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: + """ + Recurse through a schema, yielding fresh copies of mutable containers. - $ref values can only be nested in dictionaries or lists - as the passed in `d` dictionary comes from the Vega-Lite json schema - and in json we only have arrays (-> lists in Python) and objects - (-> dictionaries in Python) which we need to iterate through. - """ - for key, value in d.items(): - if key == "$ref": - d[key] = _VEGA_LITE_ROOT_URI + d[key] - elif isinstance(value, dict): - d[key] = _prepare_refs(value) - elif isinstance(value, list): - prepared_values = [] - for v in value: - if isinstance(v, dict): - v = _prepare_refs(v) - prepared_values.append(v) - d[key] = prepared_values - return d - - schema = _prepare_refs(schema) - return schema + Adds ``_VEGA_LITE_ROOT_URI`` in front of all nested``$ref`` values. + """ + for k, v in m.items(): + if k == "$ref": + yield k, f"{_VEGA_LITE_ROOT_URI}{v}" + elif isinstance(v, dict): + yield k, dict(_rec_refs(v)) + elif isinstance(v, list): + yield k, [dict(_rec_refs(el)) if _is_dict(el) else el for el in v] + else: + yield k, v # We do not annotate the return value here as the referencing library is not always From e30cac6db326115a3c5d21d14882454e883c9eef Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 22:10:18 +0100 Subject: [PATCH 06/92] chore: Add note on `_get_errors_from_spec` --- tools/schemapi/schemapi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index e528dcacd..4ccea8d73 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -165,6 +165,9 @@ def validate_jsonschema( return None +# NOTE: Entry for creating a `list` of errors +# Everything else is skipped if this returns an empty `list` +# TODO: Refactor to peek at possible error w/ `next(validator.iter_errors(spec))` def _get_errors_from_spec( spec: dict[str, 
Any], schema: dict[str, Any], From e702b2648476b75056c3eb18da10d6ffed990203 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 18 Aug 2024 22:11:40 +0100 Subject: [PATCH 07/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 111 +++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 56 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index b91b90fbe..a0a2535f5 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -3,7 +3,6 @@ from __future__ import annotations import contextlib -import copy import inspect import json import sys @@ -44,6 +43,7 @@ if TYPE_CHECKING: from typing import ClassVar + from jsonschema.protocols import Validator from referencing import Registry from altair.typing import ChartType @@ -83,7 +83,9 @@ # class-level _class_is_valid_at_instantiation attribute to False DEBUG_MODE: bool = True -jsonschema_version_str = importlib_version("jsonschema") + +_USING_REFERENCING: Final[bool] = Version(importlib_version("jsonschema")) >= Version("4.18") # fmt: off +"""In version 4.18.0, the ``jsonschema`` package deprecated RefResolver in favor of the ``referencing`` library.""" def enable_debug_mode() -> None: @@ -165,46 +167,52 @@ def validate_jsonschema( return None +# NOTE: Entry for creating a `list` of errors +# Everything else is skipped if this returns an empty `list` +# TODO: Refactor to peek at possible error w/ `next(validator.iter_errors(spec))` def _get_errors_from_spec( spec: dict[str, Any], schema: dict[str, Any], rootschema: dict[str, Any] | None = None, ) -> ValidationErrorList: """ - Uses the relevant jsonschema validator to validate the passed in spec against the schema using the rootschema to resolve references. + Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. 
- The schema and rootschema themselves are not validated but instead considered as valid. - """ - # We don't use jsonschema.validate as this would validate the schema itself. - # Instead, we pass the schema directly to the validator class. This is done for - # two reasons: The schema comes from Vega-Lite and is not based on the user - # input, therefore there is no need to validate it in the first place. Furthermore, - # the "uri-reference" format checker fails for some of the references as URIs in - # "$ref" are not encoded, - # e.g. '#/definitions/ValueDefWithCondition' would be a valid $ref in a Vega-Lite schema but - # it is not a valid URI reference due to the characters such as '<'. + ``schema`` and ``rootschema`` are not validated but instead considered as valid. + + We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. + Instead, we pass the ``schema`` directly to the validator class. + + This is done for two reasons: + + 1. The schema comes from Vega-Lite and is not based on the user + input, therefore there is no need to validate it in the first place. + 2. The "uri-reference" format checker fails for some of the + references as URIs in "$ref" are not encoded, e.g.: + '#/definitions/ValueDefWithCondition' + + would be a valid $ref in a Vega-Lite schema but it is not a valid + URI reference due to the characters such as '<'. 
+ """ json_schema_draft_url = _get_json_schema_draft_url(rootschema or schema) - validator_cls = jsonschema.validators.validator_for( - {"$schema": json_schema_draft_url} + validator_cls: type[Validator] = cast( + "type[Validator]", + jsonschema.validators.validator_for({"$schema": json_schema_draft_url}), ) validator_kwargs: dict[str, Any] = {} if hasattr(validator_cls, "FORMAT_CHECKER"): validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER - if _use_referencing_library(): - schema = _prepare_references_in_schema(schema) + if _USING_REFERENCING: + schema = _prepare_references(schema) validator_kwargs["registry"] = _get_referencing_registry( rootschema or schema, json_schema_draft_url ) - else: # No resolver is necessary if the schema is already the full schema validator_kwargs["resolver"] = ( - jsonschema.RefResolver.from_schema(rootschema) - if rootschema is not None - else None + jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) validator = validator_cls(schema, **validator_kwargs) @@ -216,44 +224,35 @@ def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: return schema.get("$schema", _DEFAULT_JSON_SCHEMA_DRAFT_URL) -def _use_referencing_library() -> bool: - """In version 4.18.0, the jsonschema package deprecated RefResolver in favor of the referencing library.""" - return Version(jsonschema_version_str) >= Version("4.18") +def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: + """ + Return a deep copy of ``schema`` w/ replaced uri(s). + All encountered ``dict | list``(s) will be reconstructed + w/ ``_VEGA_LITE_ROOT_URI`` in front of all nested``$ref`` values. 
-def _prepare_references_in_schema(schema: dict[str, Any]) -> dict[str, Any]: - # Create a copy so that $ref is not modified in the original schema in case - # that it would still reference a dictionary which might be attached to - # an Altair class _schema attribute - schema = copy.deepcopy(schema) + Notes + ----- + ``copy.deepcopy`` is not needed as the iterator yields new objects. + """ + return dict(_rec_refs(schema)) - def _prepare_refs(d: dict[str, Any]) -> dict[str, Any]: - """ - Add _VEGA_LITE_ROOT_URI in front of all $ref values. - This function recursively iterates through the whole dictionary. +def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: + """ + Recurse through a schema, yielding fresh copies of mutable containers. - $ref values can only be nested in dictionaries or lists - as the passed in `d` dictionary comes from the Vega-Lite json schema - and in json we only have arrays (-> lists in Python) and objects - (-> dictionaries in Python) which we need to iterate through. - """ - for key, value in d.items(): - if key == "$ref": - d[key] = _VEGA_LITE_ROOT_URI + d[key] - elif isinstance(value, dict): - d[key] = _prepare_refs(value) - elif isinstance(value, list): - prepared_values = [] - for v in value: - if isinstance(v, dict): - v = _prepare_refs(v) - prepared_values.append(v) - d[key] = prepared_values - return d - - schema = _prepare_refs(schema) - return schema + Adds ``_VEGA_LITE_ROOT_URI`` in front of all nested``$ref`` values. 
+ """ + for k, v in m.items(): + if k == "$ref": + yield k, f"{_VEGA_LITE_ROOT_URI}{v}" + elif isinstance(v, dict): + yield k, dict(_rec_refs(v)) + elif isinstance(v, list): + yield k, [dict(_rec_refs(el)) if _is_dict(el) else el for el in v] + else: + yield k, v # We do not annotate the return value here as the referencing library is not always @@ -540,7 +539,7 @@ def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" - if _use_referencing_library(): + if _USING_REFERENCING: registry = _get_referencing_registry(rootschema or schema) # Using a different variable name to show that this is not the # jsonschema.RefResolver but instead a Resolver from the referencing From af878d08e8458cbf4e299b7186d327eb291c9f1b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:29:36 +0100 Subject: [PATCH 08/92] refactor(typing): Use stubs type `_JsonParameter` https://github.com/python/typeshed/blob/937270df0c25dc56a02f7199f1943fdb7d47aa9d/stubs/jsonschema/jsonschema/protocols.pyi#L11 --- tools/schemapi/schemapi.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 4ccea8d73..2df9070d1 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -41,7 +41,7 @@ if TYPE_CHECKING: from typing import ClassVar - from jsonschema.protocols import Validator + from jsonschema.protocols import Validator, _JsonParameter from referencing import Registry from altair.typing import ChartType @@ -109,7 +109,7 @@ def debug_mode(arg: bool) -> Iterator[None]: @overload def validate_jsonschema( - spec: Any, + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = ..., *, @@ -119,7 +119,7 @@ def validate_jsonschema( @overload def validate_jsonschema( - 
spec: Any, + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = ..., *, @@ -128,7 +128,7 @@ def validate_jsonschema( def validate_jsonschema( - spec, + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, *, @@ -169,7 +169,7 @@ def validate_jsonschema( # Everything else is skipped if this returns an empty `list` # TODO: Refactor to peek at possible error w/ `next(validator.iter_errors(spec))` def _get_errors_from_spec( - spec: dict[str, Any], + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, ) -> ValidationErrorList: From 5ca44df69a970bc21f41303f758cc84e68ab9e72 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:32:34 +0100 Subject: [PATCH 09/92] refactor: Shorten some references to `ValidationError` --- tools/schemapi/schemapi.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 2df9070d1..271e7070d 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -41,6 +41,7 @@ if TYPE_CHECKING: from typing import ClassVar + from jsonschema import ValidationError from jsonschema.protocols import Validator, _JsonParameter from referencing import Registry @@ -56,7 +57,7 @@ else: from typing_extensions import Never, Self -ValidationErrorList: TypeAlias = List[jsonschema.exceptions.ValidationError] +ValidationErrorList: TypeAlias = List[jsonschema.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] # This URI is arbitrary and could be anything else. It just cannot be an empty @@ -124,7 +125,7 @@ def validate_jsonschema( rootschema: dict[str, Any] | None = ..., *, raise_error: Literal[False], -) -> jsonschema.exceptions.ValidationError | None: ... +) -> ValidationError | None: ... 
def validate_jsonschema( @@ -133,7 +134,7 @@ def validate_jsonschema( rootschema: dict[str, Any] | None = None, *, raise_error: bool = True, -) -> jsonschema.exceptions.ValidationError | None: +) -> ValidationError | None: """ Validates the passed in spec against the schema in the context of the rootschema. @@ -398,7 +399,7 @@ def _deduplicate_errors( return grouped_errors_deduplicated -def _is_required_value_error(err: jsonschema.exceptions.ValidationError) -> bool: +def _is_required_value_error(err: ValidationError) -> bool: return err.validator == "required" and err.validator_value == ["value"] @@ -558,7 +559,7 @@ def _resolve_references( class SchemaValidationError(jsonschema.ValidationError): """A wrapper for jsonschema.ValidationError with friendlier traceback.""" - def __init__(self, obj: SchemaBase, err: jsonschema.ValidationError) -> None: + def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj self._errors: GroupedValidationErrors = getattr( @@ -614,7 +615,7 @@ def _get_message_for_errors_group( def _get_additional_properties_error_message( self, - error: jsonschema.exceptions.ValidationError, + error: ValidationError, ) -> str: """Output all existing parameters when an unknown parameter is specified.""" altair_cls = self._get_altair_class_for_error(error) @@ -633,9 +634,7 @@ def _get_additional_properties_error_message( See the help for `{altair_cls.__name__}` to read the full description of these parameters""" return message - def _get_altair_class_for_error( - self, error: jsonschema.exceptions.ValidationError - ) -> type[SchemaBase]: + def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase]: """ Try to get the lowest class possible in the chart hierarchy so it can be displayed in the error message. 
From de0701226515c42d13e4b0f46e24391110e99e37 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:51:45 +0100 Subject: [PATCH 10/92] perf: Redefine `_json_path` to be bound on `jsonschema` version Previously, using a version below `4.0.1` would still always check first if there was a property. This would not change between checks. Defining in this style removes the need for as much documentation, since the version guards are very clear when each branch is used. --- tools/schemapi/schemapi.py | 52 +++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 271e7070d..e7b37b25e 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -3,6 +3,7 @@ import contextlib import inspect import json +import operator import sys import textwrap from collections import defaultdict @@ -13,6 +14,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Dict, Final, Iterable, @@ -28,7 +30,6 @@ from typing_extensions import TypeAlias import jsonschema -import jsonschema.exceptions import jsonschema.validators import narwhals.stable.v1 as nw from packaging.version import Version @@ -82,9 +83,31 @@ # class-level _class_is_valid_at_instantiation attribute to False DEBUG_MODE: bool = True +_JSONSCHEMA_VERSION = Version(importlib_version("jsonschema")) +_USING_REFERENCING: Final[bool] = _JSONSCHEMA_VERSION >= Version("4.18") # noqa: SIM300 +""" +``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. 
+ +See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 +""" + +if _JSONSCHEMA_VERSION >= Version("4.0.1"): # noqa: SIM300 + _json_path: Callable[[ValidationError], str] = operator.attrgetter("json_path") +else: -_USING_REFERENCING: Final[bool] = Version(importlib_version("jsonschema")) >= Version("4.18") # fmt: off -"""In version 4.18.0, the ``jsonschema`` package deprecated RefResolver in favor of the ``referencing`` library.""" + def _json_path(err: ValidationError, /) -> str: + """ + Vendored backport for ``jsonschema.ValidationError.json_path`` property. + + See https://github.com/vega/altair/issues/3038. + """ + path = "$" + for elem in err.absolute_path: + if isinstance(elem, int): + path += "[" + str(elem) + "]" + else: + path += "." + elem + return path def enable_debug_mode() -> None: @@ -279,23 +302,7 @@ def _get_referencing_registry( ) -def _json_path(err: jsonschema.exceptions.ValidationError) -> str: - """ - Drop in replacement for the .json_path property of the jsonschema ValidationError class. - - This is not available as property for ValidationError with jsonschema<4.0.1. - - More info, see https://github.com/vega/altair/issues/3038. - """ - path = "$" - for elem in err.absolute_path: - if isinstance(elem, int): - path += "[" + str(elem) + "]" - else: - path += "." 
+ elem - return path - - +# NOTE: Review function (2) def _group_errors_by_json_path( errors: ValidationErrorList, ) -> GroupedValidationErrors: @@ -308,8 +315,7 @@ def _group_errors_by_json_path( """ errors_by_json_path = defaultdict(list) for err in errors: - err_key = getattr(err, "json_path", _json_path(err)) - errors_by_json_path[err_key].append(err) + errors_by_json_path[_json_path(err)].append(err) return dict(errors_by_json_path) @@ -563,7 +569,7 @@ def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj self._errors: GroupedValidationErrors = getattr( - err, "_all_errors", {getattr(err, "json_path", _json_path(err)): [err]} + err, "_all_errors", {_json_path(err): [err]} ) # This is the message from err self._original_message = self.message From 05976a6a03cb5ef4a9850083b8fb11bc46b7ff27 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 19 Aug 2024 09:56:43 +0100 Subject: [PATCH 11/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 79 +++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index a0a2535f5..e8bf16f80 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -5,6 +5,7 @@ import contextlib import inspect import json +import operator import sys import textwrap from collections import defaultdict @@ -15,6 +16,7 @@ from typing import ( TYPE_CHECKING, Any, + Callable, Dict, Final, Iterable, @@ -30,7 +32,6 @@ from typing_extensions import TypeAlias import jsonschema -import jsonschema.exceptions import jsonschema.validators import narwhals.stable.v1 as nw from packaging.version import Version @@ -43,7 +44,8 @@ if TYPE_CHECKING: from typing import ClassVar - from jsonschema.protocols import Validator + from jsonschema import ValidationError + from jsonschema.protocols import Validator, _JsonParameter 
from referencing import Registry from altair.typing import ChartType @@ -58,7 +60,7 @@ else: from typing_extensions import Never, Self -ValidationErrorList: TypeAlias = List[jsonschema.exceptions.ValidationError] +ValidationErrorList: TypeAlias = List[jsonschema.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] # This URI is arbitrary and could be anything else. It just cannot be an empty @@ -83,9 +85,31 @@ # class-level _class_is_valid_at_instantiation attribute to False DEBUG_MODE: bool = True +_JSONSCHEMA_VERSION = Version(importlib_version("jsonschema")) +_USING_REFERENCING: Final[bool] = _JSONSCHEMA_VERSION >= Version("4.18") # noqa: SIM300 +""" +``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. + +See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 +""" + +if _JSONSCHEMA_VERSION >= Version("4.0.1"): # noqa: SIM300 + _json_path: Callable[[ValidationError], str] = operator.attrgetter("json_path") +else: -_USING_REFERENCING: Final[bool] = Version(importlib_version("jsonschema")) >= Version("4.18") # fmt: off -"""In version 4.18.0, the ``jsonschema`` package deprecated RefResolver in favor of the ``referencing`` library.""" + def _json_path(err: ValidationError, /) -> str: + """ + Vendored backport for ``jsonschema.ValidationError.json_path`` property. + + See https://github.com/vega/altair/issues/3038. + """ + path = "$" + for elem in err.absolute_path: + if isinstance(elem, int): + path += "[" + str(elem) + "]" + else: + path += "." 
+ elem + return path def enable_debug_mode() -> None: @@ -111,7 +135,7 @@ def debug_mode(arg: bool) -> Iterator[None]: @overload def validate_jsonschema( - spec: Any, + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = ..., *, @@ -121,21 +145,21 @@ def validate_jsonschema( @overload def validate_jsonschema( - spec: Any, + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = ..., *, raise_error: Literal[False], -) -> jsonschema.exceptions.ValidationError | None: ... +) -> ValidationError | None: ... def validate_jsonschema( - spec, + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, *, raise_error: bool = True, -) -> jsonschema.exceptions.ValidationError | None: +) -> ValidationError | None: """ Validates the passed in spec against the schema in the context of the rootschema. @@ -171,7 +195,7 @@ def validate_jsonschema( # Everything else is skipped if this returns an empty `list` # TODO: Refactor to peek at possible error w/ `next(validator.iter_errors(spec))` def _get_errors_from_spec( - spec: dict[str, Any], + spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, ) -> ValidationErrorList: @@ -280,23 +304,7 @@ def _get_referencing_registry( ) -def _json_path(err: jsonschema.exceptions.ValidationError) -> str: - """ - Drop in replacement for the .json_path property of the jsonschema ValidationError class. - - This is not available as property for ValidationError with jsonschema<4.0.1. - - More info, see https://github.com/vega/altair/issues/3038. - """ - path = "$" - for elem in err.absolute_path: - if isinstance(elem, int): - path += "[" + str(elem) + "]" - else: - path += "." 
+ elem - return path - - +# NOTE: Review function (2) def _group_errors_by_json_path( errors: ValidationErrorList, ) -> GroupedValidationErrors: @@ -309,8 +317,7 @@ def _group_errors_by_json_path( """ errors_by_json_path = defaultdict(list) for err in errors: - err_key = getattr(err, "json_path", _json_path(err)) - errors_by_json_path[err_key].append(err) + errors_by_json_path[_json_path(err)].append(err) return dict(errors_by_json_path) @@ -400,7 +407,7 @@ def _deduplicate_errors( return grouped_errors_deduplicated -def _is_required_value_error(err: jsonschema.exceptions.ValidationError) -> bool: +def _is_required_value_error(err: ValidationError) -> bool: return err.validator == "required" and err.validator_value == ["value"] @@ -560,11 +567,11 @@ def _resolve_references( class SchemaValidationError(jsonschema.ValidationError): """A wrapper for jsonschema.ValidationError with friendlier traceback.""" - def __init__(self, obj: SchemaBase, err: jsonschema.ValidationError) -> None: + def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj self._errors: GroupedValidationErrors = getattr( - err, "_all_errors", {getattr(err, "json_path", _json_path(err)): [err]} + err, "_all_errors", {_json_path(err): [err]} ) # This is the message from err self._original_message = self.message @@ -616,7 +623,7 @@ def _get_message_for_errors_group( def _get_additional_properties_error_message( self, - error: jsonschema.exceptions.ValidationError, + error: ValidationError, ) -> str: """Output all existing parameters when an unknown parameter is specified.""" altair_cls = self._get_altair_class_for_error(error) @@ -635,9 +642,7 @@ def _get_additional_properties_error_message( See the help for `{altair_cls.__name__}` to read the full description of these parameters""" return message - def _get_altair_class_for_error( - self, error: jsonschema.exceptions.ValidationError - ) -> type[SchemaBase]: + def 
_get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase]: """ Try to get the lowest class possible in the chart hierarchy so it can be displayed in the error message. From af6e41ce0c8476b28f329528d1442f1ba44751e0 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 19 Aug 2024 23:19:50 +0100 Subject: [PATCH 12/92] perf(DRAFT): Initial lazy validation First non-failing version. Have left most of the original code in. Planning to migrate & adapt the comments before removing. # --- altair/utils/schemapi.py | 229 ++++++++++++++++++++++++++++++------- tools/schemapi/schemapi.py | 229 ++++++++++++++++++++++++++++++------- 2 files changed, 380 insertions(+), 78 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index e8bf16f80..93cbb06c1 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -11,7 +11,7 @@ from collections import defaultdict from functools import partial from importlib.metadata import version as importlib_version -from itertools import chain, zip_longest +from itertools import chain, groupby, islice, zip_longest from math import ceil from typing import ( TYPE_CHECKING, @@ -34,6 +34,7 @@ import jsonschema import jsonschema.validators import narwhals.stable.v1 as nw +from jsonschema import ValidationError from packaging.version import Version # This leads to circular imports with the vegalite module. Currently, this works @@ -44,7 +45,6 @@ if TYPE_CHECKING: from typing import ClassVar - from jsonschema import ValidationError from jsonschema.protocols import Validator, _JsonParameter from referencing import Registry @@ -63,27 +63,40 @@ ValidationErrorList: TypeAlias = List[jsonschema.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] -# This URI is arbitrary and could be anything else. It just cannot be an empty -# string as we need to reference the schema registered in -# the referencing.Registry. 
_VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" +""" +Prefix added to each ``"$ref"``. + +This URI is arbitrary and could be anything else. + +It just cannot be an empty string as we need to reference the schema registered in +the ``referencing.Registry``.""" -# Ideally, jsonschema specification would be parsed from the current Vega-Lite -# schema instead of being hardcoded here as a default value. -# However, due to circular imports between this module and the altair.vegalite -# modules, this information is not yet available at this point as altair.vegalite -# is only partially loaded. The draft version which is used is unlikely to -# change often so it's ok to keep this. There is also a test which validates -# that this value is always the same as in the Vega-Lite schema. _DEFAULT_JSON_SCHEMA_DRAFT_URL: Final = "http://json-schema.org/draft-07/schema#" +""" +Ideally, jsonschema specification would be parsed from the current Vega-Lite +schema instead of being hardcoded here as a default value. +However, due to circular imports between this module and the ``alt.vegalite`` +modules, this information is not yet available at this point as ``alt.vegalite`` +is only partially loaded. + +The draft version which is used is unlikely to change often so it's ok to keep this. +There is also a test which validates that this value is always the same as in the Vega-Lite schema. +""" -# If DEBUG_MODE is True, then schema objects are converted to dict and -# validated at creation time. This slows things down, particularly for -# larger specs, but leads to much more useful tracebacks for the user. -# Individual schema classes can override this by setting the -# class-level _class_is_valid_at_instantiation attribute to False DEBUG_MODE: bool = True +""" +If ``DEBUG_MODE``, then ``SchemaBase`` are converted to ``dict`` and validated at creation time. + +This slows things down, particularly for larger specs, but leads to much more +useful tracebacks for the user. 
+ +Individual schema classes can override with: + + class Derived(SchemaBase): + _class_is_valid_at_instantiation: ClassVar[bool] = False +""" _JSONSCHEMA_VERSION = Version(importlib_version("jsonschema")) _USING_REFERENCING: Final[bool] = _JSONSCHEMA_VERSION >= Version("4.18") # noqa: SIM300 @@ -141,8 +154,6 @@ def validate_jsonschema( *, raise_error: Literal[True] = ..., ) -> Never: ... - - @overload def validate_jsonschema( spec: _JsonParameter, @@ -151,8 +162,6 @@ def validate_jsonschema( *, raise_error: Literal[False], ) -> ValidationError | None: ... - - def validate_jsonschema( spec: _JsonParameter, schema: dict[str, Any], @@ -167,8 +176,9 @@ def validate_jsonschema( and only the most relevant errors are kept. Errors are then either raised or returned, depending on the value of `raise_error`. """ - errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) - if errors: + it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) + if first_error := next(it_errors, None): + errors = [first_error, *it_errors] leaf_errors = _get_leaves_of_error_tree(errors) grouped_errors = _group_errors_by_json_path(leaf_errors) grouped_errors = _subset_to_most_specific_json_paths(grouped_errors) @@ -182,7 +192,7 @@ def validate_jsonschema( # error message. Setting a new attribute like this is not ideal as # it then no longer matches the type ValidationError. It would be better # to refactor this function to never raise but only return errors. - main_error._all_errors = grouped_errors + main_error._errors = list(grouped_errors.values()) if raise_error: raise main_error else: @@ -191,14 +201,50 @@ def validate_jsonschema( return None -# NOTE: Entry for creating a `list` of errors -# Everything else is skipped if this returns an empty `list` -# TODO: Refactor to peek at possible error w/ `next(validator.iter_errors(spec))` +def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: + """ + Continue an iterator at the last popped ``element``. 
+ + Equivalent to:: + + elements = 1, 2, 3, 4, 5 + it = iter(elements) + element = next(it) + it_continue = chain([element], it) + + """ + yield element + yield from others + + +def lazy_validate_json_schema( + spec: _JsonParameter, + schema: dict[str, Any], + rootschema: dict[str, Any] | None = None, +) -> None: + """Lazy equivalent of `validate_jsonschema`.""" + it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) + if first_error := next(it_errors, None): + groups = _lazy_group_tree_leaves(_rechain(first_error, it_errors)) + most_specific = _lazy_subset_to_most_specific_json_paths(groups) + deduplicated = _lazy_deduplicate_errors(most_specific) + dummy_error: Any + if dummy_error := next(deduplicated, None): + dummy_error._errors = _rechain(dummy_error, deduplicated) # type: ignore[attr-defined] + raise dummy_error + else: + msg = ( + f"Expected to find at least one error, but first error was `None`.\n\n" + f"spec: {spec!r}" + ) + raise NotImplementedError(msg) + + def _get_errors_from_spec( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, -) -> ValidationErrorList: +) -> Iterator[ValidationError]: """ Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. @@ -240,8 +286,7 @@ def _get_errors_from_spec( ) validator = validator_cls(schema, **validator_kwargs) - errors = list(validator.iter_errors(spec)) - return errors + return validator.iter_errors(spec) def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: @@ -304,9 +349,8 @@ def _get_referencing_registry( ) -# NOTE: Review function (2) def _group_errors_by_json_path( - errors: ValidationErrorList, + errors: Iterable[ValidationError], ) -> GroupedValidationErrors: """ Groups errors by the `json_path` attribute of the jsonschema ValidationError class. 
@@ -342,6 +386,109 @@ def _get_leaves_of_error_tree( return leaves +def _lazy_group_tree_leaves( + errors: Iterable[ValidationError], / +) -> Iterator[tuple[str, ValidationError]]: + """ + Combines 3 previously distinct steps: + + - ``_get_leaves_of_error_tree`` + - (part of) ``_group_errors_by_json_path`` + - Doesnt actually group yet, can by calling `dict(result)`. + - ``_is_required_value_error`` + """ # noqa: D400 + for err in errors: + if err_context := err.context: + yield from _lazy_group_tree_leaves(err_context) + elif err.validator == "required" and err.validator_value == ["value"]: + continue + else: + yield _json_path(err), err + + +_fn_path = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) +"""Key function for ``(json_path, ValidationError)``.""" +_fn_validator = cast("Callable[[ValidationError], str]", operator.attrgetter("validator")) # fmt: off +"""Key function for ``ValidationError.validator``.""" + + +def _lazy_subset_to_most_specific_json_paths( + json_path_errors: Iterator[tuple[str, ValidationError]], / +) -> Iterator[Iterable[ValidationError]]: + """ + Currently using a `list`, but typing it more restrictive to see if it can be avoided. 
+ + - Needs to be sorted to work with groupby + - Reversing allows prioritising more specific groups, since they are seen first + - Then re-reversed, to keep seen order + + """ + rev_sort = sorted(json_path_errors, key=_fn_path, reverse=True) + keeping: dict[str, Iterable[ValidationError]] = {} + for unique_path, grouped_errors in groupby(rev_sort, key=_fn_path): + if any(seen.startswith(unique_path) for seen in keeping): + continue + else: + keeping[unique_path] = [err for _, err in grouped_errors] + yield from reversed(keeping.values()) + + +def _lazy_deduplicate_errors( + grouped_errors: Iterator[Iterable[ValidationError]], / +) -> Iterator[ValidationError]: + for element_errors in grouped_errors: + for validator, errors in groupby( + sorted(element_errors, key=_fn_validator), key=_fn_validator + ): + if validator == "additionalProperties": + errors = _lazy_additional_properties(errors) + elif validator == "enum": + errors = _lazy_enum(errors) + yield from _lazy_unique_message(errors) + + +def _lazy_unique_message( + iterable: Iterable[ValidationError], / +) -> Iterator[ValidationError]: + seen = set() + for el in iterable: + if el.message not in seen: + seen.add(el.message) + yield el + + +def _lazy_additional_properties( + iterable: Iterable[ValidationError], / +) -> Iterator[ValidationError]: + it = iter(iterable) + first = next(it) + if ( + parent := cast("ValidationError", first.parent) + ) and parent.validator == "anyOf": + yield min(_rechain(first, it), key=lambda x: len(x.message)) + else: + yield first + + +def _lazy_enum(iterable: Iterable[ValidationError], /) -> Iterator[ValidationError]: + """ + Temporary reusing the eager version to isolate issues. + + The 3 errors rule applies per group. 
+ """ + # FIXME: Too simple + # Need to do an eager pass, as this skips intersections of non-overlapping enums + # yield reduce(_enum_inner, iterable) + yield from _deduplicate_enum_errors(list(iterable)) + + +def _enum_inner(prev: ValidationError, current: ValidationError, /) -> ValidationError: + """**Disabled**.""" + longest = set(cast("list[str]", prev.validator_value)) + contender = set(cast("list[str]", current.validator_value)) + return current if contender.issuperset(longest) else prev + + def _subset_to_most_specific_json_paths( errors_by_json_path: GroupedValidationErrors, ) -> GroupedValidationErrors: @@ -570,9 +717,8 @@ class SchemaValidationError(jsonschema.ValidationError): def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj - self._errors: GroupedValidationErrors = getattr( - err, "_all_errors", {_json_path(err): [err]} - ) + err = cast("SchemaValidationError", err) + self._errors: Iterable[ValidationError] = err._errors # This is the message from err self._original_message = self.message self.message = self._get_message() @@ -592,7 +738,10 @@ def indent_second_line_onwards(message: str, indent: int = 4) -> str: error_messages: list[str] = [] # Only show a maximum of 3 errors as else the final message returned by this # method could get very long. 
- for errors in list(self._errors.values())[:3]: + # ^^^^^^^^^^ + # CORRECTION: Only show 3 **json_paths** + + for errors in islice(_group_errors_by_json_path(self._errors).values(), 3): error_messages.append(self._get_message_for_errors_group(errors)) message = "" @@ -1197,7 +1346,9 @@ def validate( schema = cls._schema # For the benefit of mypy assert schema is not None - validate_jsonschema(instance, schema, rootschema=cls._rootschema or cls._schema) + lazy_validate_json_schema( + instance, schema, rootschema=cls._rootschema or cls._schema + ) @classmethod def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, Any]: @@ -1223,7 +1374,7 @@ def validate_property( np_opt = sys.modules.get("numpy") value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) props = cls.resolve_references(schema or cls._schema).get("properties", {}) - validate_jsonschema( + lazy_validate_json_schema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema ) @@ -1394,7 +1545,7 @@ def from_dict( schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) for possible in schemas: try: - validate_jsonschema(dct, possible, rootschema=root_schema) + lazy_validate_json_schema(dct, possible, rootschema=root_schema) except jsonschema.ValidationError: continue else: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index e7b37b25e..13d75946c 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -9,7 +9,7 @@ from collections import defaultdict from functools import partial from importlib.metadata import version as importlib_version -from itertools import chain, zip_longest +from itertools import chain, groupby, islice, zip_longest from math import ceil from typing import ( TYPE_CHECKING, @@ -32,6 +32,7 @@ import jsonschema import jsonschema.validators import narwhals.stable.v1 as nw +from jsonschema import ValidationError from packaging.version import Version # This leads to circular imports with the vegalite 
module. Currently, this works @@ -42,7 +43,6 @@ if TYPE_CHECKING: from typing import ClassVar - from jsonschema import ValidationError from jsonschema.protocols import Validator, _JsonParameter from referencing import Registry @@ -61,27 +61,40 @@ ValidationErrorList: TypeAlias = List[jsonschema.ValidationError] GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] -# This URI is arbitrary and could be anything else. It just cannot be an empty -# string as we need to reference the schema registered in -# the referencing.Registry. _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" +""" +Prefix added to each ``"$ref"``. + +This URI is arbitrary and could be anything else. + +It just cannot be an empty string as we need to reference the schema registered in +the ``referencing.Registry``.""" -# Ideally, jsonschema specification would be parsed from the current Vega-Lite -# schema instead of being hardcoded here as a default value. -# However, due to circular imports between this module and the altair.vegalite -# modules, this information is not yet available at this point as altair.vegalite -# is only partially loaded. The draft version which is used is unlikely to -# change often so it's ok to keep this. There is also a test which validates -# that this value is always the same as in the Vega-Lite schema. _DEFAULT_JSON_SCHEMA_DRAFT_URL: Final = "http://json-schema.org/draft-07/schema#" +""" +Ideally, jsonschema specification would be parsed from the current Vega-Lite +schema instead of being hardcoded here as a default value. +However, due to circular imports between this module and the ``alt.vegalite`` +modules, this information is not yet available at this point as ``alt.vegalite`` +is only partially loaded. + +The draft version which is used is unlikely to change often so it's ok to keep this. +There is also a test which validates that this value is always the same as in the Vega-Lite schema. 
+""" -# If DEBUG_MODE is True, then schema objects are converted to dict and -# validated at creation time. This slows things down, particularly for -# larger specs, but leads to much more useful tracebacks for the user. -# Individual schema classes can override this by setting the -# class-level _class_is_valid_at_instantiation attribute to False DEBUG_MODE: bool = True +""" +If ``DEBUG_MODE``, then ``SchemaBase`` are converted to ``dict`` and validated at creation time. + +This slows things down, particularly for larger specs, but leads to much more +useful tracebacks for the user. + +Individual schema classes can override with: + + class Derived(SchemaBase): + _class_is_valid_at_instantiation: ClassVar[bool] = False +""" _JSONSCHEMA_VERSION = Version(importlib_version("jsonschema")) _USING_REFERENCING: Final[bool] = _JSONSCHEMA_VERSION >= Version("4.18") # noqa: SIM300 @@ -139,8 +152,6 @@ def validate_jsonschema( *, raise_error: Literal[True] = ..., ) -> Never: ... - - @overload def validate_jsonschema( spec: _JsonParameter, @@ -149,8 +160,6 @@ def validate_jsonschema( *, raise_error: Literal[False], ) -> ValidationError | None: ... - - def validate_jsonschema( spec: _JsonParameter, schema: dict[str, Any], @@ -165,8 +174,9 @@ def validate_jsonschema( and only the most relevant errors are kept. Errors are then either raised or returned, depending on the value of `raise_error`. """ - errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) - if errors: + it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) + if first_error := next(it_errors, None): + errors = [first_error, *it_errors] leaf_errors = _get_leaves_of_error_tree(errors) grouped_errors = _group_errors_by_json_path(leaf_errors) grouped_errors = _subset_to_most_specific_json_paths(grouped_errors) @@ -180,7 +190,7 @@ def validate_jsonschema( # error message. Setting a new attribute like this is not ideal as # it then no longer matches the type ValidationError. 
It would be better # to refactor this function to never raise but only return errors. - main_error._all_errors = grouped_errors + main_error._errors = list(grouped_errors.values()) if raise_error: raise main_error else: @@ -189,14 +199,50 @@ def validate_jsonschema( return None -# NOTE: Entry for creating a `list` of errors -# Everything else is skipped if this returns an empty `list` -# TODO: Refactor to peek at possible error w/ `next(validator.iter_errors(spec))` +def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: + """ + Continue an iterator at the last popped ``element``. + + Equivalent to:: + + elements = 1, 2, 3, 4, 5 + it = iter(elements) + element = next(it) + it_continue = chain([element], it) + + """ + yield element + yield from others + + +def lazy_validate_json_schema( + spec: _JsonParameter, + schema: dict[str, Any], + rootschema: dict[str, Any] | None = None, +) -> None: + """Lazy equivalent of `validate_jsonschema`.""" + it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) + if first_error := next(it_errors, None): + groups = _lazy_group_tree_leaves(_rechain(first_error, it_errors)) + most_specific = _lazy_subset_to_most_specific_json_paths(groups) + deduplicated = _lazy_deduplicate_errors(most_specific) + dummy_error: Any + if dummy_error := next(deduplicated, None): + dummy_error._errors = _rechain(dummy_error, deduplicated) # type: ignore[attr-defined] + raise dummy_error + else: + msg = ( + f"Expected to find at least one error, but first error was `None`.\n\n" + f"spec: {spec!r}" + ) + raise NotImplementedError(msg) + + def _get_errors_from_spec( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, -) -> ValidationErrorList: +) -> Iterator[ValidationError]: """ Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. 
@@ -238,8 +284,7 @@ def _get_errors_from_spec( ) validator = validator_cls(schema, **validator_kwargs) - errors = list(validator.iter_errors(spec)) - return errors + return validator.iter_errors(spec) def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: @@ -302,9 +347,8 @@ def _get_referencing_registry( ) -# NOTE: Review function (2) def _group_errors_by_json_path( - errors: ValidationErrorList, + errors: Iterable[ValidationError], ) -> GroupedValidationErrors: """ Groups errors by the `json_path` attribute of the jsonschema ValidationError class. @@ -340,6 +384,109 @@ def _get_leaves_of_error_tree( return leaves +def _lazy_group_tree_leaves( + errors: Iterable[ValidationError], / +) -> Iterator[tuple[str, ValidationError]]: + """ + Combines 3 previously distinct steps: + + - ``_get_leaves_of_error_tree`` + - (part of) ``_group_errors_by_json_path`` + - Doesnt actually group yet, can by calling `dict(result)`. + - ``_is_required_value_error`` + """ # noqa: D400 + for err in errors: + if err_context := err.context: + yield from _lazy_group_tree_leaves(err_context) + elif err.validator == "required" and err.validator_value == ["value"]: + continue + else: + yield _json_path(err), err + + +_fn_path = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) +"""Key function for ``(json_path, ValidationError)``.""" +_fn_validator = cast("Callable[[ValidationError], str]", operator.attrgetter("validator")) # fmt: off +"""Key function for ``ValidationError.validator``.""" + + +def _lazy_subset_to_most_specific_json_paths( + json_path_errors: Iterator[tuple[str, ValidationError]], / +) -> Iterator[Iterable[ValidationError]]: + """ + Currently using a `list`, but typing it more restrictive to see if it can be avoided. 
+ + - Needs to be sorted to work with groupby + - Reversing allows prioritising more specific groups, since they are seen first + - Then re-reversed, to keep seen order + + """ + rev_sort = sorted(json_path_errors, key=_fn_path, reverse=True) + keeping: dict[str, Iterable[ValidationError]] = {} + for unique_path, grouped_errors in groupby(rev_sort, key=_fn_path): + if any(seen.startswith(unique_path) for seen in keeping): + continue + else: + keeping[unique_path] = [err for _, err in grouped_errors] + yield from reversed(keeping.values()) + + +def _lazy_deduplicate_errors( + grouped_errors: Iterator[Iterable[ValidationError]], / +) -> Iterator[ValidationError]: + for element_errors in grouped_errors: + for validator, errors in groupby( + sorted(element_errors, key=_fn_validator), key=_fn_validator + ): + if validator == "additionalProperties": + errors = _lazy_additional_properties(errors) + elif validator == "enum": + errors = _lazy_enum(errors) + yield from _lazy_unique_message(errors) + + +def _lazy_unique_message( + iterable: Iterable[ValidationError], / +) -> Iterator[ValidationError]: + seen = set() + for el in iterable: + if el.message not in seen: + seen.add(el.message) + yield el + + +def _lazy_additional_properties( + iterable: Iterable[ValidationError], / +) -> Iterator[ValidationError]: + it = iter(iterable) + first = next(it) + if ( + parent := cast("ValidationError", first.parent) + ) and parent.validator == "anyOf": + yield min(_rechain(first, it), key=lambda x: len(x.message)) + else: + yield first + + +def _lazy_enum(iterable: Iterable[ValidationError], /) -> Iterator[ValidationError]: + """ + Temporary reusing the eager version to isolate issues. + + The 3 errors rule applies per group. 
+ """ + # FIXME: Too simple + # Need to do an eager pass, as this skips intersections of non-overlapping enums + # yield reduce(_enum_inner, iterable) + yield from _deduplicate_enum_errors(list(iterable)) + + +def _enum_inner(prev: ValidationError, current: ValidationError, /) -> ValidationError: + """**Disabled**.""" + longest = set(cast("list[str]", prev.validator_value)) + contender = set(cast("list[str]", current.validator_value)) + return current if contender.issuperset(longest) else prev + + def _subset_to_most_specific_json_paths( errors_by_json_path: GroupedValidationErrors, ) -> GroupedValidationErrors: @@ -568,9 +715,8 @@ class SchemaValidationError(jsonschema.ValidationError): def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj - self._errors: GroupedValidationErrors = getattr( - err, "_all_errors", {_json_path(err): [err]} - ) + err = cast("SchemaValidationError", err) + self._errors: Iterable[ValidationError] = err._errors # This is the message from err self._original_message = self.message self.message = self._get_message() @@ -590,7 +736,10 @@ def indent_second_line_onwards(message: str, indent: int = 4) -> str: error_messages: list[str] = [] # Only show a maximum of 3 errors as else the final message returned by this # method could get very long. 
- for errors in list(self._errors.values())[:3]: + # ^^^^^^^^^^ + # CORRECTION: Only show 3 **json_paths** + + for errors in islice(_group_errors_by_json_path(self._errors).values(), 3): error_messages.append(self._get_message_for_errors_group(errors)) message = "" @@ -1195,7 +1344,9 @@ def validate( schema = cls._schema # For the benefit of mypy assert schema is not None - validate_jsonschema(instance, schema, rootschema=cls._rootschema or cls._schema) + lazy_validate_json_schema( + instance, schema, rootschema=cls._rootschema or cls._schema + ) @classmethod def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, Any]: @@ -1221,7 +1372,7 @@ def validate_property( np_opt = sys.modules.get("numpy") value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) props = cls.resolve_references(schema or cls._schema).get("properties", {}) - validate_jsonschema( + lazy_validate_json_schema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema ) @@ -1392,7 +1543,7 @@ def from_dict( schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) for possible in schemas: try: - validate_jsonschema(dct, possible, rootschema=root_schema) + lazy_validate_json_schema(dct, possible, rootschema=root_schema) except jsonschema.ValidationError: continue else: From c261cb46510ce680d99930bfc9a9f1f4762d1d9b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:34:50 +0100 Subject: [PATCH 13/92] perf: Replace message length lambda https://github.com/vega/altair/pull/3547#discussion_r1722938370 --- altair/utils/schemapi.py | 7 ++++++- tools/schemapi/schemapi.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 93cbb06c1..b8dcc05a8 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -465,11 +465,16 @@ def _lazy_additional_properties( if ( parent := cast("ValidationError", 
first.parent) ) and parent.validator == "anyOf": - yield min(_rechain(first, it), key=lambda x: len(x.message)) + yield min(_rechain(first, it), key=_message_len) else: yield first +def _message_len(err: ValidationError, /) -> int: + """Return length of a ``ValidationError`` message.""" + return len(err.message) + + def _lazy_enum(iterable: Iterable[ValidationError], /) -> Iterator[ValidationError]: """ Temporary reusing the eager version to isolate issues. diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 13d75946c..2d63ecdf9 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -463,11 +463,16 @@ def _lazy_additional_properties( if ( parent := cast("ValidationError", first.parent) ) and parent.validator == "anyOf": - yield min(_rechain(first, it), key=lambda x: len(x.message)) + yield min(_rechain(first, it), key=_message_len) else: yield first +def _message_len(err: ValidationError, /) -> int: + """Return length of a ``ValidationError`` message.""" + return len(err.message) + + def _lazy_enum(iterable: Iterable[ValidationError], /) -> Iterator[ValidationError]: """ Temporary reusing the eager version to isolate issues. 
From bbf6f142ac6a4df0dc6d7e3ca326b9a8d32b413e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:01:51 +0100 Subject: [PATCH 14/92] perf: Use `islice` earlier to prune errors https://github.com/vega/altair/pull/3547#discussion_r1723026972 --- altair/utils/schemapi.py | 4 ++-- tools/schemapi/schemapi.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index b8dcc05a8..535a63cfe 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -430,7 +430,7 @@ def _lazy_subset_to_most_specific_json_paths( continue else: keeping[unique_path] = [err for _, err in grouped_errors] - yield from reversed(keeping.values()) + yield from islice(reversed(keeping.values()), 3) def _lazy_deduplicate_errors( @@ -746,7 +746,7 @@ def indent_second_line_onwards(message: str, indent: int = 4) -> str: # ^^^^^^^^^^ # CORRECTION: Only show 3 **json_paths** - for errors in islice(_group_errors_by_json_path(self._errors).values(), 3): + for errors in _group_errors_by_json_path(self._errors).values(): error_messages.append(self._get_message_for_errors_group(errors)) message = "" diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 2d63ecdf9..6cb154065 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -428,7 +428,7 @@ def _lazy_subset_to_most_specific_json_paths( continue else: keeping[unique_path] = [err for _, err in grouped_errors] - yield from reversed(keeping.values()) + yield from islice(reversed(keeping.values()), 3) def _lazy_deduplicate_errors( @@ -744,7 +744,7 @@ def indent_second_line_onwards(message: str, indent: int = 4) -> str: # ^^^^^^^^^^ # CORRECTION: Only show 3 **json_paths** - for errors in islice(_group_errors_by_json_path(self._errors).values(), 3): + for errors in _group_errors_by_json_path(self._errors).values(): 
error_messages.append(self._get_message_for_errors_group(errors)) message = "" From 2d20db7dab7715a541aa3e19c0e149dfaa686ace Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 20 Aug 2024 12:17:07 +0100 Subject: [PATCH 15/92] ci: Add temporary script for `hatch run validation` **Remove before review**. Using for quicker feedback loop, where running mypy and all tests are not beneficial --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4a0c3874c..d061df401 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,6 +130,13 @@ update-init-file = [ "ruff format .", ] +# Much more isolated, focused purely on a faster `schemapi.py` rebuild/test loop. +validation = [ + "mypy tools/schemapi/schemapi.py", + "python tools/generate_schema_wrapper.py", + "pytest -k test_schemapi tests {args}", +] + [tool.hatch.envs.hatch-test] # https://hatch.pypa.io/latest/tutorials/testing/overview/ features = ["all", "dev", "doc"] From ad06d080d561009d6ce0fddd228364f6e82c5171 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:09:13 +0100 Subject: [PATCH 16/92] feat(perf): Adds `_lazy_deduplicate_enum` https://github.com/vega/altair/pull/3547#discussion_r1723029403 --- altair/utils/schemapi.py | 29 +++++++++++------------------ tools/schemapi/schemapi.py | 29 +++++++++++------------------ 2 files changed, 22 insertions(+), 36 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 535a63cfe..3e1d75e53 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -443,7 +443,7 @@ def _lazy_deduplicate_errors( if validator == "additionalProperties": errors = _lazy_additional_properties(errors) elif validator == "enum": - errors = _lazy_enum(errors) + errors = _lazy_deduplicate_enum(errors) yield from _lazy_unique_message(errors) @@ -475,23 +475,16 @@ def _message_len(err: 
ValidationError, /) -> int: return len(err.message) -def _lazy_enum(iterable: Iterable[ValidationError], /) -> Iterator[ValidationError]: - """ - Temporary reusing the eager version to isolate issues. - - The 3 errors rule applies per group. - """ - # FIXME: Too simple - # Need to do an eager pass, as this skips intersections of non-overlapping enums - # yield reduce(_enum_inner, iterable) - yield from _deduplicate_enum_errors(list(iterable)) - - -def _enum_inner(prev: ValidationError, current: ValidationError, /) -> ValidationError: - """**Disabled**.""" - longest = set(cast("list[str]", prev.validator_value)) - contender = set(cast("list[str]", current.validator_value)) - return current if contender.issuperset(longest) else prev +def _lazy_deduplicate_enum( + iterable: Iterable[ValidationError], / +) -> Iterator[ValidationError]: + """Skip any``"enum"`` errors that are a subset of another error.""" + enums: tuple[set[str], ...] + errors: tuple[ValidationError, ...] + enums, errors = zip(*((set(err.validator_value), err) for err in iterable)) # type: ignore[arg-type] + for cur_enum, err in zip(enums, errors): + if not any(cur_enum < e for e in enums if e != cur_enum): + yield err def _subset_to_most_specific_json_paths( diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 6cb154065..b4995c070 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -441,7 +441,7 @@ def _lazy_deduplicate_errors( if validator == "additionalProperties": errors = _lazy_additional_properties(errors) elif validator == "enum": - errors = _lazy_enum(errors) + errors = _lazy_deduplicate_enum(errors) yield from _lazy_unique_message(errors) @@ -473,23 +473,16 @@ def _message_len(err: ValidationError, /) -> int: return len(err.message) -def _lazy_enum(iterable: Iterable[ValidationError], /) -> Iterator[ValidationError]: - """ - Temporary reusing the eager version to isolate issues. - - The 3 errors rule applies per group. 
- """ - # FIXME: Too simple - # Need to do an eager pass, as this skips intersections of non-overlapping enums - # yield reduce(_enum_inner, iterable) - yield from _deduplicate_enum_errors(list(iterable)) - - -def _enum_inner(prev: ValidationError, current: ValidationError, /) -> ValidationError: - """**Disabled**.""" - longest = set(cast("list[str]", prev.validator_value)) - contender = set(cast("list[str]", current.validator_value)) - return current if contender.issuperset(longest) else prev +def _lazy_deduplicate_enum( + iterable: Iterable[ValidationError], / +) -> Iterator[ValidationError]: + """Skip any``"enum"`` errors that are a subset of another error.""" + enums: tuple[set[str], ...] + errors: tuple[ValidationError, ...] + enums, errors = zip(*((set(err.validator_value), err) for err in iterable)) # type: ignore[arg-type] + for cur_enum, err in zip(enums, errors): + if not any(cur_enum < e for e in enums if e != cur_enum): + yield err def _subset_to_most_specific_json_paths( From af17d441db83cabe7fcd2088eaa2df75bc00acaa Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 00:18:58 +0100 Subject: [PATCH 17/92] refactor(perf): Tidying up, more lazy ops, renaming Note to self: Fill out comments re individual changes. 
--- altair/utils/schemapi.py | 637 ++++++++++++++----------------------- tools/schemapi/schemapi.py | 637 ++++++++++++++----------------------- 2 files changed, 464 insertions(+), 810 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 3e1d75e53..248193a85 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -21,15 +21,14 @@ Final, Iterable, Iterator, + KeysView, List, - Literal, Sequence, TypeVar, Union, cast, overload, ) -from typing_extensions import TypeAlias import jsonschema import jsonschema.validators @@ -37,11 +36,6 @@ from jsonschema import ValidationError from packaging.version import Version -# This leads to circular imports with the vegalite module. Currently, this works -# but be aware that when you access it in this script, the vegalite module might -# not yet be fully instantiated in case your code is being executed during import time -from altair import vegalite - if TYPE_CHECKING: from typing import ClassVar @@ -59,9 +53,15 @@ from typing import Never, Self else: from typing_extensions import Never, Self + if sys.version_info >= (3, 10): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias + _Errs: TypeAlias = Iterable[ValidationError] + _ErrsLazy: TypeAlias = Iterator[ValidationError] + _ErrsLazyGroup: TypeAlias = Iterator[_ErrsLazy] + _IntoLazyGroup: TypeAlias = Iterator["tuple[str, ValidationError]"] -ValidationErrorList: TypeAlias = List[jsonschema.ValidationError] -GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" """ @@ -146,61 +146,6 @@ def debug_mode(arg: bool) -> Iterator[None]: DEBUG_MODE = original -@overload -def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = ..., - *, - raise_error: Literal[True] = ..., -) -> Never: ... 
-@overload -def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = ..., - *, - raise_error: Literal[False], -) -> ValidationError | None: ... -def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, - *, - raise_error: bool = True, -) -> ValidationError | None: - """ - Validates the passed in spec against the schema in the context of the rootschema. - - If any errors are found, they are deduplicated and prioritized - and only the most relevant errors are kept. Errors are then either raised - or returned, depending on the value of `raise_error`. - """ - it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) - if first_error := next(it_errors, None): - errors = [first_error, *it_errors] - leaf_errors = _get_leaves_of_error_tree(errors) - grouped_errors = _group_errors_by_json_path(leaf_errors) - grouped_errors = _subset_to_most_specific_json_paths(grouped_errors) - grouped_errors = _deduplicate_errors(grouped_errors) - - # Nothing special about this first error but we need to choose one - # which can be raised - main_error: Any = next(iter(grouped_errors.values()))[0] - # All errors are then attached as a new attribute to ValidationError so that - # they can be used in SchemaValidationError to craft a more helpful - # error message. Setting a new attribute like this is not ideal as - # it then no longer matches the type ValidationError. It would be better - # to refactor this function to never raise but only return errors. - main_error._errors = list(grouped_errors.values()) - if raise_error: - raise main_error - else: - return main_error - else: - return None - - def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: """ Continue an iterator at the last popped ``element``. 
@@ -217,20 +162,44 @@ def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: yield from others -def lazy_validate_json_schema( +def _regroup(errors: _Errs, /) -> _ErrsLazyGroup: + """ + Regroup error stream with the assumption they are already sorted. + + This holds **only after** all other stages. + """ + for _, grouped_it in groupby(errors, _json_path): + yield grouped_it + + +def validate_jsonschema( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, ) -> None: - """Lazy equivalent of `validate_jsonschema`.""" - it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) + """ + Lazy equivalent of `validate_jsonschema`. + + Validates the passed in spec against the schema in the context of the rootschema. + + If any errors are found, they are deduplicated and prioritized + and only the most relevant errors are kept. + + Nothing special about this first error but we need to choose one + which can be raised + All errors are then attached as a new attribute to ValidationError so that + they can be used in SchemaValidationError to craft a more helpful + error message. Setting a new attribute like this is not ideal as + it then no longer matches the type ValidationError. 
+ """ + it_errors = _iter_errors_from_spec(spec, schema, rootschema=rootschema) if first_error := next(it_errors, None): - groups = _lazy_group_tree_leaves(_rechain(first_error, it_errors)) - most_specific = _lazy_subset_to_most_specific_json_paths(groups) - deduplicated = _lazy_deduplicate_errors(most_specific) + groups = _group_tree_leaves(_rechain(first_error, it_errors)) + most_specific = _prune_subset_paths(groups) + deduplicated = _deduplicate_errors(most_specific) dummy_error: Any if dummy_error := next(deduplicated, None): - dummy_error._errors = _rechain(dummy_error, deduplicated) # type: ignore[attr-defined] + dummy_error._errors = _regroup(_rechain(dummy_error, deduplicated)) # type: ignore[attr-defined] raise dummy_error else: msg = ( @@ -240,11 +209,27 @@ def lazy_validate_json_schema( raise NotImplementedError(msg) -def _get_errors_from_spec( +def validate_jsonschema_fail_fast( + spec: _JsonParameter, + schema: dict[str, Any], + rootschema: dict[str, Any] | None = None, +) -> None: + """ + Raise as quickly as possible. + + Use when any information about the error is not needed. + """ + if ( + err := next(_iter_errors_from_spec(spec, schema, rootschema=rootschema), None) + ) is not None: + raise err + + +def _iter_errors_from_spec( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, -) -> Iterator[ValidationError]: +) -> _ErrsLazy: """ Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. @@ -324,82 +309,57 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -# We do not annotate the return value here as the referencing library is not always -# available and this function is only executed in those cases. 
def _get_referencing_registry( rootschema: dict[str, Any], json_schema_draft_url: str | None = None ) -> Registry: - # Referencing is a dependency of newer jsonschema versions, starting with the - # version that is specified in _use_referencing_library and we therefore - # can expect that it is installed if the function returns True. - # We ignore 'import' mypy errors which happen when the referencing library - # is not installed. That's ok as in these cases this function is not called. - # We also have to ignore 'unused-ignore' errors as mypy raises those in case - # referencing is installed. - import referencing # type: ignore[import,unused-ignore] - import referencing.jsonschema # type: ignore[import,unused-ignore] - - if json_schema_draft_url is None: - json_schema_draft_url = _get_json_schema_draft_url(rootschema) - - specification = referencing.jsonschema.specification_with(json_schema_draft_url) - resource = specification.create_resource(rootschema) - return referencing.Registry().with_resource( - uri=_VEGA_LITE_ROOT_URI, resource=resource - ) + """ + Referencing is a dependency of newer jsonschema versions. + See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 -def _group_errors_by_json_path( - errors: Iterable[ValidationError], -) -> GroupedValidationErrors: - """ - Groups errors by the `json_path` attribute of the jsonschema ValidationError class. + We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library + is not installed. + That's ok as in these cases this function is not called. - This attribute contains the path to the offending element within - a chart specification and can therefore be considered as an identifier of an - 'issue' in the chart that needs to be fixed. + We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case + ``referencing`` is installed. 
""" - errors_by_json_path = defaultdict(list) - for err in errors: - errors_by_json_path[_json_path(err)].append(err) - return dict(errors_by_json_path) + from referencing import Registry # type: ignore[import,unused-ignore] # noqa: I001 + from referencing.jsonschema import specification_with # type: ignore[import,unused-ignore] + + dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) + specification = specification_with(dialect_id) + resource = specification.create_resource(rootschema) + return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) -def _get_leaves_of_error_tree( - errors: ValidationErrorList, -) -> ValidationErrorList: +def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: """ - For each error in `errors`, it traverses down the "error tree" that is generated by the jsonschema library to find and return all "leaf" errors. + Combines 3 previously distinct steps: + + 1. ``_get_leaves_of_error_tree`` These are errors which have no further errors that caused it and so they are the most specific errors with the most specific error messages. - """ - leaves: ValidationErrorList = [] - for err in errors: - if err.context: - # This means that the error `err` was caused by errors in subschemas. - # The list of errors from the subschemas are available in the property - # `context`. - leaves.extend(_get_leaves_of_error_tree(err.context)) - else: - leaves.append(err) - return leaves + 2. ``_group_errors_by_json_path`` (part of) -def _lazy_group_tree_leaves( - errors: Iterable[ValidationError], / -) -> Iterator[tuple[str, ValidationError]]: - """ - Combines 3 previously distinct steps: + Extracts the path for grouping. - - ``_get_leaves_of_error_tree`` - - (part of) ``_group_errors_by_json_path`` - - Doesnt actually group yet, can by calling `dict(result)`. - - ``_is_required_value_error`` + 3. 
Removes:: + + ValidationError: "'value' is a required property" + + as these errors are unlikely to be the relevant ones for the user. + They come from validation against a schema definition where the output of `alt.value` + would be valid. + However, if a user uses `alt.value`, the `value` keyword is included automatically + from that function and so it's unlikely that this was what the user intended + if the keyword is not present in the first place. """ # noqa: D400 for err in errors: if err_context := err.context: - yield from _lazy_group_tree_leaves(err_context) + yield from _group_tree_leaves(err_context) elif err.validator == "required" and err.validator_value == ["value"]: continue else: @@ -412,19 +372,28 @@ def _lazy_group_tree_leaves( """Key function for ``ValidationError.validator``.""" -def _lazy_subset_to_most_specific_json_paths( - json_path_errors: Iterator[tuple[str, ValidationError]], / -) -> Iterator[Iterable[ValidationError]]: +def _message_len(err: ValidationError, /) -> int: + """Return length of a ``ValidationError`` message.""" + return len(err.message) + + +def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]: """ + Removes key (json path), value (errors) pairs where the json path is fully contained in another json path. + + For example if `errors_by_json_path` has two keys, `$.encoding.X` and `$.encoding.X.tooltip`, + then the first one will be removed and only the second one is returned. + + This is done under the assumption that more specific json paths give more helpful error messages to the user. + Currently using a `list`, but typing it more restrictive to see if it can be avoided. 
- Needs to be sorted to work with groupby - Reversing allows prioritising more specific groups, since they are seen first - Then re-reversed, to keep seen order - """ rev_sort = sorted(json_path_errors, key=_fn_path, reverse=True) - keeping: dict[str, Iterable[ValidationError]] = {} + keeping: dict[str, _Errs] = {} for unique_path, grouped_errors in groupby(rev_sort, key=_fn_path): if any(seen.startswith(unique_path) for seen in keeping): continue @@ -433,23 +402,35 @@ def _lazy_subset_to_most_specific_json_paths( yield from islice(reversed(keeping.values()), 3) -def _lazy_deduplicate_errors( - grouped_errors: Iterator[Iterable[ValidationError]], / -) -> Iterator[ValidationError]: - for element_errors in grouped_errors: - for validator, errors in groupby( - sorted(element_errors, key=_fn_validator), key=_fn_validator - ): +def _groupby_validator(errors: _Errs, /) -> Iterator[tuple[str, _ErrsLazy]]: + """ + Groups the errors by the json schema "validator" that casued the error. + + For example if the error is that a value is not one of an enumeration in the json schema + then the "validator" is `"enum"`, if the error is due to an unknown property that + was set although no additional properties are allowed then "validator" is + `"additionalProperties`, etc. + """ + yield from groupby(sorted(errors, key=_fn_validator), key=_fn_validator) + + +def _deduplicate_errors(grouped_errors: Iterator[_Errs], /) -> _ErrsLazy: + """ + Some errors have very similar error messages or are just in general not helpful for a user. + + This function removes as many of these cases as possible and + can be extended over time to handle new cases that come up. 
+ """ + for by_path in grouped_errors: + for validator, errors in _groupby_validator(by_path): if validator == "additionalProperties": - errors = _lazy_additional_properties(errors) + errors = _shortest_any_of(errors) elif validator == "enum": - errors = _lazy_deduplicate_enum(errors) - yield from _lazy_unique_message(errors) + errors = _prune_subset_enum(errors) + yield from _distinct_messages(errors) -def _lazy_unique_message( - iterable: Iterable[ValidationError], / -) -> Iterator[ValidationError]: +def _distinct_messages(iterable: _Errs, /) -> _ErrsLazy: seen = set() for el in iterable: if el.message not in seen: @@ -457,9 +438,19 @@ def _lazy_unique_message( yield el -def _lazy_additional_properties( - iterable: Iterable[ValidationError], / -) -> Iterator[ValidationError]: +def _shortest_any_of(iterable: _Errs, /) -> _ErrsLazy: + """ + If there are multiple additional property errors it usually means that the offending element was validated against multiple schemas and its parent is a common anyOf validator. + + The error messages produced from these cases are usually + very similar and we just take the shortest one. + For example the following 3 errors are raised for:: + + alt.X("variety", unknown=2) + - "Additional properties are not allowed ('unknown' was unexpected)" + - "Additional properties are not allowed ('field', 'unknown' were unexpected)" + - "Additional properties are not allowed ('field', 'type', 'unknown' were unexpected)". + """ it = iter(iterable) first = next(it) if ( @@ -470,14 +461,7 @@ def _lazy_additional_properties( yield first -def _message_len(err: ValidationError, /) -> int: - """Return length of a ``ValidationError`` message.""" - return len(err.message) - - -def _lazy_deduplicate_enum( - iterable: Iterable[ValidationError], / -) -> Iterator[ValidationError]: +def _prune_subset_enum(iterable: _Errs, /) -> _ErrsLazy: """Skip any``"enum"`` errors that are a subset of another error.""" enums: tuple[set[str], ...] 
errors: tuple[ValidationError, ...] @@ -487,157 +471,14 @@ def _lazy_deduplicate_enum( yield err -def _subset_to_most_specific_json_paths( - errors_by_json_path: GroupedValidationErrors, -) -> GroupedValidationErrors: - """ - Removes key (json path), value (errors) pairs where the json path is fully contained in another json path. - - For example if `errors_by_json_path` has two keys, `$.encoding.X` and `$.encoding.X.tooltip`, - then the first one will be removed and only the second one is returned. - - This is done under the assumption that more specific json paths give more helpful error messages to the user. - """ - errors_by_json_path_specific: GroupedValidationErrors = {} - for json_path, errors in errors_by_json_path.items(): - if not _contained_at_start_of_one_of_other_values( - json_path, list(errors_by_json_path.keys()) - ): - errors_by_json_path_specific[json_path] = errors - return errors_by_json_path_specific - - -def _contained_at_start_of_one_of_other_values(x: str, values: Sequence[str]) -> bool: - # Does not count as "contained at start of other value" if the values are - # the same. These cases should be handled separately - return any(value.startswith(x) for value in values if x != value) - - -def _deduplicate_errors( - grouped_errors: GroupedValidationErrors, -) -> GroupedValidationErrors: - """ - Some errors have very similar error messages or are just in general not helpful for a user. - - This function removes as many of these cases as possible and - can be extended over time to handle new cases that come up. 
- """ - grouped_errors_deduplicated: GroupedValidationErrors = {} - for json_path, element_errors in grouped_errors.items(): - errors_by_validator = _group_errors_by_validator(element_errors) - - deduplication_functions = { - "enum": _deduplicate_enum_errors, - "additionalProperties": _deduplicate_additional_properties_errors, - } - deduplicated_errors: ValidationErrorList = [] - for validator, errors in errors_by_validator.items(): - deduplication_func = deduplication_functions.get(validator) - if deduplication_func is not None: - errors = deduplication_func(errors) - deduplicated_errors.extend(_deduplicate_by_message(errors)) - - # Removes any ValidationError "'value' is a required property" as these - # errors are unlikely to be the relevant ones for the user. They come from - # validation against a schema definition where the output of `alt.value` - # would be valid. However, if a user uses `alt.value`, the `value` keyword - # is included automatically from that function and so it's unlikely - # that this was what the user intended if the keyword is not present - # in the first place. - deduplicated_errors = [ - err for err in deduplicated_errors if not _is_required_value_error(err) - ] - - grouped_errors_deduplicated[json_path] = deduplicated_errors - return grouped_errors_deduplicated - - -def _is_required_value_error(err: ValidationError) -> bool: - return err.validator == "required" and err.validator_value == ["value"] - - -def _group_errors_by_validator(errors: ValidationErrorList) -> GroupedValidationErrors: - """ - Groups the errors by the json schema "validator" that casued the error. - - For example if the error is that a value is not one of an enumeration in the json schema - then the "validator" is `"enum"`, if the error is due to an unknown property that - was set although no additional properties are allowed then "validator" is - `"additionalProperties`, etc. 
- """ - errors_by_validator: defaultdict[str, ValidationErrorList] = defaultdict(list) - for err in errors: - # Ignore mypy error as err.validator as it wrongly sees err.validator - # as of type Optional[Validator] instead of str which it is according - # to the documentation and all tested cases - errors_by_validator[err.validator].append(err) # type: ignore[index] - return dict(errors_by_validator) - - -def _deduplicate_enum_errors(errors: ValidationErrorList) -> ValidationErrorList: - """ - Deduplicate enum errors by removing the errors where the allowed values are a subset of another error. - - For example, if `enum` contains two errors and one has `validator_value` (i.e. accepted values) ["A", "B"] and the - other one ["A", "B", "C"] then the first one is removed and the final - `enum` list only contains the error with ["A", "B", "C"]. - """ - if len(errors) > 1: - # Values (and therefore `validator_value`) of an enum are always arrays, - # see https://json-schema.org/understanding-json-schema/reference/generic.html#enumerated-values - # which is why we can use join below - value_strings = [",".join(err.validator_value) for err in errors] # type: ignore - longest_enums: ValidationErrorList = [] - for value_str, err in zip(value_strings, errors): - if not _contained_at_start_of_one_of_other_values(value_str, value_strings): - longest_enums.append(err) - errors = longest_enums - return errors - - -def _deduplicate_additional_properties_errors( - errors: ValidationErrorList, -) -> ValidationErrorList: - """ - If there are multiple additional property errors it usually means that the offending element was validated against multiple schemas and its parent is a common anyOf validator. - - The error messages produced from these cases are usually - very similar and we just take the shortest one. 
For example, - the following 3 errors are raised for the `unknown` channel option in - `alt.X("variety", unknown=2)`: - - "Additional properties are not allowed ('unknown' was unexpected)" - - "Additional properties are not allowed ('field', 'unknown' were unexpected)" - - "Additional properties are not allowed ('field', 'type', 'unknown' were unexpected)". - """ - if len(errors) > 1: - # Test if all parent errors are the same anyOf error and only do - # the prioritization in these cases. Can't think of a chart spec where this - # would not be the case but still allow for it below to not break anything. - parent = errors[0].parent - if ( - parent is not None - and parent.validator == "anyOf" - # Use [1:] as don't have to check for first error as it was used - # above to define `parent` - and all(err.parent is parent for err in errors[1:]) - ): - errors = [min(errors, key=lambda x: len(x.message))] - return errors - - -def _deduplicate_by_message(errors: ValidationErrorList) -> ValidationErrorList: - """Deduplicate errors by message. 
This keeps the original order in case it was chosen intentionally.""" - return list({e.message: e for e in errors}.values()) - - def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: """Breadth-first sequence of all classes which inherit from cls.""" seen = set() - current_set = {cls} - while current_set: - seen |= current_set - current_set = set.union(*(set(cls.__subclasses__()) for cls in current_set)) - for cls in current_set - seen: + current: set[type[Any]] = {cls} + while current: + seen |= current + current = set(chain.from_iterable(cls.__subclasses__() for cls in current)) + for cls in current - seen: yield cls @@ -716,7 +557,7 @@ def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj err = cast("SchemaValidationError", err) - self._errors: Iterable[ValidationError] = err._errors + self._errors: _ErrsLazyGroup = err._errors # This is the message from err self._original_message = self.message self.message = self._get_message() @@ -724,49 +565,47 @@ def __init__(self, obj: SchemaBase, err: ValidationError) -> None: def __str__(self) -> str: return self.message + @staticmethod + def indent_from_second_line(msg: str, /, indent: int = 4) -> str: + return "\n".join( + " " * indent + s if idx > 0 and s else s + for idx, s in enumerate(msg.split("\n")) + ) + def _get_message(self) -> str: - def indent_second_line_onwards(message: str, indent: int = 4) -> str: - modified_lines: list[str] = [] - for idx, line in enumerate(message.split("\n")): - if idx > 0 and len(line) > 0: - line = " " * indent + line - modified_lines.append(line) - return "\n".join(modified_lines) - - error_messages: list[str] = [] - # Only show a maximum of 3 errors as else the final message returned by this - # method could get very long. 
- # ^^^^^^^^^^ - # CORRECTION: Only show 3 **json_paths** - - for errors in _group_errors_by_json_path(self._errors).values(): - error_messages.append(self._get_message_for_errors_group(errors)) - - message = "" - if len(error_messages) > 1: - error_messages = [ - indent_second_line_onwards(f"Error {error_id}: {m}") + it = self._errors + group_1 = list(next(it)) + if (group_2 := next(it, None)) is not None: + error_messages = [] + for group in group_1, list(group_2), next(it, None): + if group is not None: + error_messages.append(self._get_message_for_errors_group(group)) + message = "\n\n".join( + self.indent_from_second_line(f"Error {error_id}: {m}") for error_id, m in enumerate(error_messages, start=1) - ] - message += "Multiple errors were found.\n\n" - message += "\n\n".join(error_messages) - return message + ) + return f"Multiple errors were found.\n\n{message}" + else: + return self._get_message_for_errors_group(group_1) - def _get_message_for_errors_group( - self, - errors: ValidationErrorList, - ) -> str: + def _get_message_for_errors_group(self, errors: _Errs) -> str: + """ + Note. + + During development, we only found cases where an additionalProperties + error was raised if that was the only error for the offending instance + as identifiable by the json path. + + Therefore, we just check here the first error. + However, other constellations might exist in which case this should be adapted + so that other error messages are shown as well. + """ + if not isinstance(errors, Sequence): + errors = list(errors) if errors[0].validator == "additionalProperties": - # During development, we only found cases where an additionalProperties - # error was raised if that was the only error for the offending instance - # as identifiable by the json path. Therefore, we just check here the first - # error. However, other constellations might exist in which case - # this should be adapted so that other error messages are shown as well. 
- message = self._get_additional_properties_error_message(errors[0]) + return self._get_additional_properties_error_message(errors[0]) else: - message = self._get_default_error_message(errors=errors) - - return message.strip() + return self._get_default_error_message(errors=errors) def _get_additional_properties_error_message( self, @@ -781,13 +620,12 @@ def _get_additional_properties_error_message( # "Additional properties are not allowed ('unknown' was unexpected)" # Line below extracts "unknown" from this string parameter_name = error.message.split("('")[-1].split("'")[0] - message = f"""\ -`{altair_cls.__name__}` has no parameter named '{parameter_name}' - -Existing parameter names are: -{param_names_table} -See the help for `{altair_cls.__name__}` to read the full description of these parameters""" - return message + cls_name = altair_cls.__name__ + return ( + f"`{cls_name}` has no parameter named '{parameter_name}'\n\n" + f"Existing parameter names are:\n{param_names_table}\n" + f"See the help for `{cls_name}` to read the full description of these parameters" + ) def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase]: """ @@ -795,6 +633,8 @@ def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase This should lead to more informative error messages pointing the user closer to the source of the issue. """ + from altair import vegalite + for prop_name in reversed(error.absolute_path): # Check if str as e.g. 
first item can be a 0 if isinstance(prop_name, str): @@ -806,24 +646,17 @@ def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase # Did not find a suitable class based on traversing the path so we fall # back on the class of the top-level object which created # the SchemaValidationError - cls = self.obj.__class__ + cls = type(self.obj) return cls @staticmethod - def _format_params_as_table(param_dict_keys: Iterable[str]) -> str: + def _format_params_as_table(param_view: KeysView[str]) -> str: """Format param names into a table so that they are easier to read.""" - param_names: tuple[str, ...] - name_lengths: tuple[int, ...] - param_names, name_lengths = zip( - *[ - (name, len(name)) - for name in param_dict_keys - if name not in {"kwds", "self"} - ] - ) + param_names: list[str] = [nm for nm in param_view if nm not in {"kwds", "self"}] + # Worst case scenario with the same longest param name in the same # row for all columns - max_name_length = max(name_lengths) + max_name_length = len(max(param_view, key=len)) max_column_width = 80 # Output a square table if not too big (since it is easier to read) num_param_names = len(param_names) @@ -837,7 +670,7 @@ def split_into_equal_parts(n: int, p: int) -> list[int]: column_heights = split_into_equal_parts(num_param_names, columns) # Section the param names into columns and compute their widths - param_names_columns: list[tuple[str, ...]] = [] + param_names_columns: list[Sequence[str]] = [] column_max_widths: list[int] = [] last_end_idx: int = 0 for ch in column_heights: @@ -848,30 +681,29 @@ def split_into_equal_parts(n: int, p: int) -> list[int]: last_end_idx = ch + last_end_idx # Transpose the param name columns into rows to facilitate looping - param_names_rows: list[tuple[str, ...]] = [] - for li in zip_longest(*param_names_columns, fillvalue=""): - param_names_rows.append(li) # Build the table as a string by iterating over and formatting the rows param_names_table: str = "" - for 
param_names_row in param_names_rows: + column_pad = 3 + for param_names_row in zip_longest(*param_names_columns, fillvalue=""): + last_element = len(param_names_row) - 1 for num, param_name in enumerate(param_names_row): # Set column width based on the longest param in the column - max_name_length_column = column_max_widths[num] - column_pad = 3 - param_names_table += "{:<{}}".format( - param_name, max_name_length_column + column_pad - ) + width = column_pad + column_max_widths[num] + param_names_table += "{:<{}}".format(param_name, width) # Insert newlines and spacing after the last element in each row - if num == (len(param_names_row) - 1): + if num == last_element: param_names_table += "\n" return param_names_table def _get_default_error_message( self, - errors: ValidationErrorList, + errors: Sequence[ValidationError], ) -> str: bullet_points: list[str] = [] - errors_by_validator = _group_errors_by_validator(errors) + errors_by_validator: defaultdict[str, list[ValidationError]] = defaultdict(list) + for err in errors: + errors_by_validator[err.validator].append(err) # type: ignore[index] + if "enum" in errors_by_validator: for error in errors_by_validator["enum"]: bullet_points.append(f"one of {error.validator_value}") @@ -919,7 +751,7 @@ def _get_default_error_message( if validator not in {"enum", "type"} ) message += "".join(it) - return message + return message.strip() class UndefinedType: @@ -1215,7 +1047,7 @@ def to_dict( if validate: try: self.validate(result) - except jsonschema.ValidationError as err: + except ValidationError as err: # We do not raise `from err` as else the resulting # traceback is very long as it contains part # of the Vega-Lite schema. 
It would also first @@ -1340,12 +1172,8 @@ def validate( cls, instance: dict[str, Any], schema: dict[str, Any] | None = None ) -> None: """Validate the instance against the class schema in the context of the rootschema.""" - if schema is None: - schema = cls._schema - # For the benefit of mypy - assert schema is not None - lazy_validate_json_schema( - instance, schema, rootschema=cls._rootschema or cls._schema + validate_jsonschema( + instance, schema or cls._schema, cls._rootschema or cls._schema ) @classmethod @@ -1372,7 +1200,7 @@ def validate_property( np_opt = sys.modules.get("numpy") value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) props = cls.resolve_references(schema or cls._schema).get("properties", {}) - lazy_validate_json_schema( + validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema ) @@ -1408,6 +1236,17 @@ def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: return args[0] if args else kwds +def _freeze(val): + if isinstance(val, dict): + return frozenset((k, _freeze(v)) for k, v in val.items()) + elif isinstance(val, set): + return frozenset(_freeze(v) for v in val) + elif isinstance(val, (list, tuple)): + return tuple(_freeze(v) for v in val) + else: + return val + + class _FromDict: """ Class used to construct SchemaBase class hierarchies from a dict. 
@@ -1445,22 +1284,8 @@ def hash_schema(cls, schema: dict[str, Any], use_json: bool = True) -> int: for key, val in schema.items() if key not in cls._hash_exclude_keys } - if use_json: - s = json.dumps(schema, sort_keys=True) - return hash(s) - else: - - def _freeze(val): - if isinstance(val, dict): - return frozenset((k, _freeze(v)) for k, v in val.items()) - elif isinstance(val, set): - return frozenset(map(_freeze, val)) - elif isinstance(val, (list, tuple)): - return tuple(map(_freeze, val)) - else: - return val - - return hash(_freeze(schema)) + s: Any = json.dumps(schema, sort_keys=True) if use_json else _freeze(schema) + return hash(s) @overload def from_dict( @@ -1543,8 +1368,8 @@ def from_dict( schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) for possible in schemas: try: - lazy_validate_json_schema(dct, possible, rootschema=root_schema) - except jsonschema.ValidationError: + validate_jsonschema_fail_fast(dct, possible, rootschema=root_schema) + except ValidationError: continue else: return from_dict(dct, schema=possible, default_class=target_tp) @@ -1571,6 +1396,8 @@ def __init__(self, prop: str, schema: dict[str, Any]) -> None: self.schema = schema def __get__(self, obj, cls): + from altair import vegalite + self.obj = obj self.cls = cls # The docs from the encoding class parameter (e.g. `bin` in X, Color, diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index b4995c070..57bf63b23 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -19,15 +19,14 @@ Final, Iterable, Iterator, + KeysView, List, - Literal, Sequence, TypeVar, Union, cast, overload, ) -from typing_extensions import TypeAlias import jsonschema import jsonschema.validators @@ -35,11 +34,6 @@ from jsonschema import ValidationError from packaging.version import Version -# This leads to circular imports with the vegalite module. 
Currently, this works -# but be aware that when you access it in this script, the vegalite module might -# not yet be fully instantiated in case your code is being executed during import time -from altair import vegalite - if TYPE_CHECKING: from typing import ClassVar @@ -57,9 +51,15 @@ from typing import Never, Self else: from typing_extensions import Never, Self + if sys.version_info >= (3, 10): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias + _Errs: TypeAlias = Iterable[ValidationError] + _ErrsLazy: TypeAlias = Iterator[ValidationError] + _ErrsLazyGroup: TypeAlias = Iterator[_ErrsLazy] + _IntoLazyGroup: TypeAlias = Iterator["tuple[str, ValidationError]"] -ValidationErrorList: TypeAlias = List[jsonschema.ValidationError] -GroupedValidationErrors: TypeAlias = Dict[str, ValidationErrorList] _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" """ @@ -144,61 +144,6 @@ def debug_mode(arg: bool) -> Iterator[None]: DEBUG_MODE = original -@overload -def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = ..., - *, - raise_error: Literal[True] = ..., -) -> Never: ... -@overload -def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = ..., - *, - raise_error: Literal[False], -) -> ValidationError | None: ... -def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, - *, - raise_error: bool = True, -) -> ValidationError | None: - """ - Validates the passed in spec against the schema in the context of the rootschema. - - If any errors are found, they are deduplicated and prioritized - and only the most relevant errors are kept. Errors are then either raised - or returned, depending on the value of `raise_error`. 
- """ - it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) - if first_error := next(it_errors, None): - errors = [first_error, *it_errors] - leaf_errors = _get_leaves_of_error_tree(errors) - grouped_errors = _group_errors_by_json_path(leaf_errors) - grouped_errors = _subset_to_most_specific_json_paths(grouped_errors) - grouped_errors = _deduplicate_errors(grouped_errors) - - # Nothing special about this first error but we need to choose one - # which can be raised - main_error: Any = next(iter(grouped_errors.values()))[0] - # All errors are then attached as a new attribute to ValidationError so that - # they can be used in SchemaValidationError to craft a more helpful - # error message. Setting a new attribute like this is not ideal as - # it then no longer matches the type ValidationError. It would be better - # to refactor this function to never raise but only return errors. - main_error._errors = list(grouped_errors.values()) - if raise_error: - raise main_error - else: - return main_error - else: - return None - - def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: """ Continue an iterator at the last popped ``element``. @@ -215,20 +160,44 @@ def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: yield from others -def lazy_validate_json_schema( +def _regroup(errors: _Errs, /) -> _ErrsLazyGroup: + """ + Regroup error stream with the assumption they are already sorted. + + This holds **only after** all other stages. + """ + for _, grouped_it in groupby(errors, _json_path): + yield grouped_it + + +def validate_jsonschema( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, ) -> None: - """Lazy equivalent of `validate_jsonschema`.""" - it_errors = _get_errors_from_spec(spec, schema, rootschema=rootschema) + """ + Lazy equivalent of `validate_jsonschema`. + + Validates the passed in spec against the schema in the context of the rootschema. 
+ + If any errors are found, they are deduplicated and prioritized + and only the most relevant errors are kept. + + Nothing special about this first error but we need to choose one + which can be raised + All errors are then attached as a new attribute to ValidationError so that + they can be used in SchemaValidationError to craft a more helpful + error message. Setting a new attribute like this is not ideal as + it then no longer matches the type ValidationError. + """ + it_errors = _iter_errors_from_spec(spec, schema, rootschema=rootschema) if first_error := next(it_errors, None): - groups = _lazy_group_tree_leaves(_rechain(first_error, it_errors)) - most_specific = _lazy_subset_to_most_specific_json_paths(groups) - deduplicated = _lazy_deduplicate_errors(most_specific) + groups = _group_tree_leaves(_rechain(first_error, it_errors)) + most_specific = _prune_subset_paths(groups) + deduplicated = _deduplicate_errors(most_specific) dummy_error: Any if dummy_error := next(deduplicated, None): - dummy_error._errors = _rechain(dummy_error, deduplicated) # type: ignore[attr-defined] + dummy_error._errors = _regroup(_rechain(dummy_error, deduplicated)) # type: ignore[attr-defined] raise dummy_error else: msg = ( @@ -238,11 +207,27 @@ def lazy_validate_json_schema( raise NotImplementedError(msg) -def _get_errors_from_spec( +def validate_jsonschema_fail_fast( + spec: _JsonParameter, + schema: dict[str, Any], + rootschema: dict[str, Any] | None = None, +) -> None: + """ + Raise as quickly as possible. + + Use when any information about the error is not needed. 
+ """ + if ( + err := next(_iter_errors_from_spec(spec, schema, rootschema=rootschema), None) + ) is not None: + raise err + + +def _iter_errors_from_spec( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, -) -> Iterator[ValidationError]: +) -> _ErrsLazy: """ Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. @@ -322,82 +307,57 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -# We do not annotate the return value here as the referencing library is not always -# available and this function is only executed in those cases. def _get_referencing_registry( rootschema: dict[str, Any], json_schema_draft_url: str | None = None ) -> Registry: - # Referencing is a dependency of newer jsonschema versions, starting with the - # version that is specified in _use_referencing_library and we therefore - # can expect that it is installed if the function returns True. - # We ignore 'import' mypy errors which happen when the referencing library - # is not installed. That's ok as in these cases this function is not called. - # We also have to ignore 'unused-ignore' errors as mypy raises those in case - # referencing is installed. - import referencing # type: ignore[import,unused-ignore] - import referencing.jsonschema # type: ignore[import,unused-ignore] - - if json_schema_draft_url is None: - json_schema_draft_url = _get_json_schema_draft_url(rootschema) - - specification = referencing.jsonschema.specification_with(json_schema_draft_url) - resource = specification.create_resource(rootschema) - return referencing.Registry().with_resource( - uri=_VEGA_LITE_ROOT_URI, resource=resource - ) + """ + Referencing is a dependency of newer jsonschema versions. 
+ See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 -def _group_errors_by_json_path( - errors: Iterable[ValidationError], -) -> GroupedValidationErrors: - """ - Groups errors by the `json_path` attribute of the jsonschema ValidationError class. + We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library + is not installed. + That's ok as in these cases this function is not called. - This attribute contains the path to the offending element within - a chart specification and can therefore be considered as an identifier of an - 'issue' in the chart that needs to be fixed. + We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case + ``referencing`` is installed. """ - errors_by_json_path = defaultdict(list) - for err in errors: - errors_by_json_path[_json_path(err)].append(err) - return dict(errors_by_json_path) + from referencing import Registry # type: ignore[import,unused-ignore] # noqa: I001 + from referencing.jsonschema import specification_with # type: ignore[import,unused-ignore] + + dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) + specification = specification_with(dialect_id) + resource = specification.create_resource(rootschema) + return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) -def _get_leaves_of_error_tree( - errors: ValidationErrorList, -) -> ValidationErrorList: +def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: """ - For each error in `errors`, it traverses down the "error tree" that is generated by the jsonschema library to find and return all "leaf" errors. + Combines 3 previously distinct steps: + + 1. ``_get_leaves_of_error_tree`` These are errors which have no further errors that caused it and so they are the most specific errors with the most specific error messages. 
- """ - leaves: ValidationErrorList = [] - for err in errors: - if err.context: - # This means that the error `err` was caused by errors in subschemas. - # The list of errors from the subschemas are available in the property - # `context`. - leaves.extend(_get_leaves_of_error_tree(err.context)) - else: - leaves.append(err) - return leaves + 2. ``_group_errors_by_json_path`` (part of) -def _lazy_group_tree_leaves( - errors: Iterable[ValidationError], / -) -> Iterator[tuple[str, ValidationError]]: - """ - Combines 3 previously distinct steps: + Extracts the path for grouping. - - ``_get_leaves_of_error_tree`` - - (part of) ``_group_errors_by_json_path`` - - Doesnt actually group yet, can by calling `dict(result)`. - - ``_is_required_value_error`` + 3. Removes:: + + ValidationError: "'value' is a required property" + + as these errors are unlikely to be the relevant ones for the user. + They come from validation against a schema definition where the output of `alt.value` + would be valid. + However, if a user uses `alt.value`, the `value` keyword is included automatically + from that function and so it's unlikely that this was what the user intended + if the keyword is not present in the first place. 
""" # noqa: D400 for err in errors: if err_context := err.context: - yield from _lazy_group_tree_leaves(err_context) + yield from _group_tree_leaves(err_context) elif err.validator == "required" and err.validator_value == ["value"]: continue else: @@ -410,19 +370,28 @@ def _lazy_group_tree_leaves( """Key function for ``ValidationError.validator``.""" -def _lazy_subset_to_most_specific_json_paths( - json_path_errors: Iterator[tuple[str, ValidationError]], / -) -> Iterator[Iterable[ValidationError]]: +def _message_len(err: ValidationError, /) -> int: + """Return length of a ``ValidationError`` message.""" + return len(err.message) + + +def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]: """ + Removes key (json path), value (errors) pairs where the json path is fully contained in another json path. + + For example if `errors_by_json_path` has two keys, `$.encoding.X` and `$.encoding.X.tooltip`, + then the first one will be removed and only the second one is returned. + + This is done under the assumption that more specific json paths give more helpful error messages to the user. + Currently using a `list`, but typing it more restrictive to see if it can be avoided. 
- Needs to be sorted to work with groupby - Reversing allows prioritising more specific groups, since they are seen first - Then re-reversed, to keep seen order - """ rev_sort = sorted(json_path_errors, key=_fn_path, reverse=True) - keeping: dict[str, Iterable[ValidationError]] = {} + keeping: dict[str, _Errs] = {} for unique_path, grouped_errors in groupby(rev_sort, key=_fn_path): if any(seen.startswith(unique_path) for seen in keeping): continue @@ -431,23 +400,35 @@ def _lazy_subset_to_most_specific_json_paths( yield from islice(reversed(keeping.values()), 3) -def _lazy_deduplicate_errors( - grouped_errors: Iterator[Iterable[ValidationError]], / -) -> Iterator[ValidationError]: - for element_errors in grouped_errors: - for validator, errors in groupby( - sorted(element_errors, key=_fn_validator), key=_fn_validator - ): +def _groupby_validator(errors: _Errs, /) -> Iterator[tuple[str, _ErrsLazy]]: + """ + Groups the errors by the json schema "validator" that casued the error. + + For example if the error is that a value is not one of an enumeration in the json schema + then the "validator" is `"enum"`, if the error is due to an unknown property that + was set although no additional properties are allowed then "validator" is + `"additionalProperties`, etc. + """ + yield from groupby(sorted(errors, key=_fn_validator), key=_fn_validator) + + +def _deduplicate_errors(grouped_errors: Iterator[_Errs], /) -> _ErrsLazy: + """ + Some errors have very similar error messages or are just in general not helpful for a user. + + This function removes as many of these cases as possible and + can be extended over time to handle new cases that come up. 
+ """ + for by_path in grouped_errors: + for validator, errors in _groupby_validator(by_path): if validator == "additionalProperties": - errors = _lazy_additional_properties(errors) + errors = _shortest_any_of(errors) elif validator == "enum": - errors = _lazy_deduplicate_enum(errors) - yield from _lazy_unique_message(errors) + errors = _prune_subset_enum(errors) + yield from _distinct_messages(errors) -def _lazy_unique_message( - iterable: Iterable[ValidationError], / -) -> Iterator[ValidationError]: +def _distinct_messages(iterable: _Errs, /) -> _ErrsLazy: seen = set() for el in iterable: if el.message not in seen: @@ -455,9 +436,19 @@ def _lazy_unique_message( yield el -def _lazy_additional_properties( - iterable: Iterable[ValidationError], / -) -> Iterator[ValidationError]: +def _shortest_any_of(iterable: _Errs, /) -> _ErrsLazy: + """ + If there are multiple additional property errors it usually means that the offending element was validated against multiple schemas and its parent is a common anyOf validator. + + The error messages produced from these cases are usually + very similar and we just take the shortest one. + For example the following 3 errors are raised for:: + + alt.X("variety", unknown=2) + - "Additional properties are not allowed ('unknown' was unexpected)" + - "Additional properties are not allowed ('field', 'unknown' were unexpected)" + - "Additional properties are not allowed ('field', 'type', 'unknown' were unexpected)". + """ it = iter(iterable) first = next(it) if ( @@ -468,14 +459,7 @@ def _lazy_additional_properties( yield first -def _message_len(err: ValidationError, /) -> int: - """Return length of a ``ValidationError`` message.""" - return len(err.message) - - -def _lazy_deduplicate_enum( - iterable: Iterable[ValidationError], / -) -> Iterator[ValidationError]: +def _prune_subset_enum(iterable: _Errs, /) -> _ErrsLazy: """Skip any``"enum"`` errors that are a subset of another error.""" enums: tuple[set[str], ...] 
errors: tuple[ValidationError, ...] @@ -485,157 +469,14 @@ def _lazy_deduplicate_enum( yield err -def _subset_to_most_specific_json_paths( - errors_by_json_path: GroupedValidationErrors, -) -> GroupedValidationErrors: - """ - Removes key (json path), value (errors) pairs where the json path is fully contained in another json path. - - For example if `errors_by_json_path` has two keys, `$.encoding.X` and `$.encoding.X.tooltip`, - then the first one will be removed and only the second one is returned. - - This is done under the assumption that more specific json paths give more helpful error messages to the user. - """ - errors_by_json_path_specific: GroupedValidationErrors = {} - for json_path, errors in errors_by_json_path.items(): - if not _contained_at_start_of_one_of_other_values( - json_path, list(errors_by_json_path.keys()) - ): - errors_by_json_path_specific[json_path] = errors - return errors_by_json_path_specific - - -def _contained_at_start_of_one_of_other_values(x: str, values: Sequence[str]) -> bool: - # Does not count as "contained at start of other value" if the values are - # the same. These cases should be handled separately - return any(value.startswith(x) for value in values if x != value) - - -def _deduplicate_errors( - grouped_errors: GroupedValidationErrors, -) -> GroupedValidationErrors: - """ - Some errors have very similar error messages or are just in general not helpful for a user. - - This function removes as many of these cases as possible and - can be extended over time to handle new cases that come up. 
- """ - grouped_errors_deduplicated: GroupedValidationErrors = {} - for json_path, element_errors in grouped_errors.items(): - errors_by_validator = _group_errors_by_validator(element_errors) - - deduplication_functions = { - "enum": _deduplicate_enum_errors, - "additionalProperties": _deduplicate_additional_properties_errors, - } - deduplicated_errors: ValidationErrorList = [] - for validator, errors in errors_by_validator.items(): - deduplication_func = deduplication_functions.get(validator) - if deduplication_func is not None: - errors = deduplication_func(errors) - deduplicated_errors.extend(_deduplicate_by_message(errors)) - - # Removes any ValidationError "'value' is a required property" as these - # errors are unlikely to be the relevant ones for the user. They come from - # validation against a schema definition where the output of `alt.value` - # would be valid. However, if a user uses `alt.value`, the `value` keyword - # is included automatically from that function and so it's unlikely - # that this was what the user intended if the keyword is not present - # in the first place. - deduplicated_errors = [ - err for err in deduplicated_errors if not _is_required_value_error(err) - ] - - grouped_errors_deduplicated[json_path] = deduplicated_errors - return grouped_errors_deduplicated - - -def _is_required_value_error(err: ValidationError) -> bool: - return err.validator == "required" and err.validator_value == ["value"] - - -def _group_errors_by_validator(errors: ValidationErrorList) -> GroupedValidationErrors: - """ - Groups the errors by the json schema "validator" that casued the error. - - For example if the error is that a value is not one of an enumeration in the json schema - then the "validator" is `"enum"`, if the error is due to an unknown property that - was set although no additional properties are allowed then "validator" is - `"additionalProperties`, etc. 
- """ - errors_by_validator: defaultdict[str, ValidationErrorList] = defaultdict(list) - for err in errors: - # Ignore mypy error as err.validator as it wrongly sees err.validator - # as of type Optional[Validator] instead of str which it is according - # to the documentation and all tested cases - errors_by_validator[err.validator].append(err) # type: ignore[index] - return dict(errors_by_validator) - - -def _deduplicate_enum_errors(errors: ValidationErrorList) -> ValidationErrorList: - """ - Deduplicate enum errors by removing the errors where the allowed values are a subset of another error. - - For example, if `enum` contains two errors and one has `validator_value` (i.e. accepted values) ["A", "B"] and the - other one ["A", "B", "C"] then the first one is removed and the final - `enum` list only contains the error with ["A", "B", "C"]. - """ - if len(errors) > 1: - # Values (and therefore `validator_value`) of an enum are always arrays, - # see https://json-schema.org/understanding-json-schema/reference/generic.html#enumerated-values - # which is why we can use join below - value_strings = [",".join(err.validator_value) for err in errors] # type: ignore - longest_enums: ValidationErrorList = [] - for value_str, err in zip(value_strings, errors): - if not _contained_at_start_of_one_of_other_values(value_str, value_strings): - longest_enums.append(err) - errors = longest_enums - return errors - - -def _deduplicate_additional_properties_errors( - errors: ValidationErrorList, -) -> ValidationErrorList: - """ - If there are multiple additional property errors it usually means that the offending element was validated against multiple schemas and its parent is a common anyOf validator. - - The error messages produced from these cases are usually - very similar and we just take the shortest one. 
For example, - the following 3 errors are raised for the `unknown` channel option in - `alt.X("variety", unknown=2)`: - - "Additional properties are not allowed ('unknown' was unexpected)" - - "Additional properties are not allowed ('field', 'unknown' were unexpected)" - - "Additional properties are not allowed ('field', 'type', 'unknown' were unexpected)". - """ - if len(errors) > 1: - # Test if all parent errors are the same anyOf error and only do - # the prioritization in these cases. Can't think of a chart spec where this - # would not be the case but still allow for it below to not break anything. - parent = errors[0].parent - if ( - parent is not None - and parent.validator == "anyOf" - # Use [1:] as don't have to check for first error as it was used - # above to define `parent` - and all(err.parent is parent for err in errors[1:]) - ): - errors = [min(errors, key=lambda x: len(x.message))] - return errors - - -def _deduplicate_by_message(errors: ValidationErrorList) -> ValidationErrorList: - """Deduplicate errors by message. 
This keeps the original order in case it was chosen intentionally.""" - return list({e.message: e for e in errors}.values()) - - def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: """Breadth-first sequence of all classes which inherit from cls.""" seen = set() - current_set = {cls} - while current_set: - seen |= current_set - current_set = set.union(*(set(cls.__subclasses__()) for cls in current_set)) - for cls in current_set - seen: + current: set[type[Any]] = {cls} + while current: + seen |= current + current = set(chain.from_iterable(cls.__subclasses__() for cls in current)) + for cls in current - seen: yield cls @@ -714,7 +555,7 @@ def __init__(self, obj: SchemaBase, err: ValidationError) -> None: super().__init__(**err._contents()) self.obj = obj err = cast("SchemaValidationError", err) - self._errors: Iterable[ValidationError] = err._errors + self._errors: _ErrsLazyGroup = err._errors # This is the message from err self._original_message = self.message self.message = self._get_message() @@ -722,49 +563,47 @@ def __init__(self, obj: SchemaBase, err: ValidationError) -> None: def __str__(self) -> str: return self.message + @staticmethod + def indent_from_second_line(msg: str, /, indent: int = 4) -> str: + return "\n".join( + " " * indent + s if idx > 0 and s else s + for idx, s in enumerate(msg.split("\n")) + ) + def _get_message(self) -> str: - def indent_second_line_onwards(message: str, indent: int = 4) -> str: - modified_lines: list[str] = [] - for idx, line in enumerate(message.split("\n")): - if idx > 0 and len(line) > 0: - line = " " * indent + line - modified_lines.append(line) - return "\n".join(modified_lines) - - error_messages: list[str] = [] - # Only show a maximum of 3 errors as else the final message returned by this - # method could get very long. 
- # ^^^^^^^^^^ - # CORRECTION: Only show 3 **json_paths** - - for errors in _group_errors_by_json_path(self._errors).values(): - error_messages.append(self._get_message_for_errors_group(errors)) - - message = "" - if len(error_messages) > 1: - error_messages = [ - indent_second_line_onwards(f"Error {error_id}: {m}") + it = self._errors + group_1 = list(next(it)) + if (group_2 := next(it, None)) is not None: + error_messages = [] + for group in group_1, list(group_2), next(it, None): + if group is not None: + error_messages.append(self._get_message_for_errors_group(group)) + message = "\n\n".join( + self.indent_from_second_line(f"Error {error_id}: {m}") for error_id, m in enumerate(error_messages, start=1) - ] - message += "Multiple errors were found.\n\n" - message += "\n\n".join(error_messages) - return message + ) + return f"Multiple errors were found.\n\n{message}" + else: + return self._get_message_for_errors_group(group_1) - def _get_message_for_errors_group( - self, - errors: ValidationErrorList, - ) -> str: + def _get_message_for_errors_group(self, errors: _Errs) -> str: + """ + Note. + + During development, we only found cases where an additionalProperties + error was raised if that was the only error for the offending instance + as identifiable by the json path. + + Therefore, we just check here the first error. + However, other constellations might exist in which case this should be adapted + so that other error messages are shown as well. + """ + if not isinstance(errors, Sequence): + errors = list(errors) if errors[0].validator == "additionalProperties": - # During development, we only found cases where an additionalProperties - # error was raised if that was the only error for the offending instance - # as identifiable by the json path. Therefore, we just check here the first - # error. However, other constellations might exist in which case - # this should be adapted so that other error messages are shown as well. 
- message = self._get_additional_properties_error_message(errors[0]) + return self._get_additional_properties_error_message(errors[0]) else: - message = self._get_default_error_message(errors=errors) - - return message.strip() + return self._get_default_error_message(errors=errors) def _get_additional_properties_error_message( self, @@ -779,13 +618,12 @@ def _get_additional_properties_error_message( # "Additional properties are not allowed ('unknown' was unexpected)" # Line below extracts "unknown" from this string parameter_name = error.message.split("('")[-1].split("'")[0] - message = f"""\ -`{altair_cls.__name__}` has no parameter named '{parameter_name}' - -Existing parameter names are: -{param_names_table} -See the help for `{altair_cls.__name__}` to read the full description of these parameters""" - return message + cls_name = altair_cls.__name__ + return ( + f"`{cls_name}` has no parameter named '{parameter_name}'\n\n" + f"Existing parameter names are:\n{param_names_table}\n" + f"See the help for `{cls_name}` to read the full description of these parameters" + ) def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase]: """ @@ -793,6 +631,8 @@ def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase This should lead to more informative error messages pointing the user closer to the source of the issue. """ + from altair import vegalite + for prop_name in reversed(error.absolute_path): # Check if str as e.g. 
first item can be a 0 if isinstance(prop_name, str): @@ -804,24 +644,17 @@ def _get_altair_class_for_error(self, error: ValidationError) -> type[SchemaBase # Did not find a suitable class based on traversing the path so we fall # back on the class of the top-level object which created # the SchemaValidationError - cls = self.obj.__class__ + cls = type(self.obj) return cls @staticmethod - def _format_params_as_table(param_dict_keys: Iterable[str]) -> str: + def _format_params_as_table(param_view: KeysView[str]) -> str: """Format param names into a table so that they are easier to read.""" - param_names: tuple[str, ...] - name_lengths: tuple[int, ...] - param_names, name_lengths = zip( - *[ - (name, len(name)) - for name in param_dict_keys - if name not in {"kwds", "self"} - ] - ) + param_names: list[str] = [nm for nm in param_view if nm not in {"kwds", "self"}] + # Worst case scenario with the same longest param name in the same # row for all columns - max_name_length = max(name_lengths) + max_name_length = len(max(param_view, key=len)) max_column_width = 80 # Output a square table if not too big (since it is easier to read) num_param_names = len(param_names) @@ -835,7 +668,7 @@ def split_into_equal_parts(n: int, p: int) -> list[int]: column_heights = split_into_equal_parts(num_param_names, columns) # Section the param names into columns and compute their widths - param_names_columns: list[tuple[str, ...]] = [] + param_names_columns: list[Sequence[str]] = [] column_max_widths: list[int] = [] last_end_idx: int = 0 for ch in column_heights: @@ -846,30 +679,29 @@ def split_into_equal_parts(n: int, p: int) -> list[int]: last_end_idx = ch + last_end_idx # Transpose the param name columns into rows to facilitate looping - param_names_rows: list[tuple[str, ...]] = [] - for li in zip_longest(*param_names_columns, fillvalue=""): - param_names_rows.append(li) # Build the table as a string by iterating over and formatting the rows param_names_table: str = "" - for 
param_names_row in param_names_rows: + column_pad = 3 + for param_names_row in zip_longest(*param_names_columns, fillvalue=""): + last_element = len(param_names_row) - 1 for num, param_name in enumerate(param_names_row): # Set column width based on the longest param in the column - max_name_length_column = column_max_widths[num] - column_pad = 3 - param_names_table += "{:<{}}".format( - param_name, max_name_length_column + column_pad - ) + width = column_pad + column_max_widths[num] + param_names_table += "{:<{}}".format(param_name, width) # Insert newlines and spacing after the last element in each row - if num == (len(param_names_row) - 1): + if num == last_element: param_names_table += "\n" return param_names_table def _get_default_error_message( self, - errors: ValidationErrorList, + errors: Sequence[ValidationError], ) -> str: bullet_points: list[str] = [] - errors_by_validator = _group_errors_by_validator(errors) + errors_by_validator: defaultdict[str, list[ValidationError]] = defaultdict(list) + for err in errors: + errors_by_validator[err.validator].append(err) # type: ignore[index] + if "enum" in errors_by_validator: for error in errors_by_validator["enum"]: bullet_points.append(f"one of {error.validator_value}") @@ -917,7 +749,7 @@ def _get_default_error_message( if validator not in {"enum", "type"} ) message += "".join(it) - return message + return message.strip() class UndefinedType: @@ -1213,7 +1045,7 @@ def to_dict( if validate: try: self.validate(result) - except jsonschema.ValidationError as err: + except ValidationError as err: # We do not raise `from err` as else the resulting # traceback is very long as it contains part # of the Vega-Lite schema. 
It would also first @@ -1338,12 +1170,8 @@ def validate( cls, instance: dict[str, Any], schema: dict[str, Any] | None = None ) -> None: """Validate the instance against the class schema in the context of the rootschema.""" - if schema is None: - schema = cls._schema - # For the benefit of mypy - assert schema is not None - lazy_validate_json_schema( - instance, schema, rootschema=cls._rootschema or cls._schema + validate_jsonschema( + instance, schema or cls._schema, cls._rootschema or cls._schema ) @classmethod @@ -1370,7 +1198,7 @@ def validate_property( np_opt = sys.modules.get("numpy") value = _todict(value, context={}, np_opt=np_opt, pd_opt=pd_opt) props = cls.resolve_references(schema or cls._schema).get("properties", {}) - lazy_validate_json_schema( + validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema ) @@ -1406,6 +1234,17 @@ def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: return args[0] if args else kwds +def _freeze(val): + if isinstance(val, dict): + return frozenset((k, _freeze(v)) for k, v in val.items()) + elif isinstance(val, set): + return frozenset(_freeze(v) for v in val) + elif isinstance(val, (list, tuple)): + return tuple(_freeze(v) for v in val) + else: + return val + + class _FromDict: """ Class used to construct SchemaBase class hierarchies from a dict. 
@@ -1443,22 +1282,8 @@ def hash_schema(cls, schema: dict[str, Any], use_json: bool = True) -> int: for key, val in schema.items() if key not in cls._hash_exclude_keys } - if use_json: - s = json.dumps(schema, sort_keys=True) - return hash(s) - else: - - def _freeze(val): - if isinstance(val, dict): - return frozenset((k, _freeze(v)) for k, v in val.items()) - elif isinstance(val, set): - return frozenset(map(_freeze, val)) - elif isinstance(val, (list, tuple)): - return tuple(map(_freeze, val)) - else: - return val - - return hash(_freeze(schema)) + s: Any = json.dumps(schema, sort_keys=True) if use_json else _freeze(schema) + return hash(s) @overload def from_dict( @@ -1541,8 +1366,8 @@ def from_dict( schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) for possible in schemas: try: - lazy_validate_json_schema(dct, possible, rootschema=root_schema) - except jsonschema.ValidationError: + validate_jsonschema_fail_fast(dct, possible, rootschema=root_schema) + except ValidationError: continue else: return from_dict(dct, schema=possible, default_class=target_tp) @@ -1569,6 +1394,8 @@ def __init__(self, prop: str, schema: dict[str, Any]) -> None: self.schema = schema def __get__(self, obj, cls): + from altair import vegalite + self.obj = obj self.cls = cls # The docs from the encoding class parameter (e.g. `bin` in X, Color, From 5483db8c8bc009f405f288988c40bb54bb7ea312 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:30:29 +0100 Subject: [PATCH 18/92] test(perf): Adds `test_chart_validation_benchmark` Temporary, will remove before review. 
Tried to isolate to a single function so that I can reproduce on main --- tests/utils/test_schemapi.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 7ae96a864..a73994163 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -871,6 +871,39 @@ def test_chart_validation_errors(chart_func, expected_error_message): chart.to_dict() +_SKIP_SLOW_BENCHMARKS: bool = False + + +@pytest.mark.skipif( + _SKIP_SLOW_BENCHMARKS, + reason="Should only be run in isolation to test single threaded performance.", +) +def test_chart_validation_benchmark() -> None: + """ + Intended to isolate the `to_dict` call. + + Repeated ``1000`` times, non-parametric: + - in an attempt to limit the potential overhead of ``pytest`` + - but enforce ``1`` thread, like a user-code would be. + """ + if TYPE_CHECKING: + from typing import Iterator + + from altair.typing import ChartType + + def _iter_charts(*, times: int) -> Iterator[ChartType]: + from itertools import chain, repeat + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + charts: list[ChartType] = [fn() for fn, _ in chart_funcs_error_message] + yield from chain.from_iterable(repeat(charts, times=times)) + + for chart in _iter_charts(times=1000): + with pytest.raises(SchemaValidationError): + chart.to_dict(validate=True) + + def test_multiple_field_strings_in_condition(): selection = alt.selection_point() expected_error_message = "A field cannot be used for both the `if_true` and `if_false` values of a condition. One of them has to specify a `value` or `datum` definition." 
From f208066c7037ec89c2e0bcd9e12e0cd661eaf0ff Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:40:11 +0100 Subject: [PATCH 19/92] fix: Invert default for `_SKIP_SLOW_BENCHMARKS` I renamed this from `_SLOW_BENCHMARKS` but forgot to invert the bool lol --- tests/utils/test_schemapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 8526faf3c..231b60d74 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -875,7 +875,7 @@ def test_chart_validation_errors(chart_func, expected_error_message): chart.to_dict() -_SKIP_SLOW_BENCHMARKS: bool = False +_SKIP_SLOW_BENCHMARKS: bool = True @pytest.mark.skipif( From 996ea97134566d4ead1629460c0bd7acdb77d24a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:08:49 +0100 Subject: [PATCH 20/92] refactor: Parameterize `_regroup` and improve doc --- tools/schemapi/schemapi.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 57bf63b23..7acf76d82 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -160,13 +160,15 @@ def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: yield from others -def _regroup(errors: _Errs, /) -> _ErrsLazyGroup: +def _regroup( + errors: _Errs, /, *, key: Callable[[ValidationError], str] = _json_path +) -> _ErrsLazyGroup: """ - Regroup error stream with the assumption they are already sorted. + Regroup error stream by a ``key`` function. - This holds **only after** all other stages. + Assumes ``errors`` are already sorted, which holds **only** at the end of ``validate_jsonschema``. 
""" - for _, grouped_it in groupby(errors, _json_path): + for _, grouped_it in groupby(errors, key): yield grouped_it From 5751132a63af6b6b1f67be5a4d636bb345ae112e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 18:19:38 +0100 Subject: [PATCH 21/92] docs: Update `validate_jsonschema`/`_fail_fast` --- tools/schemapi/schemapi.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 7acf76d82..953717b11 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -178,19 +178,16 @@ def validate_jsonschema( rootschema: dict[str, Any] | None = None, ) -> None: """ - Lazy equivalent of `validate_jsonschema`. + Validates ``spec`` against ``schema`` in the context of ``rootschema``. - Validates the passed in spec against the schema in the context of the rootschema. + Any ``ValidationError``(s) are deduplicated and prioritized, with + the remaining errors deemed relevant to the user. - If any errors are found, they are deduplicated and prioritized - and only the most relevant errors are kept. - - Nothing special about this first error but we need to choose one - which can be raised - All errors are then attached as a new attribute to ValidationError so that - they can be used in SchemaValidationError to craft a more helpful - error message. Setting a new attribute like this is not ideal as - it then no longer matches the type ValidationError. + Notes + ----- + - The first error is monkeypatched with a grouped iterator of all remaining errors + - ``SchemaValidationError`` utilizes the patched attribute, to craft a more helpful error message. + - However this breaks typing """ it_errors = _iter_errors_from_spec(spec, schema, rootschema=rootschema) if first_error := next(it_errors, None): @@ -217,7 +214,7 @@ def validate_jsonschema_fail_fast( """ Raise as quickly as possible. 
- Use when any information about the error is not needed. + Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. """ if ( err := next(_iter_errors_from_spec(spec, schema, rootschema=rootschema), None) From cb1fa24e468309be8a68d580ec100585fc2acb10 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:16:01 +0100 Subject: [PATCH 22/92] refactor: Use more constants, rename, reorder --- tools/schemapi/schemapi.py | 63 +++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 953717b11..4c14dab7f 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -35,7 +35,7 @@ from packaging.version import Version if TYPE_CHECKING: - from typing import ClassVar + from typing import ClassVar, Literal, Mapping from jsonschema.protocols import Validator, _JsonParameter from referencing import Registry @@ -59,6 +59,19 @@ _ErrsLazy: TypeAlias = Iterator[ValidationError] _ErrsLazyGroup: TypeAlias = Iterator[_ErrsLazy] _IntoLazyGroup: TypeAlias = Iterator["tuple[str, ValidationError]"] + _ValidatorKeyword: TypeAlias = Literal[ + "additionalProperties", + "enum", + "type", + "required", + "properties", + "anyOf", + "allOf", + "oneOf", + "ref", + "const", + ] + """Non-exhaustive listing of possible literals in ``ValidationError.validator``""" _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" @@ -330,6 +343,17 @@ def _get_referencing_registry( return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) +_FN_PATH = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) +"""Key function for ``(json_path, ValidationError)``.""" +_FN_VALIDATOR = cast("Callable[[ValidationError], _ValidatorKeyword]", operator.attrgetter("validator")) # fmt: off +"""Key function for ``ValidationError.validator``.""" + + +def _message_len(err: 
ValidationError, /) -> int: + """Return length of a ``ValidationError`` message.""" + return len(err.message) + + def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: """ Combines 3 previously distinct steps: @@ -354,26 +378,17 @@ def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: from that function and so it's unlikely that this was what the user intended if the keyword is not present in the first place. """ # noqa: D400 + REQUIRED = "required" + VALUE = ["value"] for err in errors: if err_context := err.context: yield from _group_tree_leaves(err_context) - elif err.validator == "required" and err.validator_value == ["value"]: + elif err.validator == REQUIRED and err.validator_value == VALUE: continue else: yield _json_path(err), err -_fn_path = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) -"""Key function for ``(json_path, ValidationError)``.""" -_fn_validator = cast("Callable[[ValidationError], str]", operator.attrgetter("validator")) # fmt: off -"""Key function for ``ValidationError.validator``.""" - - -def _message_len(err: ValidationError, /) -> int: - """Return length of a ``ValidationError`` message.""" - return len(err.message) - - def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]: """ Removes key (json path), value (errors) pairs where the json path is fully contained in another json path. 
@@ -389,9 +404,9 @@ def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]:
     - Reversing allows prioritising more specific groups, since they are seen first
     - Then re-reversed, to keep seen order
     """
-    rev_sort = sorted(json_path_errors, key=_fn_path, reverse=True)
+    rev_sort = sorted(json_path_errors, key=_FN_PATH, reverse=True)
     keeping: dict[str, _Errs] = {}
-    for unique_path, grouped_errors in groupby(rev_sort, key=_fn_path):
+    for unique_path, grouped_errors in groupby(rev_sort, key=_FN_PATH):
         if any(seen.startswith(unique_path) for seen in keeping):
             continue
         else:
@@ -399,7 +414,9 @@ def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]:
     yield from islice(reversed(keeping.values()), 3)


-def _groupby_validator(errors: _Errs, /) -> Iterator[tuple[str, _ErrsLazy]]:
+def _groupby_validator(
+    errors: _Errs, /
+) -> Iterator[tuple[_ValidatorKeyword, _ErrsLazy]]:
     """
     Groups the errors by the json schema "validator" that caused the error.
@@ -408,7 +425,7 @@ def _groupby_validator(errors: _Errs, /) -> Iterator[tuple[str, _ErrsLazy]]:
     was set although no additional properties are allowed then "validator" is
     `"additionalProperties`, etc.
""" - yield from groupby(sorted(errors, key=_fn_validator), key=_fn_validator) + yield from groupby(sorted(errors, key=_FN_VALIDATOR), key=_FN_VALIDATOR) def _deduplicate_errors(grouped_errors: Iterator[_Errs], /) -> _ErrsLazy: @@ -420,10 +437,8 @@ def _deduplicate_errors(grouped_errors: Iterator[_Errs], /) -> _ErrsLazy: """ for by_path in grouped_errors: for validator, errors in _groupby_validator(by_path): - if validator == "additionalProperties": - errors = _shortest_any_of(errors) - elif validator == "enum": - errors = _prune_subset_enum(errors) + if fn := _FN_MAP_DEDUPLICATION.get(validator): + errors = fn(errors) yield from _distinct_messages(errors) @@ -468,6 +483,12 @@ def _prune_subset_enum(iterable: _Errs, /) -> _ErrsLazy: yield err +_FN_MAP_DEDUPLICATION: Mapping[_ValidatorKeyword, Callable[[_Errs], _ErrsLazy]] = { + "additionalProperties": _shortest_any_of, + "enum": _prune_subset_enum, +} + + def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: """Breadth-first sequence of all classes which inherit from cls.""" seen = set() From 09c83ae216819003b996363289ae3322ef2d9960 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:17:23 +0100 Subject: [PATCH 23/92] docs: Tweak `_group_tree_leaves` --- tools/schemapi/schemapi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 4c14dab7f..6d2e161b4 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -365,7 +365,7 @@ def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: 2. ``_group_errors_by_json_path`` (part of) - Extracts the path for grouping. + Extracts the ``.json_path`` property for grouping. 3. 
Removes:: From a0a897833091c57f539a6be9ad443350926b8907 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:22:39 +0100 Subject: [PATCH 24/92] refactor(perf): Conditionally define compatibility code Rather than checking a function/constant, a single set of behaviour is defined **once** - depending on `jsonschema` version. This makes the remaining functionality much easier to reason with. Also easier to avoid typing issues --- tools/schemapi/schemapi.py | 296 +++++++++++++++++++------------------ 1 file changed, 151 insertions(+), 145 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 6d2e161b4..465c5681b 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -109,32 +109,6 @@ class Derived(SchemaBase): _class_is_valid_at_instantiation: ClassVar[bool] = False """ -_JSONSCHEMA_VERSION = Version(importlib_version("jsonschema")) -_USING_REFERENCING: Final[bool] = _JSONSCHEMA_VERSION >= Version("4.18") # noqa: SIM300 -""" -``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. - -See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 -""" - -if _JSONSCHEMA_VERSION >= Version("4.0.1"): # noqa: SIM300 - _json_path: Callable[[ValidationError], str] = operator.attrgetter("json_path") -else: - - def _json_path(err: ValidationError, /) -> str: - """ - Vendored backport for ``jsonschema.ValidationError.json_path`` property. - - See https://github.com/vega/altair/issues/3038. - """ - path = "$" - for elem in err.absolute_path: - if isinstance(elem, int): - path += "[" + str(elem) + "]" - else: - path += "." + elem - return path - def enable_debug_mode() -> None: global DEBUG_MODE @@ -157,34 +131,6 @@ def debug_mode(arg: bool) -> Iterator[None]: DEBUG_MODE = original -def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: - """ - Continue an iterator at the last popped ``element``. 
- - Equivalent to:: - - elements = 1, 2, 3, 4, 5 - it = iter(elements) - element = next(it) - it_continue = chain([element], it) - - """ - yield element - yield from others - - -def _regroup( - errors: _Errs, /, *, key: Callable[[ValidationError], str] = _json_path -) -> _ErrsLazyGroup: - """ - Regroup error stream by a ``key`` function. - - Assumes ``errors`` are already sorted, which holds **only** at the end of ``validate_jsonschema``. - """ - for _, grouped_it in groupby(errors, key): - yield grouped_it - - def validate_jsonschema( spec: _JsonParameter, schema: dict[str, Any], @@ -202,7 +148,7 @@ def validate_jsonschema( - ``SchemaValidationError`` utilizes the patched attribute, to craft a more helpful error message. - However this breaks typing """ - it_errors = _iter_errors_from_spec(spec, schema, rootschema=rootschema) + it_errors = _iter_validator_errors(spec, schema, rootschema=rootschema) if first_error := next(it_errors, None): groups = _group_tree_leaves(_rechain(first_error, it_errors)) most_specific = _prune_subset_paths(groups) @@ -230,60 +176,11 @@ def validate_jsonschema_fail_fast( Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. """ if ( - err := next(_iter_errors_from_spec(spec, schema, rootschema=rootschema), None) + err := next(_iter_validator_errors(spec, schema, rootschema=rootschema), None) ) is not None: raise err -def _iter_errors_from_spec( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, -) -> _ErrsLazy: - """ - Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. - - ``schema`` and ``rootschema`` are not validated but instead considered as valid. - - We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. - Instead, we pass the ``schema`` directly to the validator class. - - This is done for two reasons: - - 1. 
The schema comes from Vega-Lite and is not based on the user - input, therefore there is no need to validate it in the first place. - 2. The "uri-reference" format checker fails for some of the - references as URIs in "$ref" are not encoded, e.g.: - - '#/definitions/ValueDefWithCondition' - - would be a valid $ref in a Vega-Lite schema but it is not a valid - URI reference due to the characters such as '<'. - """ - json_schema_draft_url = _get_json_schema_draft_url(rootschema or schema) - validator_cls: type[Validator] = cast( - "type[Validator]", - jsonschema.validators.validator_for({"$schema": json_schema_draft_url}), - ) - validator_kwargs: dict[str, Any] = {} - if hasattr(validator_cls, "FORMAT_CHECKER"): - validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER - - if _USING_REFERENCING: - schema = _prepare_references(schema) - validator_kwargs["registry"] = _get_referencing_registry( - rootschema or schema, json_schema_draft_url - ) - else: - # No resolver is necessary if the schema is already the full schema - validator_kwargs["resolver"] = ( - jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema - ) - - validator = validator_cls(schema, **validator_kwargs) - return validator.iter_errors(spec) - - def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: return schema.get("$schema", _DEFAULT_JSON_SCHEMA_DRAFT_URL) @@ -319,28 +216,103 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -def _get_referencing_registry( - rootschema: dict[str, Any], json_schema_draft_url: str | None = None -) -> Registry: - """ - Referencing is a dependency of newer jsonschema versions. 
+def _prepare_validator(url: str, /) -> Callable[..., Validator]: + tp = cast( + "Callable[..., Validator]", + jsonschema.validators.validator_for({"$schema": url}), + ) + if hasattr(tp, "FORMAT_CHECKER"): + return partial(tp, format_checker=tp.FORMAT_CHECKER) + else: + return tp + + +if Version(importlib_version("jsonschema")) >= Version("4.18"): + from referencing import Registry + from referencing.jsonschema import specification_with + + def _construct_validator( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> Validator: + url = _get_json_schema_draft_url(rootschema or schema) + tp = _prepare_validator(url) + registry = _get_referencing_registry(rootschema or schema, url) + return tp(_prepare_references(schema), registry=registry) + + def _get_referencing_registry( + rootschema: dict[str, Any], json_schema_draft_url: str | None = None + ) -> Registry[Any]: + """ + Referencing is a dependency of newer jsonschema versions. - See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 - We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library - is not installed. - That's ok as in these cases this function is not called. + We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library + is not installed. + That's ok as in these cases this function is not called. + + We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case + ``referencing`` is installed. 
+ """ + dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) + specification = specification_with(dialect_id) + resource = specification.create_resource(rootschema) + return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) + + def _resolve_references( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> dict[str, Any]: + """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" + registry = _get_referencing_registry(rootschema or schema) + resolver = registry.resolver() + while "$ref" in schema: + schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents + return schema +else: + + def _construct_validator( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> Validator: + tp = _prepare_validator(_get_json_schema_draft_url(rootschema or schema)) + resolver: Any = ( + jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema + ) + return tp(schema, resolver=resolver) + + def _resolve_references( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> dict[str, Any]: + """ + Resolve schema references until there is no $ref anymore in the top-level of the dictionary. + + ``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. + + See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + """ + resolver = jsonschema.RefResolver.from_schema(rootschema or schema) + while "$ref" in schema: + with resolver.resolving(schema["$ref"]) as resolved: + schema = resolved + return schema - We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case - ``referencing`` is installed. 
- """ - from referencing import Registry # type: ignore[import,unused-ignore] # noqa: I001 - from referencing.jsonschema import specification_with # type: ignore[import,unused-ignore] - dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) - specification = specification_with(dialect_id) - resource = specification.create_resource(rootschema) - return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) +if Version(importlib_version("jsonschema")) >= Version("4.0.1"): + _json_path: Callable[[ValidationError], str] = operator.attrgetter("json_path") +else: + + def _json_path(err: ValidationError, /) -> str: + """ + Vendored backport for ``jsonschema.ValidationError.json_path`` property. + + See https://github.com/vega/altair/issues/3038. + """ + path = "$" + for elem in err.absolute_path: + if isinstance(elem, int): + path += "[" + str(elem) + "]" + else: + path += "." + elem + return path _FN_PATH = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) @@ -354,6 +326,62 @@ def _message_len(err: ValidationError, /) -> int: return len(err.message) +def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: + """ + Continue an iterator at the last popped ``element``. + + Equivalent to:: + + elements = 1, 2, 3, 4, 5 + it = iter(elements) + element = next(it) + it_continue = chain([element], it) + + """ + yield element + yield from others + + +def _regroup( + errors: _Errs, /, *, key: Callable[[ValidationError], str] = _json_path +) -> _ErrsLazyGroup: + """ + Regroup error stream by a ``key`` function. + + Assumes ``errors`` are already sorted, which holds **only** at the end of ``validate_jsonschema``. 
+ """ + for _, grouped_it in groupby(errors, key): + yield grouped_it + + +def _iter_validator_errors( + spec: _JsonParameter, + schema: dict[str, Any], + rootschema: dict[str, Any] | None = None, +) -> _ErrsLazy: + """ + Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. + + ``schema`` and ``rootschema`` are not validated but instead considered as valid. + + We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. + Instead, we pass the ``schema`` directly to the validator class. + + This is done for two reasons: + + 1. The schema comes from Vega-Lite and is not based on the user + input, therefore there is no need to validate it in the first place. + 2. The "uri-reference" format checker fails for some of the + references as URIs in "$ref" are not encoded, e.g.: + + '#/definitions/ValueDefWithCondition' + + would be a valid $ref in a Vega-Lite schema but it is not a valid + URI reference due to the characters such as '<'. 
+ """ + return _construct_validator(schema, rootschema).iter_errors(spec) + + def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: """ Combines 3 previously distinct steps: @@ -546,28 +574,6 @@ def _todict(obj: Any, context: dict[str, Any] | None, np_opt: Any, pd_opt: Any) return obj -def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None -) -> dict[str, Any]: - """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" - if _USING_REFERENCING: - registry = _get_referencing_registry(rootschema or schema) - # Using a different variable name to show that this is not the - # jsonschema.RefResolver but instead a Resolver from the referencing - # library - referencing_resolver = registry.resolver() - while "$ref" in schema: - schema = referencing_resolver.lookup( - _VEGA_LITE_ROOT_URI + schema["$ref"] - ).contents - else: - resolver = jsonschema.RefResolver.from_schema(rootschema or schema) - while "$ref" in schema: - with resolver.resolving(schema["$ref"]) as resolved: - schema = resolved - return schema - - class SchemaValidationError(jsonschema.ValidationError): """A wrapper for jsonschema.ValidationError with friendlier traceback.""" From 2e8159261cfbe528c87d1f58c5641412c44bcd6a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 21 Aug 2024 21:24:36 +0100 Subject: [PATCH 25/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 376 +++++++++++++++++++++------------------ 1 file changed, 201 insertions(+), 175 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 248193a85..ef477e769 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -37,7 +37,7 @@ from packaging.version import Version if TYPE_CHECKING: - from typing import ClassVar + from typing import ClassVar, Literal, Mapping from jsonschema.protocols import Validator, _JsonParameter from referencing import 
Registry @@ -61,6 +61,19 @@ _ErrsLazy: TypeAlias = Iterator[ValidationError] _ErrsLazyGroup: TypeAlias = Iterator[_ErrsLazy] _IntoLazyGroup: TypeAlias = Iterator["tuple[str, ValidationError]"] + _ValidatorKeyword: TypeAlias = Literal[ + "additionalProperties", + "enum", + "type", + "required", + "properties", + "anyOf", + "allOf", + "oneOf", + "ref", + "const", + ] + """Non-exhaustive listing of possible literals in ``ValidationError.validator``""" _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" @@ -98,32 +111,6 @@ class Derived(SchemaBase): _class_is_valid_at_instantiation: ClassVar[bool] = False """ -_JSONSCHEMA_VERSION = Version(importlib_version("jsonschema")) -_USING_REFERENCING: Final[bool] = _JSONSCHEMA_VERSION >= Version("4.18") # noqa: SIM300 -""" -``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. - -See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 -""" - -if _JSONSCHEMA_VERSION >= Version("4.0.1"): # noqa: SIM300 - _json_path: Callable[[ValidationError], str] = operator.attrgetter("json_path") -else: - - def _json_path(err: ValidationError, /) -> str: - """ - Vendored backport for ``jsonschema.ValidationError.json_path`` property. - - See https://github.com/vega/altair/issues/3038. - """ - path = "$" - for elem in err.absolute_path: - if isinstance(elem, int): - path += "[" + str(elem) + "]" - else: - path += "." + elem - return path - def enable_debug_mode() -> None: global DEBUG_MODE @@ -146,53 +133,24 @@ def debug_mode(arg: bool) -> Iterator[None]: DEBUG_MODE = original -def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: - """ - Continue an iterator at the last popped ``element``. - - Equivalent to:: - - elements = 1, 2, 3, 4, 5 - it = iter(elements) - element = next(it) - it_continue = chain([element], it) - - """ - yield element - yield from others - - -def _regroup(errors: _Errs, /) -> _ErrsLazyGroup: - """ - Regroup error stream with the assumption they are already sorted. 
- - This holds **only after** all other stages. - """ - for _, grouped_it in groupby(errors, _json_path): - yield grouped_it - - def validate_jsonschema( spec: _JsonParameter, schema: dict[str, Any], rootschema: dict[str, Any] | None = None, ) -> None: """ - Lazy equivalent of `validate_jsonschema`. + Validates ``spec`` against ``schema`` in the context of ``rootschema``. - Validates the passed in spec against the schema in the context of the rootschema. + Any ``ValidationError``(s) are deduplicated and prioritized, with + the remaining errors deemed relevant to the user. - If any errors are found, they are deduplicated and prioritized - and only the most relevant errors are kept. - - Nothing special about this first error but we need to choose one - which can be raised - All errors are then attached as a new attribute to ValidationError so that - they can be used in SchemaValidationError to craft a more helpful - error message. Setting a new attribute like this is not ideal as - it then no longer matches the type ValidationError. + Notes + ----- + - The first error is monkeypatched with a grouped iterator of all remaining errors + - ``SchemaValidationError`` utilizes the patched attribute, to craft a more helpful error message. + - However this breaks typing """ - it_errors = _iter_errors_from_spec(spec, schema, rootschema=rootschema) + it_errors = _iter_validator_errors(spec, schema, rootschema=rootschema) if first_error := next(it_errors, None): groups = _group_tree_leaves(_rechain(first_error, it_errors)) most_specific = _prune_subset_paths(groups) @@ -217,63 +175,14 @@ def validate_jsonschema_fail_fast( """ Raise as quickly as possible. - Use when any information about the error is not needed. + Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. 
""" if ( - err := next(_iter_errors_from_spec(spec, schema, rootschema=rootschema), None) + err := next(_iter_validator_errors(spec, schema, rootschema=rootschema), None) ) is not None: raise err -def _iter_errors_from_spec( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, -) -> _ErrsLazy: - """ - Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. - - ``schema`` and ``rootschema`` are not validated but instead considered as valid. - - We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. - Instead, we pass the ``schema`` directly to the validator class. - - This is done for two reasons: - - 1. The schema comes from Vega-Lite and is not based on the user - input, therefore there is no need to validate it in the first place. - 2. The "uri-reference" format checker fails for some of the - references as URIs in "$ref" are not encoded, e.g.: - - '#/definitions/ValueDefWithCondition' - - would be a valid $ref in a Vega-Lite schema but it is not a valid - URI reference due to the characters such as '<'. 
- """ - json_schema_draft_url = _get_json_schema_draft_url(rootschema or schema) - validator_cls: type[Validator] = cast( - "type[Validator]", - jsonschema.validators.validator_for({"$schema": json_schema_draft_url}), - ) - validator_kwargs: dict[str, Any] = {} - if hasattr(validator_cls, "FORMAT_CHECKER"): - validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER - - if _USING_REFERENCING: - schema = _prepare_references(schema) - validator_kwargs["registry"] = _get_referencing_registry( - rootschema or schema, json_schema_draft_url - ) - else: - # No resolver is necessary if the schema is already the full schema - validator_kwargs["resolver"] = ( - jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema - ) - - validator = validator_cls(schema, **validator_kwargs) - return validator.iter_errors(spec) - - def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: return schema.get("$schema", _DEFAULT_JSON_SCHEMA_DRAFT_URL) @@ -309,28 +218,170 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -def _get_referencing_registry( - rootschema: dict[str, Any], json_schema_draft_url: str | None = None -) -> Registry: +def _prepare_validator(url: str, /) -> Callable[..., Validator]: + tp = cast( + "Callable[..., Validator]", + jsonschema.validators.validator_for({"$schema": url}), + ) + if hasattr(tp, "FORMAT_CHECKER"): + return partial(tp, format_checker=tp.FORMAT_CHECKER) + else: + return tp + + +if Version(importlib_version("jsonschema")) >= Version("4.18"): + from referencing import Registry + from referencing.jsonschema import specification_with + + def _construct_validator( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> Validator: + url = _get_json_schema_draft_url(rootschema or schema) + tp = _prepare_validator(url) + registry = _get_referencing_registry(rootschema or schema, url) + return tp(_prepare_references(schema), registry=registry) + + def _get_referencing_registry( + 
rootschema: dict[str, Any], json_schema_draft_url: str | None = None + ) -> Registry[Any]: + """ + Referencing is a dependency of newer jsonschema versions. + + See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + + We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library + is not installed. + That's ok as in these cases this function is not called. + + We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case + ``referencing`` is installed. + """ + dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) + specification = specification_with(dialect_id) + resource = specification.create_resource(rootschema) + return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) + + def _resolve_references( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> dict[str, Any]: + """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" + registry = _get_referencing_registry(rootschema or schema) + resolver = registry.resolver() + while "$ref" in schema: + schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents + return schema +else: + + def _construct_validator( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> Validator: + tp = _prepare_validator(_get_json_schema_draft_url(rootschema or schema)) + resolver: Any = ( + jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema + ) + return tp(schema, resolver=resolver) + + def _resolve_references( + schema: dict[str, Any], rootschema: dict[str, Any] | None = None + ) -> dict[str, Any]: + """ + Resolve schema references until there is no $ref anymore in the top-level of the dictionary. + + ``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. 
+ + See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + """ + resolver = jsonschema.RefResolver.from_schema(rootschema or schema) + while "$ref" in schema: + with resolver.resolving(schema["$ref"]) as resolved: + schema = resolved + return schema + + +if Version(importlib_version("jsonschema")) >= Version("4.0.1"): + _json_path: Callable[[ValidationError], str] = operator.attrgetter("json_path") +else: + + def _json_path(err: ValidationError, /) -> str: + """ + Vendored backport for ``jsonschema.ValidationError.json_path`` property. + + See https://github.com/vega/altair/issues/3038. + """ + path = "$" + for elem in err.absolute_path: + if isinstance(elem, int): + path += "[" + str(elem) + "]" + else: + path += "." + elem + return path + + +_FN_PATH = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) +"""Key function for ``(json_path, ValidationError)``.""" +_FN_VALIDATOR = cast("Callable[[ValidationError], _ValidatorKeyword]", operator.attrgetter("validator")) # fmt: off +"""Key function for ``ValidationError.validator``.""" + + +def _message_len(err: ValidationError, /) -> int: + """Return length of a ``ValidationError`` message.""" + return len(err.message) + + +def _rechain(element: T, others: Iterable[T], /) -> Iterator[T]: """ - Referencing is a dependency of newer jsonschema versions. + Continue an iterator at the last popped ``element``. - See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + Equivalent to:: - We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library - is not installed. - That's ok as in these cases this function is not called. + elements = 1, 2, 3, 4, 5 + it = iter(elements) + element = next(it) + it_continue = chain([element], it) - We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case - ``referencing`` is installed. 
""" - from referencing import Registry # type: ignore[import,unused-ignore] # noqa: I001 - from referencing.jsonschema import specification_with # type: ignore[import,unused-ignore] + yield element + yield from others - dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) - specification = specification_with(dialect_id) - resource = specification.create_resource(rootschema) - return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) + +def _regroup( + errors: _Errs, /, *, key: Callable[[ValidationError], str] = _json_path +) -> _ErrsLazyGroup: + """ + Regroup error stream by a ``key`` function. + + Assumes ``errors`` are already sorted, which holds **only** at the end of ``validate_jsonschema``. + """ + for _, grouped_it in groupby(errors, key): + yield grouped_it + + +def _iter_validator_errors( + spec: _JsonParameter, + schema: dict[str, Any], + rootschema: dict[str, Any] | None = None, +) -> _ErrsLazy: + """ + Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. + + ``schema`` and ``rootschema`` are not validated but instead considered as valid. + + We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. + Instead, we pass the ``schema`` directly to the validator class. + + This is done for two reasons: + + 1. The schema comes from Vega-Lite and is not based on the user + input, therefore there is no need to validate it in the first place. + 2. The "uri-reference" format checker fails for some of the + references as URIs in "$ref" are not encoded, e.g.: + + '#/definitions/ValueDefWithCondition' + + would be a valid $ref in a Vega-Lite schema but it is not a valid + URI reference due to the characters such as '<'. 
+ """ + return _construct_validator(schema, rootschema).iter_errors(spec) def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: @@ -344,7 +395,7 @@ def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: 2. ``_group_errors_by_json_path`` (part of) - Extracts the path for grouping. + Extracts the ``.json_path`` property for grouping. 3. Removes:: @@ -357,26 +408,17 @@ def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: from that function and so it's unlikely that this was what the user intended if the keyword is not present in the first place. """ # noqa: D400 + REQUIRED = "required" + VALUE = ["value"] for err in errors: if err_context := err.context: yield from _group_tree_leaves(err_context) - elif err.validator == "required" and err.validator_value == ["value"]: + elif err.validator == REQUIRED and err.validator_value == VALUE: continue else: yield _json_path(err), err -_fn_path = cast("Callable[[tuple[str, ValidationError]], str]", operator.itemgetter(0)) -"""Key function for ``(json_path, ValidationError)``.""" -_fn_validator = cast("Callable[[ValidationError], str]", operator.attrgetter("validator")) # fmt: off -"""Key function for ``ValidationError.validator``.""" - - -def _message_len(err: ValidationError, /) -> int: - """Return length of a ``ValidationError`` message.""" - return len(err.message) - - def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]: """ Removes key (json path), value (errors) pairs where the json path is fully contained in another json path. 
@@ -392,9 +434,9 @@ def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]: - Reversing allows prioritising more specific groups, since they are seen first - Then re-reversed, to keep seen order """ - rev_sort = sorted(json_path_errors, key=_fn_path, reverse=True) + rev_sort = sorted(json_path_errors, key=_FN_PATH, reverse=True) keeping: dict[str, _Errs] = {} - for unique_path, grouped_errors in groupby(rev_sort, key=_fn_path): + for unique_path, grouped_errors in groupby(rev_sort, key=_FN_PATH): if any(seen.startswith(unique_path) for seen in keeping): continue else: @@ -402,7 +444,9 @@ def _prune_subset_paths(json_path_errors: _IntoLazyGroup, /) -> Iterator[_Errs]: yield from islice(reversed(keeping.values()), 3) -def _groupby_validator(errors: _Errs, /) -> Iterator[tuple[str, _ErrsLazy]]: +def _groupby_validator( + errors: _Errs, / +) -> Iterator[tuple[_ValidatorKeyword, _ErrsLazy]]: """ Groups the errors by the json schema "validator" that casued the error. @@ -411,7 +455,7 @@ def _groupby_validator(errors: _Errs, /) -> Iterator[tuple[str, _ErrsLazy]]: was set although no additional properties are allowed then "validator" is `"additionalProperties`, etc. 
""" - yield from groupby(sorted(errors, key=_fn_validator), key=_fn_validator) + yield from groupby(sorted(errors, key=_FN_VALIDATOR), key=_FN_VALIDATOR) def _deduplicate_errors(grouped_errors: Iterator[_Errs], /) -> _ErrsLazy: @@ -423,10 +467,8 @@ def _deduplicate_errors(grouped_errors: Iterator[_Errs], /) -> _ErrsLazy: """ for by_path in grouped_errors: for validator, errors in _groupby_validator(by_path): - if validator == "additionalProperties": - errors = _shortest_any_of(errors) - elif validator == "enum": - errors = _prune_subset_enum(errors) + if fn := _FN_MAP_DEDUPLICATION.get(validator): + errors = fn(errors) yield from _distinct_messages(errors) @@ -471,6 +513,12 @@ def _prune_subset_enum(iterable: _Errs, /) -> _ErrsLazy: yield err +_FN_MAP_DEDUPLICATION: Mapping[_ValidatorKeyword, Callable[[_Errs], _ErrsLazy]] = { + "additionalProperties": _shortest_any_of, + "enum": _prune_subset_enum, +} + + def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: """Breadth-first sequence of all classes which inherit from cls.""" seen = set() @@ -528,28 +576,6 @@ def _todict(obj: Any, context: dict[str, Any] | None, np_opt: Any, pd_opt: Any) return obj -def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None -) -> dict[str, Any]: - """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" - if _USING_REFERENCING: - registry = _get_referencing_registry(rootschema or schema) - # Using a different variable name to show that this is not the - # jsonschema.RefResolver but instead a Resolver from the referencing - # library - referencing_resolver = registry.resolver() - while "$ref" in schema: - schema = referencing_resolver.lookup( - _VEGA_LITE_ROOT_URI + schema["$ref"] - ).contents - else: - resolver = jsonschema.RefResolver.from_schema(rootschema or schema) - while "$ref" in schema: - with resolver.resolving(schema["$ref"]) as resolved: - schema = resolved - return schema - - class 
SchemaValidationError(jsonschema.ValidationError): """A wrapper for jsonschema.ValidationError with friendlier traceback.""" From 49aeec1f12f722d19121d16f977167048f647ec8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 12:37:53 +0100 Subject: [PATCH 26/92] refactor: Use language that more closely aligns with json schema Provided links to `json-schema` reference for more info. Also some minor edits to docs --- altair/utils/schemapi.py | 52 +++++++++++++++++++++--------------- tests/utils/test_schemapi.py | 16 +++++++---- tools/schemapi/schemapi.py | 51 ++++++++++++++++++++--------------- 3 files changed, 70 insertions(+), 49 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index ef477e769..b7fa7e4bf 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -50,9 +50,9 @@ from typing_extensions import TypeIs if sys.version_info >= (3, 11): - from typing import Never, Self + from typing import LiteralString, Never, Self else: - from typing_extensions import Never, Self + from typing_extensions import LiteralString, Never, Self if sys.version_info >= (3, 10): from typing import TypeAlias else: @@ -85,18 +85,19 @@ It just cannot be an empty string as we need to reference the schema registered in the ``referencing.Registry``.""" -_DEFAULT_JSON_SCHEMA_DRAFT_URL: Final = "http://json-schema.org/draft-07/schema#" +_DEFAULT_DIALECT_URI: LiteralString = "http://json-schema.org/draft-07/schema#" """ -Ideally, jsonschema specification would be parsed from the current Vega-Lite -schema instead of being hardcoded here as a default value. +Ideally, this would be parsed from the current Vega-Lite schema, and not hardcoded here. -However, due to circular imports between this module and the ``alt.vegalite`` -modules, this information is not yet available at this point as ``alt.vegalite`` -is only partially loaded. 
+However, due to circular imports between this module and ``alt.vegalite``, +this information is not yet available as the latter is only *partially* loaded. -The draft version which is used is unlikely to change often so it's ok to keep this. -There is also a test which validates that this value is always the same as in the Vega-Lite schema. +The `draft version`_ which is used is unlikely to change often so it's ok to keep this. + +.. _draft version: + https://json-schema.org/understanding-json-schema/reference/schema#declaring-a-dialect """ +# RELATED: tests/utils/test/schemapi.py/test_actual_json_schema_draft_is_same_as_hardcoded_default DEBUG_MODE: bool = True """ @@ -183,8 +184,17 @@ def validate_jsonschema_fail_fast( raise err -def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: - return schema.get("$schema", _DEFAULT_JSON_SCHEMA_DRAFT_URL) +def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: + """ + Return value of `$schema`_. + + Defines which JSON Schema draft ``schema`` was written for. + + .. 
_$schema: + https://json-schema.org/understanding-json-schema/reference/schema#schema + + """ + return schema.get("$schema", _DEFAULT_DIALECT_URI) def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: @@ -218,11 +228,9 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -def _prepare_validator(url: str, /) -> Callable[..., Validator]: - tp = cast( - "Callable[..., Validator]", - jsonschema.validators.validator_for({"$schema": url}), - ) +def _prepare_validator(uri: str, /) -> Callable[..., Validator]: + # tp = cast("Callable[..., Validator]", jsonschema.validators.validator_for({"$schema": uri})) + tp: Callable[..., Validator] = jsonschema.validators.validator_for({"$schema": uri}) if hasattr(tp, "FORMAT_CHECKER"): return partial(tp, format_checker=tp.FORMAT_CHECKER) else: @@ -236,9 +244,9 @@ def _prepare_validator(url: str, /) -> Callable[..., Validator]: def _construct_validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - url = _get_json_schema_draft_url(rootschema or schema) - tp = _prepare_validator(url) - registry = _get_referencing_registry(rootschema or schema, url) + uri = _get_schema_dialect_uri(rootschema or schema) + tp = _prepare_validator(uri) + registry = _get_referencing_registry(rootschema or schema, uri) return tp(_prepare_references(schema), registry=registry) def _get_referencing_registry( @@ -256,7 +264,7 @@ def _get_referencing_registry( We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case ``referencing`` is installed. 
""" - dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) + dialect_id = json_schema_draft_url or _get_schema_dialect_uri(rootschema) specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) @@ -275,7 +283,7 @@ def _resolve_references( def _construct_validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - tp = _prepare_validator(_get_json_schema_draft_url(rootschema or schema)) + tp = _prepare_validator(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 231b60d74..10ea2ecb2 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -22,7 +22,7 @@ import altair as alt from altair import load_schema from altair.utils.schemapi import ( - _DEFAULT_JSON_SCHEMA_DRAFT_URL, + _DEFAULT_DIALECT_URI, SchemaBase, SchemaValidationError, Undefined, @@ -42,9 +42,9 @@ def test_actual_json_schema_draft_is_same_as_hardcoded_default(): - # See comments next to definition of _DEFAULT_JSON_SCHEMA_DRAFT_URL + # See comments next to definition of `_DEFAULT_DIALECT_URI` # for details why we need this test - assert _DEFAULT_JSON_SCHEMA_DRAFT_URL == _JSON_SCHEMA_DRAFT_URL, ( + assert _DEFAULT_DIALECT_URI == _JSON_SCHEMA_DRAFT_URL, ( "The default json schema URL, which is hardcoded," + " is not the same as the one used in the Vega-Lite schema." + " You need to update the default value." @@ -876,6 +876,8 @@ def test_chart_validation_errors(chart_func, expected_error_message): _SKIP_SLOW_BENCHMARKS: bool = True +_REPEAT_TIMES = 1000 +# to_dict optimize had no observable benefit @pytest.mark.skipif( @@ -886,9 +888,13 @@ def test_chart_validation_benchmark() -> None: """ Intended to isolate the `to_dict` call. 
- Repeated ``1000`` times, non-parametric: + Repeated ``_REPEAT_TIMES`` times, non-parametric: - in an attempt to limit the potential overhead of ``pytest`` - but enforce ``1`` thread, like a user-code would be. + + Results + ------- + 8/22/2024, 10:06:32 - 1000x in 108.46s (0:01:48) """ if TYPE_CHECKING: from typing import Iterator @@ -903,7 +909,7 @@ def _iter_charts(*, times: int) -> Iterator[ChartType]: charts: list[ChartType] = [fn() for fn, _ in chart_funcs_error_message] yield from chain.from_iterable(repeat(charts, times=times)) - for chart in _iter_charts(times=1000): + for chart in _iter_charts(times=_REPEAT_TIMES): with pytest.raises(SchemaValidationError): chart.to_dict(validate=True) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 465c5681b..e5e96be84 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -48,9 +48,9 @@ from typing_extensions import TypeIs if sys.version_info >= (3, 11): - from typing import Never, Self + from typing import LiteralString, Never, Self else: - from typing_extensions import Never, Self + from typing_extensions import LiteralString, Never, Self if sys.version_info >= (3, 10): from typing import TypeAlias else: @@ -83,18 +83,19 @@ It just cannot be an empty string as we need to reference the schema registered in the ``referencing.Registry``.""" -_DEFAULT_JSON_SCHEMA_DRAFT_URL: Final = "http://json-schema.org/draft-07/schema#" +_DEFAULT_DIALECT_URI: LiteralString = "http://json-schema.org/draft-07/schema#" """ -Ideally, jsonschema specification would be parsed from the current Vega-Lite -schema instead of being hardcoded here as a default value. +Ideally, this would be parsed from the current Vega-Lite schema, and not hardcoded here. -However, due to circular imports between this module and the ``alt.vegalite`` -modules, this information is not yet available at this point as ``alt.vegalite`` -is only partially loaded. 
+However, due to circular imports between this module and ``alt.vegalite``, +this information is not yet available as the latter is only *partially* loaded. -The draft version which is used is unlikely to change often so it's ok to keep this. -There is also a test which validates that this value is always the same as in the Vega-Lite schema. +The `draft version`_ which is used is unlikely to change often so it's ok to keep this. + +.. _draft version: + https://json-schema.org/understanding-json-schema/reference/schema#declaring-a-dialect """ +# RELATED: tests/utils/test/schemapi.py/test_actual_json_schema_draft_is_same_as_hardcoded_default DEBUG_MODE: bool = True """ @@ -181,8 +182,17 @@ def validate_jsonschema_fail_fast( raise err -def _get_json_schema_draft_url(schema: dict[str, Any]) -> str: - return schema.get("$schema", _DEFAULT_JSON_SCHEMA_DRAFT_URL) +def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: + """ + Return value of `$schema`_. + + Defines which JSON Schema draft ``schema`` was written for. + + .. 
_$schema: + https://json-schema.org/understanding-json-schema/reference/schema#schema + + """ + return schema.get("$schema", _DEFAULT_DIALECT_URI) def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: @@ -216,11 +226,8 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -def _prepare_validator(url: str, /) -> Callable[..., Validator]: - tp = cast( - "Callable[..., Validator]", - jsonschema.validators.validator_for({"$schema": url}), - ) +def _prepare_validator(uri: str, /) -> Callable[..., Validator]: + tp: Callable[..., Validator] = jsonschema.validators.validator_for({"$schema": uri}) if hasattr(tp, "FORMAT_CHECKER"): return partial(tp, format_checker=tp.FORMAT_CHECKER) else: @@ -234,9 +241,9 @@ def _prepare_validator(url: str, /) -> Callable[..., Validator]: def _construct_validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - url = _get_json_schema_draft_url(rootschema or schema) - tp = _prepare_validator(url) - registry = _get_referencing_registry(rootschema or schema, url) + uri = _get_schema_dialect_uri(rootschema or schema) + tp = _prepare_validator(uri) + registry = _get_referencing_registry(rootschema or schema, uri) return tp(_prepare_references(schema), registry=registry) def _get_referencing_registry( @@ -254,7 +261,7 @@ def _get_referencing_registry( We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case ``referencing`` is installed. 
""" - dialect_id = json_schema_draft_url or _get_json_schema_draft_url(rootschema) + dialect_id = json_schema_draft_url or _get_schema_dialect_uri(rootschema) specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) @@ -273,7 +280,7 @@ def _resolve_references( def _construct_validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - tp = _prepare_validator(_get_json_schema_draft_url(rootschema or schema)) + tp = _prepare_validator(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) From 61bf44877472f5dca2107967b1c9084cc416f288 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:24:57 +0100 Subject: [PATCH 27/92] perf: Cache the result of `referencing.jsonschema.specification_with` Currently, this will only ever return a single result, based on https://json-schema.org/draft-07/json-schema-release-notes --- tools/schemapi/schemapi.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index e5e96be84..1a87fec11 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -38,7 +38,7 @@ from typing import ClassVar, Literal, Mapping from jsonschema.protocols import Validator, _JsonParameter - from referencing import Registry + from referencing import Registry, Specification from altair.typing import ChartType @@ -235,8 +235,19 @@ def _prepare_validator(uri: str, /) -> Callable[..., Validator]: if Version(importlib_version("jsonschema")) >= Version("4.18"): + from functools import lru_cache + from referencing import Registry - from referencing.jsonschema import specification_with + from referencing.jsonschema import specification_with as _specification_with + + 
@lru_cache(maxsize=None) + def specification_with(dialect_id: str, /) -> Specification[Any]: + """ + Directly wraps ``referencing.jsonschema.specification_with``. + + The original function returns one **immutable** object per JSON Schema **dialect**. + """ + return _specification_with(dialect_id) def _construct_validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None From c665bb17efa564d95177253899d8ed8f0727f562 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:25:23 +0100 Subject: [PATCH 28/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index b7fa7e4bf..075e1fa7e 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -40,7 +40,7 @@ from typing import ClassVar, Literal, Mapping from jsonschema.protocols import Validator, _JsonParameter - from referencing import Registry + from referencing import Registry, Specification from altair.typing import ChartType @@ -229,7 +229,6 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: def _prepare_validator(uri: str, /) -> Callable[..., Validator]: - # tp = cast("Callable[..., Validator]", jsonschema.validators.validator_for({"$schema": uri})) tp: Callable[..., Validator] = jsonschema.validators.validator_for({"$schema": uri}) if hasattr(tp, "FORMAT_CHECKER"): return partial(tp, format_checker=tp.FORMAT_CHECKER) @@ -238,8 +237,19 @@ def _prepare_validator(uri: str, /) -> Callable[..., Validator]: if Version(importlib_version("jsonschema")) >= Version("4.18"): + from functools import lru_cache + from referencing import Registry - from referencing.jsonschema import specification_with + from referencing.jsonschema import specification_with as _specification_with + + @lru_cache(maxsize=None) + def specification_with(dialect_id: str, /) -> 
Specification[Any]: + """ + Directly wraps ``referencing.jsonschema.specification_with``. + + The original function returns one **immutable** object per JSON Schema **dialect**. + """ + return _specification_with(dialect_id) def _construct_validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None From 69588b341984108f8abc0d16342925fb1bb96897 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:01:31 +0100 Subject: [PATCH 29/92] fix(typing): Address `None` propagation, Remove outdated doc If a `None` reached `_get_schema_dialect_uri`, it would cause a runtime error. `None` does not have a `get` method. Confident this is only a theoretical issue, but is now fixed at the source. --- tools/schemapi/schemapi.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 1a87fec11..2c66daed6 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -258,30 +258,23 @@ def _construct_validator( return tp(_prepare_references(schema), registry=registry) def _get_referencing_registry( - rootschema: dict[str, Any], json_schema_draft_url: str | None = None + rootschema: dict[str, Any], dialect_id: str ) -> Registry[Any]: """ Referencing is a dependency of newer jsonschema versions. See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 - - We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library - is not installed. - That's ok as in these cases this function is not called. - - We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case - ``referencing`` is installed. 
""" - dialect_id = json_schema_draft_url or _get_schema_dialect_uri(rootschema) specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None + schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" - registry = _get_referencing_registry(rootschema or schema) + uri = _get_schema_dialect_uri(rootschema) + registry = _get_referencing_registry(rootschema or schema, uri) resolver = registry.resolver() while "$ref" in schema: schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents @@ -298,7 +291,7 @@ def _construct_validator( return tp(schema, resolver=resolver) def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None + schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """ Resolve schema references until there is no $ref anymore in the top-level of the dictionary. 
@@ -900,7 +893,7 @@ class SchemaBase: """ _schema: ClassVar[dict[str, Any] | Any] = None - _rootschema: ClassVar[dict[str, Any] | None] = None + _rootschema: ClassVar[dict[str, Any] | Any] = None _class_is_valid_at_instantiation: ClassVar[bool] = True def __init__(self, *args: Any, **kwds: Any) -> None: @@ -1221,13 +1214,17 @@ def validate( @classmethod def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, Any]: """Resolve references in the context of this object's schema or root schema.""" - schema_to_pass = schema or cls._schema - # For the benefit of mypy - assert schema_to_pass is not None - return _resolve_references( - schema=schema_to_pass, - rootschema=(cls._rootschema or cls._schema or schema), - ) + rootschema = cls._rootschema or cls._schema or schema + if rootschema is None: + name = type(cls).__name__ + msg = ( + f"{name}.resolve_references() provided only `None` values for:\n" + f"{schema=}, {cls._schema=}, {cls._rootschema=}.\n\n" + f"This variant indicates the class definition {name!r} is invalid." 
+ ) + raise TypeError(msg) + else: + return _resolve_references(schema or cls._schema, rootschema=rootschema) @classmethod def validate_property( From 7fceab86f094855de60eef8b0695f6a61a028938 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:03:57 +0100 Subject: [PATCH 30/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 075e1fa7e..b27f6c3c3 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -260,30 +260,23 @@ def _construct_validator( return tp(_prepare_references(schema), registry=registry) def _get_referencing_registry( - rootschema: dict[str, Any], json_schema_draft_url: str | None = None + rootschema: dict[str, Any], dialect_id: str ) -> Registry[Any]: """ Referencing is a dependency of newer jsonschema versions. See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 - - We ignore 'import' ``mypy`` errors which happen when the ``referencing`` library - is not installed. - That's ok as in these cases this function is not called. - - We also have to ignore 'unused-ignore' errors as ``mypy`` raises those in case - ``referencing`` is installed. 
""" - dialect_id = json_schema_draft_url or _get_schema_dialect_uri(rootschema) specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None + schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" - registry = _get_referencing_registry(rootschema or schema) + uri = _get_schema_dialect_uri(rootschema) + registry = _get_referencing_registry(rootschema or schema, uri) resolver = registry.resolver() while "$ref" in schema: schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents @@ -300,7 +293,7 @@ def _construct_validator( return tp(schema, resolver=resolver) def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None + schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """ Resolve schema references until there is no $ref anymore in the top-level of the dictionary. 
@@ -902,7 +895,7 @@ class SchemaBase: """ _schema: ClassVar[dict[str, Any] | Any] = None - _rootschema: ClassVar[dict[str, Any] | None] = None + _rootschema: ClassVar[dict[str, Any] | Any] = None _class_is_valid_at_instantiation: ClassVar[bool] = True def __init__(self, *args: Any, **kwds: Any) -> None: @@ -1223,13 +1216,17 @@ def validate( @classmethod def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, Any]: """Resolve references in the context of this object's schema or root schema.""" - schema_to_pass = schema or cls._schema - # For the benefit of mypy - assert schema_to_pass is not None - return _resolve_references( - schema=schema_to_pass, - rootschema=(cls._rootschema or cls._schema or schema), - ) + rootschema = cls._rootschema or cls._schema or schema + if rootschema is None: + name = type(cls).__name__ + msg = ( + f"{name}.resolve_references() provided only `None` values for:\n" + f"{schema=}, {cls._schema=}, {cls._rootschema=}.\n\n" + f"This variant indicates the class definition {name!r} is invalid." + ) + raise TypeError(msg) + else: + return _resolve_references(schema or cls._schema, rootschema=rootschema) @classmethod def validate_property( From e6912c1abaacaa49c586de4e1bd7bbc5b51d43ce Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:34:57 +0100 Subject: [PATCH 31/92] docs: Improve `specification_with` --- tools/schemapi/schemapi.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 2c66daed6..f80683b7f 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -243,9 +243,20 @@ def _prepare_validator(uri: str, /) -> Callable[..., Validator]: @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: """ - Directly wraps ``referencing.jsonschema.specification_with``. 
+ Retrieve the `Specification`_ with the given dialect identifier. - The original function returns one **immutable** object per JSON Schema **dialect**. + Wraps `specification_with`_, which returns one **immutable** object per + JSON Schema **dialect**. + + Raises + ------ + ``UnknownDialect`` + if the given ``dialect_id`` isn't known + + .. _Specification: + https://referencing.readthedocs.io/en/stable/api/#referencing.Specification + .. _specification_with: + https://referencing.readthedocs.io/en/stable/api/#referencing.jsonschema.specification_with """ return _specification_with(dialect_id) From 283b69ed8dd23d40a7ad3304c341d58f9b4edabe Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:44:10 +0100 Subject: [PATCH 32/92] refactor: Rename`_get_referencing_registry` -> `_registry`, improve doc The original name is misleading, as this is a factory function. That is, a new `Registry` is created on each call. I think this could be impacting performance. 
Discarding the registry every time doesn't utilise the immutable properties provided by https://referencing.readthedocs.io/en/stable/api/#referencing.Registry --- tools/schemapi/schemapi.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index f80683b7f..e1fb6f5f8 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -265,16 +265,21 @@ def _construct_validator( ) -> Validator: uri = _get_schema_dialect_uri(rootschema or schema) tp = _prepare_validator(uri) - registry = _get_referencing_registry(rootschema or schema, uri) + registry = _registry(rootschema or schema, uri) return tp(_prepare_references(schema), registry=registry) - def _get_referencing_registry( - rootschema: dict[str, Any], dialect_id: str - ) -> Registry[Any]: + def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: """ - Referencing is a dependency of newer jsonschema versions. + Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. - See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + Requires at least ``jsonschema`` `v4.18.0a1`_. + + .. _Registry: + https://referencing.readthedocs.io/en/stable/api/#referencing.Registry + .. _Resource: + https://referencing.readthedocs.io/en/stable/api/#referencing.Resource + .. 
_v4.18.0a1: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 """ specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) @@ -285,7 +290,7 @@ def _resolve_references( ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" uri = _get_schema_dialect_uri(rootschema) - registry = _get_referencing_registry(rootschema or schema, uri) + registry = _registry(rootschema or schema, uri) resolver = registry.resolver() while "$ref" in schema: schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents From 56be98401591a17a2a1c0857c1a47078dbdaa159 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:11:38 +0100 Subject: [PATCH 33/92] refactor: Renaming, docs to align with `jsonschema` Now very clear what is being wrapped, and where to find more information --- tools/schemapi/schemapi.py | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index e1fb6f5f8..31eb8f7ab 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -226,7 +226,20 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -def _prepare_validator(uri: str, /) -> Callable[..., Validator]: +def _validator_for(uri: str, /) -> Callable[..., Validator]: + """ + Retrieve the constructor for a `Validator`_ class appropriate for validating the given schema. + + Parameters + ---------- + uri + Address pointing to the `$schema`_. + + .. _Validator: + https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol + .. 
_$schema: + https://json-schema.org/understanding-json-schema/reference/schema + """ tp: Callable[..., Validator] = jsonschema.validators.validator_for({"$schema": uri}) if hasattr(tp, "FORMAT_CHECKER"): return partial(tp, format_checker=tp.FORMAT_CHECKER) @@ -260,11 +273,24 @@ def specification_with(dialect_id: str, /) -> Specification[Any]: """ return _specification_with(dialect_id) - def _construct_validator( + def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: + """ + Constructs a `Validator`_ for future validation. + + Parameters + ---------- + schema + Schema that a spec will be validated against. + rootschema + Context to evaluate within. + + .. _Validator: + https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol + """ uri = _get_schema_dialect_uri(rootschema or schema) - tp = _prepare_validator(uri) + tp = _validator_for(uri) registry = _registry(rootschema or schema, uri) return tp(_prepare_references(schema), registry=registry) @@ -297,10 +323,10 @@ def _resolve_references( return schema else: - def _construct_validator( + def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - tp = _prepare_validator(_get_schema_dialect_uri(rootschema or schema)) + tp = _validator_for(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) @@ -406,7 +432,7 @@ def _iter_validator_errors( would be a valid $ref in a Vega-Lite schema but it is not a valid URI reference due to the characters such as '<'. 
""" - return _construct_validator(schema, rootschema).iter_errors(spec) + return _validator(schema, rootschema).iter_errors(spec) def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: From 4c4b322a5f3661b1c11de17543fde482c01caf3c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:14:09 +0100 Subject: [PATCH 34/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 72 +++++++++++++++++++++++++++++++--------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index b27f6c3c3..254fad1c2 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -228,7 +228,20 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v -def _prepare_validator(uri: str, /) -> Callable[..., Validator]: +def _validator_for(uri: str, /) -> Callable[..., Validator]: + """ + Retrieve the constructor for a `Validator`_ class appropriate for validating the given schema. + + Parameters + ---------- + uri + Address pointing to the `$schema`_. + + .. _Validator: + https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol + .. _$schema: + https://json-schema.org/understanding-json-schema/reference/schema + """ tp: Callable[..., Validator] = jsonschema.validators.validator_for({"$schema": uri}) if hasattr(tp, "FORMAT_CHECKER"): return partial(tp, format_checker=tp.FORMAT_CHECKER) @@ -245,27 +258,56 @@ def _prepare_validator(uri: str, /) -> Callable[..., Validator]: @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: """ - Directly wraps ``referencing.jsonschema.specification_with``. + Retrieve the `Specification`_ with the given dialect identifier. + + Wraps `specification_with`_, which returns one **immutable** object per + JSON Schema **dialect**. 
+ + Raises + ------ + ``UnknownDialect`` + if the given ``dialect_id`` isn't known - The original function returns one **immutable** object per JSON Schema **dialect**. + .. _Specification: + https://referencing.readthedocs.io/en/stable/api/#referencing.Specification + .. _specification_with: + https://referencing.readthedocs.io/en/stable/api/#referencing.jsonschema.specification_with """ return _specification_with(dialect_id) - def _construct_validator( + def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: + """ + Constructs a `Validator`_ for future validation. + + Parameters + ---------- + schema + Schema that a spec will be validated against. + rootschema + Context to evaluate within. + + .. _Validator: + https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol + """ uri = _get_schema_dialect_uri(rootschema or schema) - tp = _prepare_validator(uri) - registry = _get_referencing_registry(rootschema or schema, uri) + tp = _validator_for(uri) + registry = _registry(rootschema or schema, uri) return tp(_prepare_references(schema), registry=registry) - def _get_referencing_registry( - rootschema: dict[str, Any], dialect_id: str - ) -> Registry[Any]: + def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: """ - Referencing is a dependency of newer jsonschema versions. + Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. - See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + Requires at least ``jsonschema`` `v4.18.0a1`_. + + .. _Registry: + https://referencing.readthedocs.io/en/stable/api/#referencing.Registry + .. _Resource: + https://referencing.readthedocs.io/en/stable/api/#referencing.Resource + .. 
_v4.18.0a1: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 """ specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) @@ -276,17 +318,17 @@ def _resolve_references( ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" uri = _get_schema_dialect_uri(rootschema) - registry = _get_referencing_registry(rootschema or schema, uri) + registry = _registry(rootschema or schema, uri) resolver = registry.resolver() while "$ref" in schema: schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents return schema else: - def _construct_validator( + def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - tp = _prepare_validator(_get_schema_dialect_uri(rootschema or schema)) + tp = _validator_for(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) @@ -392,7 +434,7 @@ def _iter_validator_errors( would be a valid $ref in a Vega-Lite schema but it is not a valid URI reference due to the characters such as '<'. 
""" - return _construct_validator(schema, rootschema).iter_errors(spec) + return _validator(schema, rootschema).iter_errors(spec) def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: From a3a148e88353c9be2369e30c4111fbd92dada254 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 19:18:12 +0100 Subject: [PATCH 35/92] refactor: Factor-out `_iter_validator_errors` --- tools/schemapi/schemapi.py | 49 +++++++++++++++----------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 31eb8f7ab..b0b4a97b5 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -148,8 +148,25 @@ def validate_jsonschema( - The first error is monkeypatched with a grouped iterator of all remaining errors - ``SchemaValidationError`` utilizes the patched attribute, to craft a more helpful error message. - However this breaks typing + + ``schema`` and ``rootschema`` are not validated but instead considered as valid. + + We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. + Instead, we pass the ``schema`` directly to the validator class. + + This is done for two reasons: + + 1. The schema comes from Vega-Lite and is not based on the user + input, therefore there is no need to validate it in the first place. + 2. The "uri-reference" format checker fails for some of the + references as URIs in "$ref" are not encoded, e.g.: + + '#/definitions/ValueDefWithCondition' + + would be a valid $ref in a Vega-Lite schema but it is not a valid + URI reference due to the characters such as '<'. 
""" - it_errors = _iter_validator_errors(spec, schema, rootschema=rootschema) + it_errors = _validator(schema, rootschema).iter_errors(spec) if first_error := next(it_errors, None): groups = _group_tree_leaves(_rechain(first_error, it_errors)) most_specific = _prune_subset_paths(groups) @@ -177,7 +194,7 @@ def validate_jsonschema_fail_fast( Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. """ if ( - err := next(_iter_validator_errors(spec, schema, rootschema=rootschema), None) + err := next(_validator(schema, rootschema).iter_errors(spec), None) ) is not None: raise err @@ -407,34 +424,6 @@ def _regroup( yield grouped_it -def _iter_validator_errors( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, -) -> _ErrsLazy: - """ - Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. - - ``schema`` and ``rootschema`` are not validated but instead considered as valid. - - We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. - Instead, we pass the ``schema`` directly to the validator class. - - This is done for two reasons: - - 1. The schema comes from Vega-Lite and is not based on the user - input, therefore there is no need to validate it in the first place. - 2. The "uri-reference" format checker fails for some of the - references as URIs in "$ref" are not encoded, e.g.: - - '#/definitions/ValueDefWithCondition' - - would be a valid $ref in a Vega-Lite schema but it is not a valid - URI reference due to the characters such as '<'. 
- """ - return _validator(schema, rootschema).iter_errors(spec) - - def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: """ Combines 3 previously distinct steps: From 5fd6787b71bd57263fbade1428a3f9fd1c8812a9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 21:54:13 +0100 Subject: [PATCH 36/92] chore: rename `tp` -> `validator` --- tools/schemapi/schemapi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index b0b4a97b5..288a44674 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -307,9 +307,9 @@ def _validator( https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol """ uri = _get_schema_dialect_uri(rootschema or schema) - tp = _validator_for(uri) + validator = _validator_for(uri) registry = _registry(rootschema or schema, uri) - return tp(_prepare_references(schema), registry=registry) + return validator(_prepare_references(schema), registry=registry) def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: """ @@ -343,11 +343,11 @@ def _resolve_references( def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - tp = _validator_for(_get_schema_dialect_uri(rootschema or schema)) + validator = _validator_for(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) - return tp(schema, resolver=resolver) + return validator(schema, resolver=resolver) def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] From 4cc16194e173edc0b45381d7451d0cd9f26bd7fe Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 22:00:35 +0100 Subject: [PATCH 37/92] perf: Experiment with more cache layers All of this is intended to avoid repeating any work 
that has already been performed. --- tools/schemapi/schemapi.py | 64 ++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 288a44674..20f891cbf 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -7,7 +7,7 @@ import sys import textwrap from collections import defaultdict -from functools import partial +from functools import lru_cache, partial from importlib.metadata import version as importlib_version from itertools import chain, groupby, islice, zip_longest from math import ceil @@ -38,9 +38,9 @@ from typing import ClassVar, Literal, Mapping from jsonschema.protocols import Validator, _JsonParameter - from referencing import Registry, Specification from altair.typing import ChartType + from altair.vegalite.v5.schema._typing import Map if sys.version_info >= (3, 13): from typing import TypeIs @@ -243,6 +243,7 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v +@lru_cache(maxsize=None) def _validator_for(uri: str, /) -> Callable[..., Validator]: """ Retrieve the constructor for a `Validator`_ class appropriate for validating the given schema. @@ -265,11 +266,13 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: if Version(importlib_version("jsonschema")) >= Version("4.18"): - from functools import lru_cache - from referencing import Registry from referencing.jsonschema import specification_with as _specification_with + if TYPE_CHECKING: + from referencing import Specification + from referencing._core import Resolver + @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: """ @@ -324,20 +327,63 @@ def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: .. 
_v4.18.0a1: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 """ - specification = specification_with(dialect_id) - resource = specification.create_resource(rootschema) - return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) + global _REGISTRY_CACHE + cache_key = _registry_comp_key(rootschema, dialect_id) + if (registry := _REGISTRY_CACHE.get(cache_key, None)) is not None: + return registry + else: + specification = specification_with(dialect_id) + resource = specification.create_resource(rootschema) + registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource) + _REGISTRY_CACHE[cache_key] = registry + return registry + + def _registry_update( + root: dict[str, Any], dialect_id: str, resolver: Resolver[Any] + ): + global _REGISTRY_CACHE + cache_key = _registry_comp_key(root, dialect_id) + _REGISTRY_CACHE[cache_key] = resolver._registry def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" + root = rootschema or schema + if ("$ref" not in root) or ("$ref" not in schema): + return schema uri = _get_schema_dialect_uri(rootschema) - registry = _registry(rootschema or schema, uri) + registry = _registry(root, uri) resolver = registry.resolver() while "$ref" in schema: - schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents + resolved = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]) + schema = resolved.contents + _registry_update(root, uri, resolved.resolver) return schema + + def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: + """ + Generate a simple-minded hash to identify a registry. + + Notes + ----- + Why the strange hash? + - **All** generated schemas hit the ``"$ref"`` branch. + - ``api.Then`` hits the len(...) 1 branch w/ ``{"type": "object"}``. 
+ - Final branch is only hit by mock schemas in: + - `tests/utils/test_core.py::test_infer_encoding_types` + - `tests/utils/test_schemapi.py` + """ + if "$ref" in root: + k1 = root["$ref"] + elif len(root) == 1: + k1 = "".join(f"{s!s}" for s in chain(*root.items())) + else: + k1 = json.dumps(root, separators=(",", ":"), sort_keys=True) + return k1, dialect_id + + _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} + else: def _validator( From 509b67846234416da84621c0a6e6130f336be8d1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 22 Aug 2024 22:02:31 +0100 Subject: [PATCH 38/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 121 +++++++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 43 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 254fad1c2..a97dbc789 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -9,7 +9,7 @@ import sys import textwrap from collections import defaultdict -from functools import partial +from functools import lru_cache, partial from importlib.metadata import version as importlib_version from itertools import chain, groupby, islice, zip_longest from math import ceil @@ -40,9 +40,9 @@ from typing import ClassVar, Literal, Mapping from jsonschema.protocols import Validator, _JsonParameter - from referencing import Registry, Specification from altair.typing import ChartType + from altair.vegalite.v5.schema._typing import Map if sys.version_info >= (3, 13): from typing import TypeIs @@ -150,8 +150,25 @@ def validate_jsonschema( - The first error is monkeypatched with a grouped iterator of all remaining errors - ``SchemaValidationError`` utilizes the patched attribute, to craft a more helpful error message. - However this breaks typing + + ``schema`` and ``rootschema`` are not validated but instead considered as valid. 
+ + We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. + Instead, we pass the ``schema`` directly to the validator class. + + This is done for two reasons: + + 1. The schema comes from Vega-Lite and is not based on the user + input, therefore there is no need to validate it in the first place. + 2. The "uri-reference" format checker fails for some of the + references as URIs in "$ref" are not encoded, e.g.: + + '#/definitions/ValueDefWithCondition' + + would be a valid $ref in a Vega-Lite schema but it is not a valid + URI reference due to the characters such as '<'. """ - it_errors = _iter_validator_errors(spec, schema, rootschema=rootschema) + it_errors = _validator(schema, rootschema).iter_errors(spec) if first_error := next(it_errors, None): groups = _group_tree_leaves(_rechain(first_error, it_errors)) most_specific = _prune_subset_paths(groups) @@ -179,7 +196,7 @@ def validate_jsonschema_fail_fast( Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. """ if ( - err := next(_iter_validator_errors(spec, schema, rootschema=rootschema), None) + err := next(_validator(schema, rootschema).iter_errors(spec), None) ) is not None: raise err @@ -228,6 +245,7 @@ def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: yield k, v +@lru_cache(maxsize=None) def _validator_for(uri: str, /) -> Callable[..., Validator]: """ Retrieve the constructor for a `Validator`_ class appropriate for validating the given schema. 
@@ -250,11 +268,13 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: if Version(importlib_version("jsonschema")) >= Version("4.18"): - from functools import lru_cache - from referencing import Registry from referencing.jsonschema import specification_with as _specification_with + if TYPE_CHECKING: + from referencing import Specification + from referencing._core import Resolver + @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: """ @@ -292,9 +312,9 @@ def _validator( https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol """ uri = _get_schema_dialect_uri(rootschema or schema) - tp = _validator_for(uri) + validator = _validator_for(uri) registry = _registry(rootschema or schema, uri) - return tp(_prepare_references(schema), registry=registry) + return validator(_prepare_references(schema), registry=registry) def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: """ @@ -309,30 +329,73 @@ def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: .. 
_v4.18.0a1: https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 """ - specification = specification_with(dialect_id) - resource = specification.create_resource(rootschema) - return Registry().with_resource(uri=_VEGA_LITE_ROOT_URI, resource=resource) + global _REGISTRY_CACHE + cache_key = _registry_comp_key(rootschema, dialect_id) + if (registry := _REGISTRY_CACHE.get(cache_key, None)) is not None: + return registry + else: + specification = specification_with(dialect_id) + resource = specification.create_resource(rootschema) + registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource) + _REGISTRY_CACHE[cache_key] = registry + return registry + + def _registry_update( + root: dict[str, Any], dialect_id: str, resolver: Resolver[Any] + ): + global _REGISTRY_CACHE + cache_key = _registry_comp_key(root, dialect_id) + _REGISTRY_CACHE[cache_key] = resolver._registry def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" + root = rootschema or schema + if ("$ref" not in root) or ("$ref" not in schema): + return schema uri = _get_schema_dialect_uri(rootschema) - registry = _registry(rootschema or schema, uri) + registry = _registry(root, uri) resolver = registry.resolver() while "$ref" in schema: - schema = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]).contents + resolved = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]) + schema = resolved.contents + _registry_update(root, uri, resolved.resolver) return schema + + def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: + """ + Generate a simple-minded hash to identify a registry. + + Notes + ----- + Why the strange hash? + - **All** generated schemas hit the ``"$ref"`` branch. + - ``api.Then`` hits the len(...) 1 branch w/ ``{"type": "object"}``. 
+ - Final branch is only hit by mock schemas in: + - `tests/utils/test_core.py::test_infer_encoding_types` + - `tests/utils/test_schemapi.py` + """ + if "$ref" in root: + k1 = root["$ref"] + elif len(root) == 1: + k1 = "".join(f"{s!s}" for s in chain(*root.items())) + else: + k1 = json.dumps(root, separators=(",", ":"), sort_keys=True) + return k1, dialect_id + + _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} + else: def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: - tp = _validator_for(_get_schema_dialect_uri(rootschema or schema)) + validator = _validator_for(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema ) - return tp(schema, resolver=resolver) + return validator(schema, resolver=resolver) def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] @@ -409,34 +472,6 @@ def _regroup( yield grouped_it -def _iter_validator_errors( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, -) -> _ErrsLazy: - """ - Uses the relevant ``jsonschema`` validator to validate ``spec`` against ``schema`` using `` rootschema`` to resolve references. - - ``schema`` and ``rootschema`` are not validated but instead considered as valid. - - We don't use ``jsonschema.validate`` as this would validate the ``schema`` itself. - Instead, we pass the ``schema`` directly to the validator class. - - This is done for two reasons: - - 1. The schema comes from Vega-Lite and is not based on the user - input, therefore there is no need to validate it in the first place. - 2. The "uri-reference" format checker fails for some of the - references as URIs in "$ref" are not encoded, e.g.: - - '#/definitions/ValueDefWithCondition' - - would be a valid $ref in a Vega-Lite schema but it is not a valid - URI reference due to the characters such as '<'. 
- """ - return _validator(schema, rootschema).iter_errors(spec) - - def _group_tree_leaves(errors: _Errs, /) -> _IntoLazyGroup: """ Combines 3 previously distinct steps: From c594e55e76613d0363e3b419a00cb2f920547755 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 23 Aug 2024 16:28:24 +0100 Subject: [PATCH 39/92] ci: Add patterns for `pyright` Fixes: ``` 2024-08-23 16:23:02.509 [info] [Info - 4:23:02 PM] (15104) Loading pyproject.toml file at c:\Users\danie\Documents\GitHub\altair\pyproject.toml 2024-08-23 16:23:02.511 [info] [Info - 4:23:02 PM] (15104) No include entries specified; assuming c:\Users\danie\Documents\GitHub\altair 2024-08-23 16:23:04.953 [info] [Info - 4:23:04 PM] (15104) Found 8234 source files 2024-08-23 16:23:50.675 [info] [Warn - 4:23:50 PM] (15104) Workspace indexing has hit its upper limit: 5000 files ``` --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 47abd59f1..0fd722574 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -443,3 +443,10 @@ extraPaths=["./tools"] pythonPlatform="All" pythonVersion="3.8" reportUnusedExpression="none" +include=[ + "./altair/**/*.py", + ".doc/*.py", + "./sphinxext/**/*.py", + "./tests/**/*.py", + "./tools/**/*.py", +] From 5334a2c23a3b402e3561a3ec218f60ad5204e40c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 24 Aug 2024 15:43:43 +0100 Subject: [PATCH 40/92] feat: Adds `schemapi.__all__` Related https://github.com/vega/altair/pull/3556 --- tests/utils/test_schemapi.py | 23 +++++++++-------------- tests/vegalite/v5/test_api.py | 7 ++++--- tools/schemapi/schemapi.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 10ea2ecb2..1a20b9371 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -21,14 +21,8 @@ import 
altair as alt from altair import load_schema -from altair.utils.schemapi import ( - _DEFAULT_DIALECT_URI, - SchemaBase, - SchemaValidationError, - Undefined, - UndefinedType, - _FromDict, -) +from altair.utils import schemapi +from altair.utils.schemapi import SchemaBase, Undefined, UndefinedType from altair.vegalite.v5.schema.channels import X from altair.vegalite.v5.schema.core import FieldOneOfPredicate, Legend from vega_datasets import data @@ -44,7 +38,7 @@ def test_actual_json_schema_draft_is_same_as_hardcoded_default(): # See comments next to definition of `_DEFAULT_DIALECT_URI` # for details why we need this test - assert _DEFAULT_DIALECT_URI == _JSON_SCHEMA_DRAFT_URL, ( + assert schemapi._DEFAULT_DIALECT_URI == _JSON_SCHEMA_DRAFT_URL, ( "The default json schema URL, which is hardcoded," + " is not the same as the one used in the Vega-Lite schema." + " You need to update the default value." @@ -392,10 +386,11 @@ class BadSchema(SchemaBase): @pytest.mark.parametrize("use_json", [True, False]) def test_hash_schema(use_json): classes = _TestSchema._default_wrapper_classes() + FromDict = schemapi._FromDict for cls in classes: - hsh1 = _FromDict.hash_schema(cls._schema, use_json=use_json) - hsh2 = _FromDict.hash_schema(cls._schema, use_json=use_json) + hsh1 = FromDict.hash_schema(cls._schema, use_json=use_json) + hsh2 = FromDict.hash_schema(cls._schema, use_json=use_json) assert hsh1 == hsh2 assert hash(hsh1) == hash(hsh2) @@ -407,7 +402,7 @@ def test_schema_validation_error(): except jsonschema.ValidationError as err: the_err = err - assert isinstance(the_err, SchemaValidationError) + assert isinstance(the_err, schemapi.SchemaValidationError) message = str(the_err) assert the_err.message in message @@ -871,7 +866,7 @@ def test_chart_validation_errors(chart_func, expected_error_message): warnings.filterwarnings("ignore", category=UserWarning) chart = chart_func() expected_error_message = inspect.cleandoc(expected_error_message) - with 
pytest.raises(SchemaValidationError, match=expected_error_message): + with pytest.raises(schemapi.SchemaValidationError, match=expected_error_message): chart.to_dict() @@ -910,7 +905,7 @@ def _iter_charts(*, times: int) -> Iterator[ChartType]: yield from chain.from_iterable(repeat(charts, times=times)) for chart in _iter_charts(times=_REPEAT_TIMES): - with pytest.raises(SchemaValidationError): + with pytest.raises(schemapi.SchemaValidationError): chart.to_dict(validate=True) diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index 29d68d1ea..f5a150556 100644 --- a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -22,6 +22,7 @@ from packaging.version import Version import altair as alt +from altair.utils import schemapi from altair.utils.schemapi import Optional, Undefined try: @@ -527,8 +528,6 @@ def test_when_labels_position_based_on_condition() -> None: import numpy as np import pandas as pd - from altair.utils.schemapi import SchemaValidationError - rand = np.random.RandomState(42) df = pd.DataFrame({"xval": range(100), "yval": rand.randn(100).cumsum()}) @@ -569,7 +568,9 @@ def test_when_labels_position_based_on_condition() -> None: fail_condition = alt.condition( param_width < 200, alt.value("red"), alt.value("black") ) - with pytest.raises(SchemaValidationError, match="invalid value for `expr`"): + with pytest.raises( + schemapi.SchemaValidationError, match="invalid value for `expr`" + ): alt.param(expr=fail_condition) # type: ignore diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 20f891cbf..dbe2d24cb 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -73,6 +73,17 @@ ] """Non-exhaustive listing of possible literals in ``ValidationError.validator``""" +__all__ = [ + "Optional", # altair.utils + "SchemaBase", # altair.vegalite.v5.schema.core + "Undefined", # altair.utils + "UndefinedType", # altair.vegalite.v5.schema.core -> (side-effect relied on to propagate to 
alt.__init__) + "_resolve_references", # tools.schemapi.utils -> tools.generate_schema_wrapper + "_subclasses", # altair.vegalite.v5.schema.core + "is_undefined", # altair.typing + "validate_jsonschema", # altair.utils.display + "with_property_setters", # altair.vegalite.v5.schema.channels +] _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" """ From 2751bb2399ec9bf2627b189dbfa53d712069819c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 11:47:00 +0100 Subject: [PATCH 41/92] fix: Use uniform docs in version-gated functions Both versions of these functions can be targeted by an IDE. But hovering over the name (anywhere but the actual definition) displays *only* the final docstring on hover. This duplicates the docs, and adds a **NOTE** comment to be super clear when viewing the code itself. --- tools/schemapi/schemapi.py | 41 ++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index dbe2d24cb..7515eac93 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -317,9 +317,12 @@ def _validator( rootschema Context to evaluate within. + We have **both** a current & a backwards-compatible version of this function. + .. _Validator: https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol """ + # NOTE: This is the current version uri = _get_schema_dialect_uri(rootschema or schema) validator = _validator_for(uri) registry = _registry(rootschema or schema, uri) @@ -359,7 +362,17 @@ def _registry_update( def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: - """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" + """ + Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. 
+ + ``jsonschema`` deprecated ``RefResolver`` in favor of `referencing`_. + + We have **both** a current & a backwards-compatible version of this function. + + .. _referencing: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + """ + # NOTE: This is the current version root = rootschema or schema if ("$ref" not in root) or ("$ref" not in schema): return schema @@ -400,6 +413,22 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: + """ + Constructs a `Validator`_ for future validation. + + We have **both** a current & a backwards-compatible version of this function. + + Parameters + ---------- + schema + Schema that a spec will be validated against. + rootschema + Context to evaluate within. + + .. _Validator: + https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol + """ + # NOTE: This is the backwards-compatible version validator = _validator_for(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema @@ -410,12 +439,16 @@ def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """ - Resolve schema references until there is no $ref anymore in the top-level of the dictionary. + Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. - ``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. + ``jsonschema`` deprecated ``RefResolver`` in favor of `referencing`_. - See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + We have **both** a current & a backwards-compatible version of this function. + + .. 
_referencing: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 """ + # NOTE: This is the backwards-compatible version resolver = jsonschema.RefResolver.from_schema(rootschema or schema) while "$ref" in schema: with resolver.resolving(schema["$ref"]) as resolved: From f235284b37611b1b82c8f02b8b1eb982f6dbe4d7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 11:51:32 +0100 Subject: [PATCH 42/92] refactor: Update imports --- tools/schemapi/schemapi.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 7515eac93..4d0282243 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -6,7 +6,7 @@ import operator import sys import textwrap -from collections import defaultdict +from collections import defaultdict, deque from functools import lru_cache, partial from importlib.metadata import version as importlib_version from itertools import chain, groupby, islice, zip_longest @@ -14,13 +14,10 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Dict, - Final, Iterable, - Iterator, - KeysView, List, + Mapping, Sequence, TypeVar, Union, @@ -35,7 +32,7 @@ from packaging.version import Version if TYPE_CHECKING: - from typing import ClassVar, Literal, Mapping + from typing import Callable, ClassVar, Final, Iterator, KeysView, Literal from jsonschema.protocols import Validator, _JsonParameter From bd1d5800a969fba81d57098aef1fe10d02c48472 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 12:52:37 +0100 Subject: [PATCH 43/92] chore: Remove stale `SchemaBase.from_json` comment --- tools/schemapi/schemapi.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 4d0282243..639520cd4 100644 --- a/tools/schemapi/schemapi.py +++ 
b/tools/schemapi/schemapi.py @@ -1293,12 +1293,7 @@ def from_dict( @classmethod def from_json( - cls, - json_string: str, - validate: bool = True, - **kwargs: Any, - # Type hints for this method would get rather complicated - # if we want to provide a more specific return type + cls, json_string: str, validate: bool = True, **kwargs: Any ) -> ChartType: """ Instantiate the object from a valid JSON string. From 8ca426675379f4e65d025075c81bc099c6cdadb3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 13:38:10 +0100 Subject: [PATCH 44/92] perf: Refactor `SchemaBase.from_dict` and co There's quite a lot in here, so I've left my notes in `_subclasses` temporarily. - Removed unused `use_json=False` branch - Evaluate the hash table **once** and not every time `SchemaBase.from_dict` is called --- tests/utils/test_schemapi.py | 32 +++++--- tools/schemapi/schemapi.py | 149 +++++++++++++++++++++++------------ 2 files changed, 121 insertions(+), 60 deletions(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 1a20b9371..f87d8fc42 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -10,7 +10,7 @@ import warnings from collections import deque from functools import partial -from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence +from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Sequence import jsonschema import jsonschema.exceptions @@ -48,7 +48,23 @@ def test_actual_json_schema_draft_is_same_as_hardcoded_default(): class _TestSchema(SchemaBase): @classmethod def _default_wrapper_classes(cls): - return _TestSchema.__subclasses__() + return schemapi._subclasses(_TestSchema) + + @classmethod + def from_dict( + cls: type[schemapi.TSchemaBase], dct: dict[str, Any], validate: bool = True + ) -> schemapi.TSchemaBase: + """ + Overrides ``SchemaBase``, which uses a cached ``FromDict.hash_tps``. 
+ + The cached version is based on an iterator over: + + schemapi._subclasses(VegaLiteSchema) + """ + if validate: + cls.validate(dct) + converter = schemapi._FromDict(cls._default_wrapper_classes()) + return converter.from_dict(dct, cls) class MySchema(_TestSchema): @@ -383,14 +399,10 @@ class BadSchema(SchemaBase): assert str(err.value).startswith("Cannot instantiate object") -@pytest.mark.parametrize("use_json", [True, False]) -def test_hash_schema(use_json): - classes = _TestSchema._default_wrapper_classes() - FromDict = schemapi._FromDict - - for cls in classes: - hsh1 = FromDict.hash_schema(cls._schema, use_json=use_json) - hsh2 = FromDict.hash_schema(cls._schema, use_json=use_json) +def test_hash_schema(): + for cls in _TestSchema._default_wrapper_classes(): + hsh1 = schemapi._hash_schema(cls._schema) + hsh2 = schemapi._hash_schema(cls._schema) assert hsh1 == hsh2 assert hash(hsh1) == hash(hsh2) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 639520cd4..b3fac3318 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -646,17 +646,6 @@ def _prune_subset_enum(iterable: _Errs, /) -> _ErrsLazy: } -def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: - """Breadth-first sequence of all classes which inherit from cls.""" - seen = set() - current: set[type[Any]] = {cls} - while current: - seen |= current - current = set(chain.from_iterable(cls.__subclasses__() for cls in current)) - for cls in current - seen: - yield cls - - def _from_array_like(obj: Iterable[Any], /) -> list[Any]: try: ser = nw.from_native(obj, strict=True, series_only=True) @@ -1288,7 +1277,13 @@ def from_dict( """ if validate: cls.validate(dct) - converter = _FromDict(cls._default_wrapper_classes()) + # NOTE: the breadth-first search occurs only once now + # `_FromDict` is purely ClassVar/classmethods + converter: type[_FromDict] | _FromDict = ( + _FromDict + if _FromDict.hash_tps + else _FromDict(cls._default_wrapper_classes()) + ) return 
converter.from_dict(dct, cls) @classmethod @@ -1389,6 +1384,9 @@ def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: def _freeze(val): + # NOTE: No longer referenced + # - Previously only called during tests + # - Not during any library code if isinstance(val, dict): return frozenset((k, _freeze(v)) for k, v in val.items()) elif isinstance(val, set): @@ -1399,6 +1397,64 @@ def _freeze(val): return val +def _hash_schema( + schema: _JsonParameter, + /, + *, + exclude: Iterable[str] = frozenset( + ("definitions", "title", "description", "$schema", "id") + ), +) -> int: + """ + Return the hash value for a ``schema``. + + Parameters + ---------- + schema + ``SchemaBase._schema``. + exclude + ``schema`` keys which are not considered when identifying equivalence. + """ + if isinstance(schema, Mapping): + schema = {k: v for k, v in schema.items() if k not in exclude} + return hash(json.dumps(schema, sort_keys=True)) + + +def _subclasses(cls: type[TSchemaBase]) -> Iterator[type[TSchemaBase]]: + """ + Breadth-first sequence of all classes which inherit from ``cls``. 
+ + Notes + ----- + - `__subclasses__()` alone isn't helpful, as that is only immediate subclasses + - Deterministic + - Used for `SchemaBase` & `VegaLiteSchema` + - In practice, it provides an iterator over all classes in the schema below `VegaLiteSchema` + - The first one is `Root` + - The order itself, I don't think is important + - But probably important that it doesn't change + - Thinking they used an iterator so that the subclasses are evaluated after they have all been defined + + - `Chart` seems to try to avoid calling this + - Using `TopLevelMixin.__subclasses__()` first if possible + - It is always called during `Chart.encode()` + - Chart.encode() + - altair.utils.core.infer_encoding_types + - _ChannelCache.infer_encoding_types + - _ChannelCache._wrap_in_channel + - SchemaBase.from_dict (recursive, hot loop, validate =False, within a try/except) + - _FromDict(cls._default_wrapper_classes()) + - schemapi._subclasses(schema.core.VegaLiteSchema) + """ + seen = set() + current: set[type[TSchemaBase]] = {cls} + while current: + seen |= current + current = set(chain.from_iterable(cls.__subclasses__() for cls in current)) + for cls in current - seen: + yield cls + + class _FromDict: """ Class used to construct SchemaBase class hierarchies from a dict. @@ -1408,40 +1464,31 @@ class _FromDict: specified in the ``wrapper_classes`` positional-only argument to the constructor. """ - _hash_exclude_keys = ("definitions", "title", "description", "$schema", "id") + hash_tps: ClassVar[defaultdict[int, deque[type[SchemaBase]]]] = defaultdict(deque) + """ + Maps unique schemas to corresponding types. 
- def __init__(self, wrapper_classes: Iterable[type[SchemaBase]], /) -> None: - # Create a mapping of a schema hash to a list of matching classes - # This lets us quickly determine the correct class to construct - self.class_dict: dict[int, list[type[SchemaBase]]] = defaultdict(list) - for tp in wrapper_classes: - if tp._schema is not None: - self.class_dict[self.hash_schema(tp._schema)].append(tp) + The logic is that after removing a subset of keys, some schemas are identical. - @classmethod - def hash_schema(cls, schema: dict[str, Any], use_json: bool = True) -> int: - """ - Compute a python hash for a nested dictionary which properly handles dicts, lists, sets, and tuples. + If there are multiple matches, we use the first one in the ``deque``. - At the top level, the function excludes from the hashed schema all keys - listed in `exclude_keys`. + ``_subclasses`` yields the results of a `breadth-first search`_, + so the first matching class is the most general match. - This implements two methods: one based on conversion to JSON, and one based - on recursive conversions of unhashable to hashable types; the former seems - to be slightly faster in several benchmarks. - """ - if cls._hash_exclude_keys and isinstance(schema, dict): - schema = { - key: val - for key, val in schema.items() - if key not in cls._hash_exclude_keys - } - s: Any = json.dumps(schema, sort_keys=True) if use_json else _freeze(schema) - return hash(s) + .. _breadth-first search: + https://en.wikipedia.org/wiki/Breadth-first_search + """ + + def __init__(self, wrapper_classes: Iterator[type[SchemaBase]], /) -> None: + cls = type(self) + for tp in wrapper_classes: + if tp._schema is not None: + cls.hash_tps[_hash_schema(tp._schema)].append(tp) @overload + @classmethod def from_dict( - self, + cls, dct: TSchemaBase, tp: None = ..., schema: None = ..., @@ -1449,8 +1496,9 @@ def from_dict( default_class: Any = ..., ) -> TSchemaBase: ... 
@overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any] | list[dict[str, Any]], tp: Any = ..., schema: Any = ..., @@ -1458,8 +1506,9 @@ def from_dict( default_class: type[TSchemaBase] = ..., # pyright: ignore[reportInvalidTypeVarUse] ) -> TSchemaBase: ... @overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any], tp: None = ..., schema: dict[str, Any] = ..., @@ -1467,8 +1516,9 @@ def from_dict( default_class: Any = ..., ) -> SchemaBase: ... @overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any], tp: type[TSchemaBase], schema: None = ..., @@ -1476,16 +1526,18 @@ def from_dict( default_class: Any = ..., ) -> TSchemaBase: ... @overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any] | list[dict[str, Any]], tp: type[TSchemaBase], schema: dict[str, Any], rootschema: dict[str, Any] | None = ..., default_class: Any = ..., ) -> Never: ... + @classmethod def from_dict( - self, + cls, dct: dict[str, Any] | list[dict[str, Any]] | TSchemaBase, tp: type[TSchemaBase] | None = None, schema: dict[str, Any] | None = None, @@ -1502,18 +1554,15 @@ def from_dict( root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: - # If there are multiple matches, we use the first one in the dict. - # Our class dict is constructed breadth-first from top to bottom, - # so the first class that matches is the most general match. current_schema = schema root_schema = rootschema or current_schema - matches = self.class_dict[self.hash_schema(current_schema)] - target_tp = matches[0] if matches else default_class + matches = cls.hash_tps[_hash_schema(current_schema)] + target_tp = next(iter(matches), default_class) else: msg = "Must provide either `tp` or `schema`, but not both." raise ValueError(msg) - from_dict = partial(self.from_dict, rootschema=root_schema) + from_dict = partial(cls.from_dict, rootschema=root_schema) # Can also return a list? 
        resolved = _resolve_references(current_schema, root_schema)
        if "anyOf" in resolved or "oneOf" in resolved:

From 933c045f54a9987ca6a6c43f22e6a1e33fe842d9 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 25 Aug 2024 13:51:32 +0100
Subject: [PATCH 45/92] perf: Avoid expensive exceptions in
 `_FromDict.from_dict`

Try running `hatch test --all` on main vs this.
Locally, each version gets a 1.32-1.53x speedup.

Somehow this also includes `3.11`, despite that being the version "zero-cost" exceptions were introduced.
---
 tools/schemapi/schemapi.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py
index b3fac3318..fd1164fe3 100644
--- a/tools/schemapi/schemapi.py
+++ b/tools/schemapi/schemapi.py
@@ -1568,11 +1568,12 @@ def from_dict(
         if "anyOf" in resolved or "oneOf" in resolved:
             schemas = resolved.get("anyOf", []) + resolved.get("oneOf", [])
             for possible in schemas:
-                try:
-                    validate_jsonschema_fail_fast(dct, possible, rootschema=root_schema)
-                except ValidationError:
-                    continue
-                else:
+                # NOTE: Instead of raise/except/continue
+                # Pre-"zero-cost" exceptions, this has a huge performance gain.
+                # https://docs.python.org/3/whatsnew/3.11.html#misc
+                # https://github.com/python/cpython/blob/9b3749849eda4012261a112b22eb07f26fd345a9/InternalDocs/exception_handling.md
+                it_errs = _validator(possible, root_schema).iter_errors(dct)
+                if next(it_errs, None) is None:
                     return from_dict(dct, schema=possible, default_class=target_tp)

     if _is_dict(dct):

From 1e5993be41feb9f6620262d7a945ba8faaef5c75 Mon Sep 17 00:00:00 2001
From: dangotbanned <125183946+dangotbanned@users.noreply.github.com>
Date: Sun, 25 Aug 2024 13:53:46 +0100
Subject: [PATCH 46/92] chore: Add todo for `__init_subclasses__`

Thinking that moving the checks to the definition of the class (rather than the instance) could reduce a lot of calls.
--- tools/schemapi/schemapi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index fd1164fe3..29fe8be64 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -999,6 +999,8 @@ class SchemaBase: the _rootschema class attribute) which is used for validation. """ + # TODO: Implement `ClassVar` validation using https://peps.python.org/pep-0487/ + _schema: ClassVar[dict[str, Any] | Any] = None _rootschema: ClassVar[dict[str, Any] | Any] = None _class_is_valid_at_instantiation: ClassVar[bool] = True From 67b7ae431cd1b5a0b4093242a244fb010b45889d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 14:24:16 +0100 Subject: [PATCH 47/92] test: Update & add snapshots for `test_chart_validation_benchmark` --- tests/utils/test_schemapi.py | 69 +++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index f87d8fc42..4ba3ea9ca 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -884,16 +884,19 @@ def test_chart_validation_errors(chart_func, expected_error_message): _SKIP_SLOW_BENCHMARKS: bool = True _REPEAT_TIMES = 1000 -# to_dict optimize had no observable benefit +@pytest.mark.parametrize("to_or_from", ["to_dict-validate", "to_dict", "from_dict"]) +@pytest.mark.filterwarnings("ignore:.*:UserWarning") @pytest.mark.skipif( _SKIP_SLOW_BENCHMARKS, reason="Should only be run in isolation to test single threaded performance.", ) -def test_chart_validation_benchmark() -> None: +def test_chart_validation_benchmark( + to_or_from: Literal["to_dict-validate", "to_dict", "from_dict"], +) -> None: """ - Intended to isolate the `to_dict` call. + Intended to isolate `Chart.(to|from)_dict.` calls. 
Repeated ``_REPEAT_TIMES`` times, non-parametric: - in an attempt to limit the potential overhead of ``pytest`` @@ -901,24 +904,64 @@ def test_chart_validation_benchmark() -> None: Results ------- - 8/22/2024, 10:06:32 - 1000x in 108.46s (0:01:48) + ``` + _REPEAT_TIMES = 1000 + pytest -k test_chart_validation_benchmark --numprocesses=3 --durations=3 tests + + # Pre-`SchemaBase.from_dict` refactor (3.12.3) + 108.16s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] + 84.62s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] + 66.71s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] + + # Post-`SchemaBase.from_dict` refactor (3.12.3) + 107.84s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] + 50.43s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] + 67.07s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] + ``` """ + from itertools import chain, repeat + if TYPE_CHECKING: from typing import Iterator from altair.typing import ChartType - def _iter_charts(*, times: int) -> Iterator[ChartType]: - from itertools import chain, repeat + def _iter_charts() -> Iterator[ChartType]: + """ + Ensures only len(chart_funcs_error_message) actual charts are constructed. + + The `to_dict` calls are what gets multiplied + """ + charts: list[ChartType] = [fn() for fn, _ in chart_funcs_error_message] + yield from chain.from_iterable(repeat(charts, times=_REPEAT_TIMES)) - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - charts: list[ChartType] = [fn() for fn, _ in chart_funcs_error_message] - yield from chain.from_iterable(repeat(charts, times=times)) + def _iter_chart_factory() -> Iterator[ChartType]: + """ + Validation not the bottleneck, but encode is. 
- for chart in _iter_charts(times=_REPEAT_TIMES): - with pytest.raises(schemapi.SchemaValidationError): - chart.to_dict(validate=True) + Ensures at least `times` * len(chart_funcs_error_message) .encode calls are made. + """ + chart_funcs: list[Callable[[], ChartType]] = [ + fn for fn, _ in chart_funcs_error_message + ] + for fn in chain.from_iterable(repeat(chart_funcs, times=_REPEAT_TIMES)): + yield fn() + + def _to_dict(validate: bool) -> None: + if validate: + for chart in _iter_charts(): + with pytest.raises(schemapi.SchemaValidationError): + chart.to_dict(validate=validate) + else: + for chart in _iter_charts(): + chart.to_dict(validate=validate) + + if to_or_from == "to_dict": + _to_dict(validate=False) + elif to_or_from == "to_dict-validate": + _to_dict(validate=True) + else: + assert list(_iter_chart_factory()) def test_multiple_field_strings_in_condition(): From 8725a6cee48096b801981c49b054fe634a5df276 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 14:25:07 +0100 Subject: [PATCH 48/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 230 +++++++++++++++++++++++++++------------ 1 file changed, 159 insertions(+), 71 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index a97dbc789..5188863ee 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -8,7 +8,7 @@ import operator import sys import textwrap -from collections import defaultdict +from collections import defaultdict, deque from functools import lru_cache, partial from importlib.metadata import version as importlib_version from itertools import chain, groupby, islice, zip_longest @@ -16,13 +16,10 @@ from typing import ( TYPE_CHECKING, Any, - Callable, Dict, - Final, Iterable, - Iterator, - KeysView, List, + Mapping, Sequence, TypeVar, Union, @@ -37,7 +34,7 @@ from packaging.version import Version if TYPE_CHECKING: - from typing import ClassVar, Literal, Mapping + from 
typing import Callable, ClassVar, Final, Iterator, KeysView, Literal from jsonschema.protocols import Validator, _JsonParameter @@ -75,6 +72,17 @@ ] """Non-exhaustive listing of possible literals in ``ValidationError.validator``""" +__all__ = [ + "Optional", # altair.utils + "SchemaBase", # altair.vegalite.v5.schema.core + "Undefined", # altair.utils + "UndefinedType", # altair.vegalite.v5.schema.core -> (side-effect relied on to propagate to alt.__init__) + "_resolve_references", # tools.schemapi.utils -> tools.generate_schema_wrapper + "_subclasses", # altair.vegalite.v5.schema.core + "is_undefined", # altair.typing + "validate_jsonschema", # altair.utils.display + "with_property_setters", # altair.vegalite.v5.schema.channels +] _VEGA_LITE_ROOT_URI: Final = "urn:vega-lite-schema" """ @@ -308,9 +316,12 @@ def _validator( rootschema Context to evaluate within. + We have **both** a current & a backwards-compatible version of this function. + .. _Validator: https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol """ + # NOTE: This is the current version uri = _get_schema_dialect_uri(rootschema or schema) validator = _validator_for(uri) registry = _registry(rootschema or schema, uri) @@ -350,7 +361,17 @@ def _registry_update( def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: - """Resolve schema references until there is no $ref anymore in the top-level of the dictionary.""" + """ + Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. + + ``jsonschema`` deprecated ``RefResolver`` in favor of `referencing`_. + + We have **both** a current & a backwards-compatible version of this function. + + .. 
_referencing: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + """ + # NOTE: This is the current version root = rootschema or schema if ("$ref" not in root) or ("$ref" not in schema): return schema @@ -391,6 +412,22 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: def _validator( schema: dict[str, Any], rootschema: dict[str, Any] | None = None ) -> Validator: + """ + Constructs a `Validator`_ for future validation. + + We have **both** a current & a backwards-compatible version of this function. + + Parameters + ---------- + schema + Schema that a spec will be validated against. + rootschema + Context to evaluate within. + + .. _Validator: + https://python-jsonschema.readthedocs.io/en/stable/validate/#the-validator-protocol + """ + # NOTE: This is the backwards-compatible version validator = _validator_for(_get_schema_dialect_uri(rootschema or schema)) resolver: Any = ( jsonschema.RefResolver.from_schema(rootschema) if rootschema else rootschema @@ -401,12 +438,16 @@ def _resolve_references( schema: dict[str, Any], rootschema: dict[str, Any] ) -> dict[str, Any]: """ - Resolve schema references until there is no $ref anymore in the top-level of the dictionary. + Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. - ``jsonschema`` deprecated ``RefResolver`` in favor of ``referencing``. + ``jsonschema`` deprecated ``RefResolver`` in favor of `referencing`_. - See https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + We have **both** a current & a backwards-compatible version of this function. + + .. 
_referencing: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 """ + # NOTE: This is the backwards-compatible version resolver = jsonschema.RefResolver.from_schema(rootschema or schema) while "$ref" in schema: with resolver.resolving(schema["$ref"]) as resolved: @@ -607,17 +648,6 @@ def _prune_subset_enum(iterable: _Errs, /) -> _ErrsLazy: } -def _subclasses(cls: type[Any]) -> Iterator[type[Any]]: - """Breadth-first sequence of all classes which inherit from cls.""" - seen = set() - current: set[type[Any]] = {cls} - while current: - seen |= current - current = set(chain.from_iterable(cls.__subclasses__() for cls in current)) - for cls in current - seen: - yield cls - - def _from_array_like(obj: Iterable[Any], /) -> list[Any]: try: ser = nw.from_native(obj, strict=True, series_only=True) @@ -971,6 +1001,8 @@ class SchemaBase: the _rootschema class attribute) which is used for validation. """ + # TODO: Implement `ClassVar` validation using https://peps.python.org/pep-0487/ + _schema: ClassVar[dict[str, Any] | Any] = None _rootschema: ClassVar[dict[str, Any] | Any] = None _class_is_valid_at_instantiation: ClassVar[bool] = True @@ -1249,17 +1281,18 @@ def from_dict( """ if validate: cls.validate(dct) - converter = _FromDict(cls._default_wrapper_classes()) + # NOTE: the breadth-first search occurs only once now + # `_FromDict` is purely ClassVar/classmethods + converter: type[_FromDict] | _FromDict = ( + _FromDict + if _FromDict.hash_tps + else _FromDict(cls._default_wrapper_classes()) + ) return converter.from_dict(dct, cls) @classmethod def from_json( - cls, - json_string: str, - validate: bool = True, - **kwargs: Any, - # Type hints for this method would get rather complicated - # if we want to provide a more specific return type + cls, json_string: str, validate: bool = True, **kwargs: Any ) -> ChartType: """ Instantiate the object from a valid JSON string. 
@@ -1355,6 +1388,9 @@ def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: def _freeze(val): + # NOTE: No longer referenced + # - Previously only called during tests + # - Not during any library code if isinstance(val, dict): return frozenset((k, _freeze(v)) for k, v in val.items()) elif isinstance(val, set): @@ -1365,6 +1401,64 @@ def _freeze(val): return val +def _hash_schema( + schema: _JsonParameter, + /, + *, + exclude: Iterable[str] = frozenset( + ("definitions", "title", "description", "$schema", "id") + ), +) -> int: + """ + Return the hash value for a ``schema``. + + Parameters + ---------- + schema + ``SchemaBase._schema``. + exclude + ``schema`` keys which are not considered when identifying equivalence. + """ + if isinstance(schema, Mapping): + schema = {k: v for k, v in schema.items() if k not in exclude} + return hash(json.dumps(schema, sort_keys=True)) + + +def _subclasses(cls: type[TSchemaBase]) -> Iterator[type[TSchemaBase]]: + """ + Breadth-first sequence of all classes which inherit from ``cls``. 
+ + Notes + ----- + - `__subclasses__()` alone isn't helpful, as that is only immediate subclasses + - Deterministic + - Used for `SchemaBase` & `VegaLiteSchema` + - In practice, it provides an iterator over all classes in the schema below `VegaLiteSchema` + - The first one is `Root` + - The order itself, I don't think is important + - But probably important that it doesn't change + - Thinking they used an iterator so that the subclasses are evaluated after they have all been defined + + - `Chart` seems to try to avoid calling this + - Using `TopLevelMixin.__subclasses__()` first if possible + - It is always called during `Chart.encode()` + - Chart.encode() + - altair.utils.core.infer_encoding_types + - _ChannelCache.infer_encoding_types + - _ChannelCache._wrap_in_channel + - SchemaBase.from_dict (recursive, hot loop, validate =False, within a try/except) + - _FromDict(cls._default_wrapper_classes()) + - schemapi._subclasses(schema.core.VegaLiteSchema) + """ + seen = set() + current: set[type[TSchemaBase]] = {cls} + while current: + seen |= current + current = set(chain.from_iterable(cls.__subclasses__() for cls in current)) + for cls in current - seen: + yield cls + + class _FromDict: """ Class used to construct SchemaBase class hierarchies from a dict. @@ -1374,40 +1468,31 @@ class _FromDict: specified in the ``wrapper_classes`` positional-only argument to the constructor. """ - _hash_exclude_keys = ("definitions", "title", "description", "$schema", "id") + hash_tps: ClassVar[defaultdict[int, deque[type[SchemaBase]]]] = defaultdict(deque) + """ + Maps unique schemas to corresponding types. 
- def __init__(self, wrapper_classes: Iterable[type[SchemaBase]], /) -> None: - # Create a mapping of a schema hash to a list of matching classes - # This lets us quickly determine the correct class to construct - self.class_dict: dict[int, list[type[SchemaBase]]] = defaultdict(list) - for tp in wrapper_classes: - if tp._schema is not None: - self.class_dict[self.hash_schema(tp._schema)].append(tp) + The logic is that after removing a subset of keys, some schemas are identical. - @classmethod - def hash_schema(cls, schema: dict[str, Any], use_json: bool = True) -> int: - """ - Compute a python hash for a nested dictionary which properly handles dicts, lists, sets, and tuples. + If there are multiple matches, we use the first one in the ``deque``. - At the top level, the function excludes from the hashed schema all keys - listed in `exclude_keys`. + ``_subclasses`` yields the results of a `breadth-first search`_, + so the first matching class is the most general match. - This implements two methods: one based on conversion to JSON, and one based - on recursive conversions of unhashable to hashable types; the former seems - to be slightly faster in several benchmarks. - """ - if cls._hash_exclude_keys and isinstance(schema, dict): - schema = { - key: val - for key, val in schema.items() - if key not in cls._hash_exclude_keys - } - s: Any = json.dumps(schema, sort_keys=True) if use_json else _freeze(schema) - return hash(s) + .. _breadth-first search: + https://en.wikipedia.org/wiki/Breadth-first_search + """ + + def __init__(self, wrapper_classes: Iterator[type[SchemaBase]], /) -> None: + cls = type(self) + for tp in wrapper_classes: + if tp._schema is not None: + cls.hash_tps[_hash_schema(tp._schema)].append(tp) @overload + @classmethod def from_dict( - self, + cls, dct: TSchemaBase, tp: None = ..., schema: None = ..., @@ -1415,8 +1500,9 @@ def from_dict( default_class: Any = ..., ) -> TSchemaBase: ... 
@overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any] | list[dict[str, Any]], tp: Any = ..., schema: Any = ..., @@ -1424,8 +1510,9 @@ def from_dict( default_class: type[TSchemaBase] = ..., # pyright: ignore[reportInvalidTypeVarUse] ) -> TSchemaBase: ... @overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any], tp: None = ..., schema: dict[str, Any] = ..., @@ -1433,8 +1520,9 @@ def from_dict( default_class: Any = ..., ) -> SchemaBase: ... @overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any], tp: type[TSchemaBase], schema: None = ..., @@ -1442,16 +1530,18 @@ def from_dict( default_class: Any = ..., ) -> TSchemaBase: ... @overload + @classmethod def from_dict( - self, + cls, dct: dict[str, Any] | list[dict[str, Any]], tp: type[TSchemaBase], schema: dict[str, Any], rootschema: dict[str, Any] | None = ..., default_class: Any = ..., ) -> Never: ... + @classmethod def from_dict( - self, + cls, dct: dict[str, Any] | list[dict[str, Any]] | TSchemaBase, tp: type[TSchemaBase] | None = None, schema: dict[str, Any] | None = None, @@ -1468,28 +1558,26 @@ def from_dict( root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: - # If there are multiple matches, we use the first one in the dict. - # Our class dict is constructed breadth-first from top to bottom, - # so the first class that matches is the most general match. current_schema = schema root_schema = rootschema or current_schema - matches = self.class_dict[self.hash_schema(current_schema)] - target_tp = matches[0] if matches else default_class + matches = cls.hash_tps[_hash_schema(current_schema)] + target_tp = next(iter(matches), default_class) else: msg = "Must provide either `tp` or `schema`, but not both." raise ValueError(msg) - from_dict = partial(self.from_dict, rootschema=root_schema) + from_dict = partial(cls.from_dict, rootschema=root_schema) # Can also return a list? 
resolved = _resolve_references(current_schema, root_schema) if "anyOf" in resolved or "oneOf" in resolved: schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) for possible in schemas: - try: - validate_jsonschema_fail_fast(dct, possible, rootschema=root_schema) - except ValidationError: - continue - else: + # NOTE: Instead of raise/except/continue + # Pre-"zero-cost" exceptions, this has a huge performance gain. + # https://docs.python.org/3/whatsnew/3.11.html#misc + # https://github.com/python/cpython/blob/9b3749849eda4012261a112b22eb07f26fd345a9/InternalDocs/exception_handling.md + it_errs = _validator(possible, root_schema).iter_errors(dct) + if next(it_errs, None) is None: return from_dict(dct, schema=possible, default_class=target_tp) if _is_dict(dct): From 677017702c5548ffba2f0a0b6aee7bf2860f944e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 15:22:28 +0100 Subject: [PATCH 49/92] perf(ruff): Add & ignore some performance rules Ignoring these *first* for visibility, since I'll be making changes that otherwise have no context --- altair/utils/_transformed_data.py | 4 ++-- altair/utils/schemapi.py | 4 ++-- altair/vegalite/v5/api.py | 4 ++-- pyproject.toml | 10 ++++++++++ sphinxext/schematable.py | 2 +- tools/generate_schema_wrapper.py | 4 ++-- tools/schemapi/schemapi.py | 4 ++-- 7 files changed, 21 insertions(+), 11 deletions(-) diff --git a/altair/utils/_transformed_data.py b/altair/utils/_transformed_data.py index 3839a13d2..d3db8e62e 100644 --- a/altair/utils/_transformed_data.py +++ b/altair/utils/_transformed_data.py @@ -214,7 +214,7 @@ def name_views( chart_names: list[str] = [] for subchart in subcharts: for name in name_views(subchart, i=i + len(chart_names), exclude=exclude): - chart_names.append(name) + chart_names.append(name) # noqa: PERF402 return chart_names @@ -326,7 +326,7 @@ def get_datasets_for_scope(vega_spec: dict[str, Any], scope: Scope) -> list[str] # get 
datasets from group datasets = [] for dataset in group.get("data", []): - datasets.append(dataset["name"]) + datasets.append(dataset["name"]) # noqa: PERF401 # Add facet dataset facet_dataset = group.get("from", {}).get("facet", {}).get("name", None) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 5188863ee..666119927 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -723,7 +723,7 @@ def _get_message(self) -> str: error_messages = [] for group in group_1, list(group_2), next(it, None): if group is not None: - error_messages.append(self._get_message_for_errors_group(group)) + error_messages.append(self._get_message_for_errors_group(group)) # noqa: PERF401 message = "\n\n".join( self.indent_from_second_line(f"Error {error_id}: {m}") for error_id, m in enumerate(error_messages, start=1) @@ -850,7 +850,7 @@ def _get_default_error_message( if "enum" in errors_by_validator: for error in errors_by_validator["enum"]: - bullet_points.append(f"one of {error.validator_value}") + bullet_points.append(f"one of {error.validator_value}") # noqa: PERF401 if "type" in errors_by_validator: types = [f"'{err.validator_value}'" for err in errors_by_validator["type"]] diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index d352b060b..43c1a2ccf 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -482,7 +482,7 @@ def check_fields_and_encodings(parameter: Parameter, field_name: str) -> bool: try: if field_name in getattr(param.select, prop): return True - except (AttributeError, TypeError): + except (AttributeError, TypeError): # noqa: PERF203 pass return False @@ -4871,7 +4871,7 @@ def remove_prop(subchart: ChartType, prop: str) -> ChartType: val = c[prop] if val is not Undefined: values.append(val) - except KeyError: + except KeyError: # noqa: PERF203 pass if len(values) == 0: pass diff --git a/pyproject.toml b/pyproject.toml index 0fd722574..05aade59b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ 
-257,8 +257,16 @@ extend-select=[ "PLR1736", # literal-membership "PLR6201", + # unnecessary-lambda + "PLW0108", # unspecified-encoding "PLW1514", + # exception handling # + # ------------------ # + # try-except-pass + "S110", + # try-except-continue + "S112", ] select = [ # flake8-bugbear @@ -324,6 +332,8 @@ select = [ "I001", # complex-structure "C901", + # Perflint + "PERF", ] ignore = [ # Whitespace before ':' diff --git a/sphinxext/schematable.py b/sphinxext/schematable.py index f27622fb8..e0705ee02 100644 --- a/sphinxext/schematable.py +++ b/sphinxext/schematable.py @@ -173,7 +173,7 @@ def select_items_from_schema( for prop in props: try: yield prop, properties[prop], prop in required - except KeyError as err: + except KeyError as err: # noqa: PERF203 msg = f"Can't find property: {prop}" raise Exception(msg) from err diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py index a625394bd..ee13c99e7 100644 --- a/tools/generate_schema_wrapper.py +++ b/tools/generate_schema_wrapper.py @@ -394,7 +394,7 @@ def _add_shorthand_property_to_field_encodings(schema: dict) -> dict: encoding = SchemaInfo(schema["definitions"][encoding_def], rootschema=schema) - for _, propschema in encoding.properties.items(): + for _, propschema in encoding.properties.items(): # noqa: PERF102 def_dict = get_field_datum_value_defs(propschema, schema) field_ref = def_dict.get("field") @@ -566,7 +566,7 @@ def generate_vegalite_schema_wrapper(schema_file: Path) -> str: ] for name in toposort(graph): - contents.append(definitions[name].schema_class()) + contents.append(definitions[name].schema_class()) # noqa: PERF401 contents.append("") # end with newline return "\n".join(contents) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 29fe8be64..a5f661013 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -721,7 +721,7 @@ def _get_message(self) -> str: error_messages = [] for group in group_1, list(group_2), next(it, 
None): if group is not None: - error_messages.append(self._get_message_for_errors_group(group)) + error_messages.append(self._get_message_for_errors_group(group)) # noqa: PERF401 message = "\n\n".join( self.indent_from_second_line(f"Error {error_id}: {m}") for error_id, m in enumerate(error_messages, start=1) @@ -848,7 +848,7 @@ def _get_default_error_message( if "enum" in errors_by_validator: for error in errors_by_validator["enum"]: - bullet_points.append(f"one of {error.validator_value}") + bullet_points.append(f"one of {error.validator_value}") # noqa: PERF401 if "type" in errors_by_validator: types = [f"'{err.validator_value}'" for err in errors_by_validator["type"]] From 0ddf19eb23b18308cf7da6250f9b25a5c7311711 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 17:51:56 +0100 Subject: [PATCH 50/92] perf(ruff): Fix some `"PERF"` violations --- altair/utils/_transformed_data.py | 9 ++++---- altair/utils/schemapi.py | 17 ++++++++------- altair/vegalite/v5/api.py | 36 ++++++++++--------------------- tools/schemapi/schemapi.py | 17 ++++++++------- 4 files changed, 33 insertions(+), 46 deletions(-) diff --git a/altair/utils/_transformed_data.py b/altair/utils/_transformed_data.py index d3db8e62e..43d398575 100644 --- a/altair/utils/_transformed_data.py +++ b/altair/utils/_transformed_data.py @@ -213,8 +213,9 @@ def name_views( chart_names: list[str] = [] for subchart in subcharts: - for name in name_views(subchart, i=i + len(chart_names), exclude=exclude): - chart_names.append(name) # noqa: PERF402 + chart_names.extend( + name_views(subchart, i=i + len(chart_names), exclude=exclude) + ) return chart_names @@ -324,9 +325,7 @@ def get_datasets_for_scope(vega_spec: dict[str, Any], scope: Scope) -> list[str] group = get_group_mark_for_scope(vega_spec, scope) or {} # get datasets from group - datasets = [] - for dataset in group.get("data", []): - datasets.append(dataset["name"]) # noqa: PERF401 + 
datasets = [dataset["name"] for dataset in group.get("data", [])] # Add facet dataset facet_dataset = group.get("from", {}).get("facet", {}).get("name", None) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 666119927..87f0b6363 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -717,18 +717,19 @@ def indent_from_second_line(msg: str, /, indent: int = 4) -> str: ) def _get_message(self) -> str: - it = self._errors + it: _ErrsLazyGroup = self._errors group_1 = list(next(it)) if (group_2 := next(it, None)) is not None: - error_messages = [] - for group in group_1, list(group_2), next(it, None): - if group is not None: - error_messages.append(self._get_message_for_errors_group(group)) # noqa: PERF401 - message = "\n\n".join( + messages: Iterator[str] = ( + self._get_message_for_errors_group(g) + for g in (group_1, list(group_2), next(it, None)) + if g is not None + ) + msg = "\n\n".join( self.indent_from_second_line(f"Error {error_id}: {m}") - for error_id, m in enumerate(error_messages, start=1) + for error_id, m in enumerate(messages, start=1) ) - return f"Multiple errors were found.\n\n{message}" + return f"Multiple errors were found.\n\n{msg}" else: return self._get_message_for_errors_group(group_1) diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 43c1a2ccf..b18216dbb 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -426,7 +426,7 @@ def __getattr__(self, field_name: str) -> GetAttrExpression | SelectionExpressio # fields or encodings list, then we want to return an expression. if check_fields_and_encodings(self, field_name): return SelectionExpression(_attrexpr) - return _expr_core.GetAttrExpression(self.name, field_name) + return _attrexpr # TODO: Are there any special cases to consider for __getitem__? # This was copied from v4. 
@@ -478,13 +478,10 @@ def check_fields_and_encodings(parameter: Parameter, field_name: str) -> bool: param = parameter.param if utils.is_undefined(param) or isinstance(param, core.VariableParameter): return False - for prop in ["fields", "encodings"]: - try: - if field_name in getattr(param.select, prop): - return True - except (AttributeError, TypeError): # noqa: PERF203 - pass - + select = param.select + for prop in "fields", "encodings": + if not utils.is_undefined(p := select._get(prop)) and field_name in p: + return True return False @@ -4841,17 +4838,13 @@ def _repeat_names( return params_named -def _remove_layer_props( # noqa: C901 +def _remove_layer_props( chart: LayerChart, subcharts: list[ChartType], layer_props: Iterable[str] ) -> tuple[dict[str, Any], list[ChartType]]: def remove_prop(subchart: ChartType, prop: str) -> ChartType: - # If subchart is a UnitSpec, then subchart["height"] raises a KeyError - try: - if subchart[prop] is not Undefined: - subchart = subchart.copy() - subchart[prop] = Undefined - except KeyError: - pass + if not utils.is_undefined(subchart._get(prop)): + subchart = subchart.copy() + subchart[prop] = Undefined return subchart output_dict: dict[str, Any] = {} @@ -4864,15 +4857,8 @@ def remove_prop(subchart: ChartType, prop: str) -> ChartType: if chart[prop] is Undefined: # Top level does not have this prop. # Check for consistent props within the subcharts. - values = [] - for c in subcharts: - # If c is a UnitSpec, then c["height"] raises a KeyError. 
- try: - val = c[prop] - if val is not Undefined: - values.append(val) - except KeyError: # noqa: PERF203 - pass + values = [v for c in subcharts if not utils.is_undefined(v := c._get(prop))] + if len(values) == 0: pass elif all(v == values[0] for v in values[1:]): diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index a5f661013..0ae05d4e0 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -715,18 +715,19 @@ def indent_from_second_line(msg: str, /, indent: int = 4) -> str: ) def _get_message(self) -> str: - it = self._errors + it: _ErrsLazyGroup = self._errors group_1 = list(next(it)) if (group_2 := next(it, None)) is not None: - error_messages = [] - for group in group_1, list(group_2), next(it, None): - if group is not None: - error_messages.append(self._get_message_for_errors_group(group)) # noqa: PERF401 - message = "\n\n".join( + messages: Iterator[str] = ( + self._get_message_for_errors_group(g) + for g in (group_1, list(group_2), next(it, None)) + if g is not None + ) + msg = "\n\n".join( self.indent_from_second_line(f"Error {error_id}: {m}") - for error_id, m in enumerate(error_messages, start=1) + for error_id, m in enumerate(messages, start=1) ) - return f"Multiple errors were found.\n\n{message}" + return f"Multiple errors were found.\n\n{msg}" else: return self._get_message_for_errors_group(group_1) From cace782a11c2fee7e4efa06d4e5a6a626a474dee Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 20:50:06 +0100 Subject: [PATCH 51/92] perf: Remove unreachable `ValidationError` except This code path will not raise `jsonschema.ValidationError` --- altair/utils/core.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/altair/utils/core.py b/altair/utils/core.py index f5ef659b1..8df22b154 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -14,7 +14,6 @@ from operator import itemgetter from typing import 
TYPE_CHECKING, Any, Callable, Iterator, Literal, TypeVar, cast -import jsonschema import narwhals.stable.v1 as nw from narwhals.dependencies import get_polars, is_pandas_dataframe from narwhals.typing import IntoDataFrame @@ -855,13 +854,9 @@ def _wrap_in_channel(self, obj: Any, encoding: str, /): return [self._wrap_in_channel(el, encoding) for el in obj] if channel := self.name_to_channel.get(encoding): tp = channel["value" if "value" in obj else "field"] - try: - # Don't force validation here; some objects won't be valid until - # they're created in the context of a chart. - return tp.from_dict(obj, validate=False) - except jsonschema.ValidationError: - # our attempts at finding the correct class have failed - return obj + # Don't force validation here; some objects won't be valid until + # they're created in the context of a chart. + return tp.from_dict(obj, validate=False) else: warnings.warn(f"Unrecognized encoding channel {encoding!r}", stacklevel=1) return obj From ac9993907c78011881cff863dc73afa5e7d6b595 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 22:30:54 +0100 Subject: [PATCH 52/92] docs(perf): Note some areas that may impact performance --- altair/utils/_importers.py | 2 ++ altair/utils/core.py | 1 + altair/utils/schemapi.py | 1 + altair/vegalite/v5/api.py | 3 +++ tests/vegalite/v5/test_api.py | 5 ++++- tools/schemapi/schemapi.py | 1 + 6 files changed, 12 insertions(+), 1 deletion(-) diff --git a/altair/utils/_importers.py b/altair/utils/_importers.py index 14085ebcf..93e647f33 100644 --- a/altair/utils/_importers.py +++ b/altair/utils/_importers.py @@ -76,6 +76,7 @@ def vl_version_for_vl_convert() -> str: def import_pyarrow_interchange() -> ModuleType: min_version = "11.0.0" + # FIXME: Hot try/except try: version = importlib_version("pyarrow") @@ -102,6 +103,7 @@ def import_pyarrow_interchange() -> ModuleType: def pyarrow_available() -> bool: + # FIXME: Hot try/except try: 
import_pyarrow_interchange() return True diff --git a/altair/utils/core.py b/altair/utils/core.py index 8df22b154..a1d81c39e 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -830,6 +830,7 @@ def from_channels(cls, channels: ModuleType, /) -> _ChannelCache: @classmethod def from_cache(cls) -> _ChannelCache: global _CHANNEL_CACHE + # FIXME: Hot try/except try: cached = _CHANNEL_CACHE except NameError: diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 87f0b6363..d0006a3ac 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1073,6 +1073,7 @@ def __getattr__(self, attr): if attr in self._kwds: return self._kwds[attr] else: + # FIXME: Hot try/except try: _getattr = super().__getattr__ # pyright: ignore[reportAttributeAccessIssue] except AttributeError: diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index b18216dbb..695fcc642 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -1790,6 +1790,7 @@ def to_dict( # noqa: C901 copy = _top_schema_base(self).copy(deep=False) original_data = getattr(copy, "data", Undefined) if not utils.is_undefined(original_data): + # FIXME: Hot try/except try: data = _to_eager_narwhals_dataframe(original_data) except TypeError: @@ -3401,6 +3402,7 @@ def _repr_mimebundle_(self, *args, **kwds) -> MimeBundleType | None: # type:ign """Return a MIME bundle for display in Jupyter frontends.""" # Catch errors explicitly to get around issues in Jupyter frontend # see https://github.com/ipython/ipython/issues/11038 + # FIXME: Hot try/except try: dct = self.to_dict(context={"pre_transform": False}) except Exception: @@ -3713,6 +3715,7 @@ def from_dict( _tp: Any for tp in TopLevelMixin.__subclasses__(): _tp = super() if tp is Chart else tp + # FIXME: Hot try/except try: return _tp.from_dict(dct, validate=validate) except jsonschema.ValidationError: diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index f5a150556..8b71e6e01 100644 --- 
a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -1210,7 +1210,10 @@ def test_themes(): assert "config" not in chart.to_dict() -def test_chart_from_dict(): +# TODO: Investigate alternative to looped try/except/pass +# - AFAIK it would speed up `Chart.from_dict()` +# - but maybe not central enough to have general impact +def test_chart_from_dict() -> None: base = alt.Chart("data.csv").mark_point().encode(x="x:Q", y="y:Q") charts = [ diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 0ae05d4e0..21d49851c 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1071,6 +1071,7 @@ def __getattr__(self, attr): if attr in self._kwds: return self._kwds[attr] else: + # FIXME: Hot try/except try: _getattr = super().__getattr__ # pyright: ignore[reportAttributeAccessIssue] except AttributeError: From f776fcfaffb43aa95d6db7bbfd53a2e3211b5f80 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 25 Aug 2024 22:34:07 +0100 Subject: [PATCH 53/92] refactor: Remove now-unused `validate_jsonschema_fail_fast` --- altair/utils/schemapi.py | 16 ---------------- tools/schemapi/schemapi.py | 16 ---------------- 2 files changed, 32 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index d0006a3ac..a2406586f 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -193,22 +193,6 @@ def validate_jsonschema( raise NotImplementedError(msg) -def validate_jsonschema_fail_fast( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, -) -> None: - """ - Raise as quickly as possible. - - Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. - """ - if ( - err := next(_validator(schema, rootschema).iter_errors(spec), None) - ) is not None: - raise err - - def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: """ Return value of `$schema`_. 
diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 21d49851c..b1c2583e3 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -191,22 +191,6 @@ def validate_jsonschema( raise NotImplementedError(msg) -def validate_jsonschema_fail_fast( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, -) -> None: - """ - Raise as quickly as possible. - - Use instead of ``validate_jsonschema`` when any information about the error(s) are not needed. - """ - if ( - err := next(_validator(schema, rootschema).iter_errors(spec), None) - ) is not None: - raise err - - def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: """ Return value of `$schema`_. From 53663e811e3839d30d4bf9591300ff8775640473 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 26 Aug 2024 20:27:06 +0100 Subject: [PATCH 54/92] perf: Avoid an exception in `SchemaBase.__getattr__` Uses the default parameter of `getattr` instead --- altair/utils/schemapi.py | 9 ++------- tools/schemapi/schemapi.py | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 12c7b8009..4b975aa14 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1057,15 +1057,10 @@ def __getattr__(self, attr): # reminder: getattr is called after the normal lookups if attr == "_kwds": raise AttributeError() - if attr in self._kwds: + elif attr in self._kwds: return self._kwds[attr] else: - # FIXME: Hot try/except - try: - _getattr = super().__getattr__ # pyright: ignore[reportAttributeAccessIssue] - except AttributeError: - _getattr = super().__getattribute__ - return _getattr(attr) + return getattr(super(), "__getattr__", super().__getattribute__)(attr) def __setattr__(self, item, val) -> None: self._kwds[item] = val diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 3068d7558..04ba02a95 
100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1055,15 +1055,10 @@ def __getattr__(self, attr): # reminder: getattr is called after the normal lookups if attr == "_kwds": raise AttributeError() - if attr in self._kwds: + elif attr in self._kwds: return self._kwds[attr] else: - # FIXME: Hot try/except - try: - _getattr = super().__getattr__ # pyright: ignore[reportAttributeAccessIssue] - except AttributeError: - _getattr = super().__getattribute__ - return _getattr(attr) + return getattr(super(), "__getattr__", super().__getattribute__)(attr) def __setattr__(self, item, val) -> None: self._kwds[item] = val From bd31d7c574820ec3ee3bf72ec874c96545f415ba Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 26 Aug 2024 20:39:46 +0100 Subject: [PATCH 55/92] refactor: Ensure every `VegaLiteSchema` has a `._schema` Eventually, this will replace the need for checking `None` as frequently --- altair/utils/schemapi.py | 2 +- altair/vegalite/v5/schema/core.py | 3 +-- tools/generate_schema_wrapper.py | 17 ++++++++++++++--- tools/schemapi/schemapi.py | 2 +- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 4b975aa14..e7f16098f 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1310,7 +1310,7 @@ def validate( @classmethod def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, Any]: """Resolve references in the context of this object's schema or root schema.""" - rootschema = cls._rootschema or cls._schema or schema + rootschema = cls._rootschema or cls._schema if rootschema is None: name = type(cls).__name__ msg = ( diff --git a/altair/vegalite/v5/schema/core.py b/altair/vegalite/v5/schema/core.py index 0892e7214..833a551ba 100644 --- a/altair/vegalite/v5/schema/core.py +++ b/altair/vegalite/v5/schema/core.py @@ -487,6 +487,7 @@ def load_schema() -> dict: class 
VegaLiteSchema(SchemaBase): + _schema = load_schema() _rootschema = load_schema() @classmethod @@ -502,8 +503,6 @@ class Root(VegaLiteSchema): specifications. (The json schema is generated from this type.) """ - _schema = VegaLiteSchema._rootschema - def __init__(self, *args, **kwds): super().__init__(*args, **kwds) diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py index ee13c99e7..f4b970902 100644 --- a/tools/generate_schema_wrapper.py +++ b/tools/generate_schema_wrapper.py @@ -54,6 +54,7 @@ BASE_SCHEMA: Final = """ class {basename}(SchemaBase): + _schema = load_schema() _rootschema = load_schema() @classmethod def _default_wrapper_classes(cls) -> Iterator[type[Any]]: @@ -301,6 +302,17 @@ def process_description(description: str) -> str: return description.strip() +class RootSchemaGenerator(SchemaGenerator): + schema_class_template = textwrap.dedent( + ''' + class {classname}({basename}): + """{docstring}""" + + {init_code} + ''' + ) + + class FieldSchemaGenerator(SchemaGenerator): schema_class_template = textwrap.dedent( ''' @@ -557,12 +569,11 @@ def generate_vegalite_schema_wrapper(schema_file: Path) -> str: "\n" f"__all__ = {all_}\n", LOAD_SCHEMA.format(schemafile="vega-lite-schema.json"), BASE_SCHEMA.format(basename=basename), - schema_class( + RootSchemaGenerator( "Root", schema=rootschema, basename=basename, - schemarepr=CodeSnippet(f"{basename}._rootschema"), - ), + ).schema_class(), ] for name in toposort(graph): diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 04ba02a95..956302519 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1308,7 +1308,7 @@ def validate( @classmethod def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, Any]: """Resolve references in the context of this object's schema or root schema.""" - rootschema = cls._rootschema or cls._schema or schema + rootschema = cls._rootschema or cls._schema if rootschema is None: name = 
type(cls).__name__ msg = ( From 06ca33929c5f2a5f54d4f15f872844a8c540f605 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 26 Aug 2024 20:45:20 +0100 Subject: [PATCH 56/92] refactor: Adds `_SchemaBasePEP487` Related https://github.com/vega/altair/pull/3547#discussion_r1731037778 --- altair/utils/schemapi.py | 126 +++++++++++++++++++++++++++++++++++++ tools/schemapi/schemapi.py | 126 +++++++++++++++++++++++++++++++++++++ 2 files changed, 252 insertions(+) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index e7f16098f..12314089e 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -981,6 +981,132 @@ def _deep_copy(obj: _CopyImpl | Any, by_ref: set[str]) -> _CopyImpl | Any: return obj +class _SchemaBasePEP487: + """Minimal demo for testing feasibility of `__init_subclass__`.""" + + _schema: ClassVar[dict[str, Any]] + _rootschema: ClassVar[dict[str, Any]] + _class_is_valid_at_instantiation: ClassVar[bool] = True + + def __init__(self, *args: Any, **kwds: Any) -> None: + if (kwds and args) or len(args) > 1: + name = type(self).__name__ + _args = ", ".join(f"{a!r}" for a in args) + _kwds = ", ".join(f"{k}={v!r}" for k, v in kwds.items()) + msg = ( + f"Expected either:\n" + f" - a single arg with no kwds, for, e.g. {{'type': 'string'}}\n" + f" - zero args with zero or more kwds for {{'type': 'object'}}\n\n" + f"but got: {name}({_args}, {_kwds})" + ) + raise AssertionError(msg) + # use object.__setattr__ because we override setattr below. + self._args: tuple[Any, ...] 
+ self._kwds: dict[str, Any] + object.__setattr__(self, "_args", args) + object.__setattr__(self, "_kwds", kwds) + + def __init_subclass__( + cls, + *args: Any, + schema: dict[str, Any] | None = None, + rootschema: dict[str, Any] | None = None, + valid_at_init: bool | None = None, + **kwds: Any, + ) -> None: + super().__init_subclass__(*args, **kwds) + # NOTE: `SchemaBase` itself would have no `_schema` or `_rootschema`, but won't be run through this + # FIXED: `VegaLiteSchema` has a `_rootschema` but no `_schema` + # FIXED: `Root` uses `VegaLiteSchema._rootschema`, for `_schema` and inherits the same for `_rootschema` + # FIXED: Both have only `_schema` - which is a type + # - `api.Then`: _schema = {"type": "object"} + # - `expr.core.Expression`: _schema = {"type": "string"} + # ---- + # All others either *only* define `_schema`, or inherit it when they are a channel + if schema is None: + if hasattr(cls, "_schema"): + schema = cls._schema + else: + msg = ( + f"Cannot instantiate object of type {cls}: " + "_schema class attribute is not defined." + ) + raise TypeError(msg) + + if rootschema is None: + if hasattr(cls, "_rootschema"): + rootschema = cls._rootschema + elif "$ref" not in schema: + rootschema = schema + else: + msg = "`rootschema` must be provided if `schema` contains a `'$ref'` and does not inherit one." + raise TypeError(msg) + + # NOTE: Inherit a `False`instead of overwriting with the default `True` + # - If a parent is not valid at init, then none of its subclasses can be + # - The current hierarchy does not support the inverse of this + # - Subclasses may declare they are not valid + if valid_at_init is None: + valid_at_init = cls._class_is_valid_at_instantiation + cls._schema = schema + cls._rootschema = rootschema + cls._class_is_valid_at_instantiation = valid_at_init + + @overload + def _get(self, attr: str, default: Optional = ...) -> Any | UndefinedType: ... + @overload + def _get(self, attr: str, default: T) -> Any | T: ... 
+ def _get(self, attr: str, default: Optional[T] = Undefined) -> Any | T: + """Get an attribute, returning default if not present.""" + if (item := self._kwds.get(attr, Undefined)) is not Undefined: + return item + else: + return default + + def __dir__(self) -> list[str]: + return sorted(chain(super().__dir__(), self._kwds)) + + def __eq__(self, other: Any) -> bool: + return ( + type(self) is type(other) + and self._args == other._args + and self._kwds == other._kwds + ) + + def __getattr__(self, attr: str): + # reminder: getattr is called after the normal lookups + if attr == "_kwds": + raise AttributeError() + if attr in self._kwds: + return self._kwds[attr] + else: + return getattr(super(), "__getattr__", super().__getattribute__)(attr) + + def __getitem__(self, item: str) -> Any: + return self._kwds[item] + + def __setattr__(self, item: str, val: Any) -> None: + if item.startswith("_"): + # Setting an instances copy of a ClassVar modify that + # By default, this makes **another** copy and places in _kwds + object.__setattr__(self, item, val) + else: + self._kwds[item] = val + + def __setitem__(self, item: str, val: Any) -> None: + self._kwds[item] = val + + def __repr__(self) -> str: + name = type(self).__name__ + if kwds := self._kwds: + it = (f"{k}: {v!r}" for k, v in sorted(kwds.items()) if v is not Undefined) + args = ",\n".join(it).replace("\n", "\n ") + LB, RB = "{", "}" + return f"{name}({LB}\n {args}\n{RB})" + else: + return f"{name}({self._args[0]!r})" + + class SchemaBase: """ Base class for schema wrappers. 
diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 956302519..19326d81b 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -979,6 +979,132 @@ def _deep_copy(obj: _CopyImpl | Any, by_ref: set[str]) -> _CopyImpl | Any: return obj +class _SchemaBasePEP487: + """Minimal demo for testing feasibility of `__init_subclass__`.""" + + _schema: ClassVar[dict[str, Any]] + _rootschema: ClassVar[dict[str, Any]] + _class_is_valid_at_instantiation: ClassVar[bool] = True + + def __init__(self, *args: Any, **kwds: Any) -> None: + if (kwds and args) or len(args) > 1: + name = type(self).__name__ + _args = ", ".join(f"{a!r}" for a in args) + _kwds = ", ".join(f"{k}={v!r}" for k, v in kwds.items()) + msg = ( + f"Expected either:\n" + f" - a single arg with no kwds, for, e.g. {{'type': 'string'}}\n" + f" - zero args with zero or more kwds for {{'type': 'object'}}\n\n" + f"but got: {name}({_args}, {_kwds})" + ) + raise AssertionError(msg) + # use object.__setattr__ because we override setattr below. + self._args: tuple[Any, ...] 
+ self._kwds: dict[str, Any] + object.__setattr__(self, "_args", args) + object.__setattr__(self, "_kwds", kwds) + + def __init_subclass__( + cls, + *args: Any, + schema: dict[str, Any] | None = None, + rootschema: dict[str, Any] | None = None, + valid_at_init: bool | None = None, + **kwds: Any, + ) -> None: + super().__init_subclass__(*args, **kwds) + # NOTE: `SchemaBase` itself would have no `_schema` or `_rootschema`, but won't be run through this + # FIXED: `VegaLiteSchema` has a `_rootschema` but no `_schema` + # FIXED: `Root` uses `VegaLiteSchema._rootschema`, for `_schema` and inherits the same for `_rootschema` + # FIXED: Both have only `_schema` - which is a type + # - `api.Then`: _schema = {"type": "object"} + # - `expr.core.Expression`: _schema = {"type": "string"} + # ---- + # All others either *only* define `_schema`, or inherit it when they are a channel + if schema is None: + if hasattr(cls, "_schema"): + schema = cls._schema + else: + msg = ( + f"Cannot instantiate object of type {cls}: " + "_schema class attribute is not defined." + ) + raise TypeError(msg) + + if rootschema is None: + if hasattr(cls, "_rootschema"): + rootschema = cls._rootschema + elif "$ref" not in schema: + rootschema = schema + else: + msg = "`rootschema` must be provided if `schema` contains a `'$ref'` and does not inherit one." + raise TypeError(msg) + + # NOTE: Inherit a `False`instead of overwriting with the default `True` + # - If a parent is not valid at init, then none of its subclasses can be + # - The current hierarchy does not support the inverse of this + # - Subclasses may declare they are not valid + if valid_at_init is None: + valid_at_init = cls._class_is_valid_at_instantiation + cls._schema = schema + cls._rootschema = rootschema + cls._class_is_valid_at_instantiation = valid_at_init + + @overload + def _get(self, attr: str, default: Optional = ...) -> Any | UndefinedType: ... + @overload + def _get(self, attr: str, default: T) -> Any | T: ... 
+ def _get(self, attr: str, default: Optional[T] = Undefined) -> Any | T: + """Get an attribute, returning default if not present.""" + if (item := self._kwds.get(attr, Undefined)) is not Undefined: + return item + else: + return default + + def __dir__(self) -> list[str]: + return sorted(chain(super().__dir__(), self._kwds)) + + def __eq__(self, other: Any) -> bool: + return ( + type(self) is type(other) + and self._args == other._args + and self._kwds == other._kwds + ) + + def __getattr__(self, attr: str): + # reminder: getattr is called after the normal lookups + if attr == "_kwds": + raise AttributeError() + if attr in self._kwds: + return self._kwds[attr] + else: + return getattr(super(), "__getattr__", super().__getattribute__)(attr) + + def __getitem__(self, item: str) -> Any: + return self._kwds[item] + + def __setattr__(self, item: str, val: Any) -> None: + if item.startswith("_"): + # Setting an instances copy of a ClassVar modify that + # By default, this makes **another** copy and places in _kwds + object.__setattr__(self, item, val) + else: + self._kwds[item] = val + + def __setitem__(self, item: str, val: Any) -> None: + self._kwds[item] = val + + def __repr__(self) -> str: + name = type(self).__name__ + if kwds := self._kwds: + it = (f"{k}: {v!r}" for k, v in sorted(kwds.items()) if v is not Undefined) + args = ",\n".join(it).replace("\n", "\n ") + LB, RB = "{", "}" + return f"{name}({LB}\n {args}\n{RB})" + else: + return f"{name}({self._args[0]!r})" + + class SchemaBase: """ Base class for schema wrappers. From 56a43cb244537dc51b28d866e4d6c6b9781ac98c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 26 Aug 2024 20:49:45 +0100 Subject: [PATCH 57/92] test: Adds a mini suite for `_SchemaBasePEP487` Purely to demonstrate the differences to status quo. 
There would be no loss in functionality, this would simply be some small changes to generated code and the removal of instance-level checks --- tests/utils/test_schemapi.py | 151 ++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 3 deletions(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 4ba3ea9ca..af4af8559 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -10,7 +10,7 @@ import warnings from collections import deque from functools import partial -from typing import TYPE_CHECKING, Any, Callable, Iterable, Literal, Sequence +from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal, Sequence import jsonschema import jsonschema.exceptions @@ -35,6 +35,34 @@ # try to use SchemaBase objects defined elsewhere as wrappers. +@pytest.fixture +def dummy_rootschema() -> dict[str, Any]: + return { + "$schema": _JSON_SCHEMA_DRAFT_URL, + "definitions": { + "StringMapping": { + "type": "object", + "additionalProperties": {"type": "string"}, + }, + "StringArray": {"type": "array", "items": {"type": "string"}}, + }, + "properties": { + "a": {"$ref": "#/definitions/StringMapping"}, + "a2": {"type": "object", "additionalProperties": {"type": "number"}}, + "b": {"$ref": "#/definitions/StringArray"}, + "b2": {"type": "array", "items": {"type": "number"}}, + "c": {"type": ["string", "number"]}, + "d": { + "anyOf": [ + {"$ref": "#/definitions/StringMapping"}, + {"$ref": "#/definitions/StringArray"}, + ] + }, + "e": {"items": [{"type": "string"}, {"type": "string"}]}, + }, + } + + def test_actual_json_schema_draft_is_same_as_hardcoded_default(): # See comments next to definition of `_DEFAULT_DIALECT_URI` # for details why we need this test @@ -45,6 +73,125 @@ def test_actual_json_schema_draft_is_same_as_hardcoded_default(): ) +def test_init_subclasses_hierarchy(dummy_rootschema) -> None: + from referencing.exceptions import Unresolvable + + from altair.expr.core import 
GetItemExpression, OperatorMixin + from altair.utils.schemapi import _SchemaBasePEP487 + + sch1 = _SchemaBasePEP487() + sch2 = _SchemaBasePEP487() + sch3 = _SchemaBasePEP487("blue") + sch4 = _SchemaBasePEP487("red") + sch5 = _SchemaBasePEP487(color="blue") + sch6 = _SchemaBasePEP487(color="red") + + with pytest.raises( + AssertionError, match=r"_SchemaBasePEP487\('blue', color='red'\)" + ): + _SchemaBasePEP487("blue", color="red") + + assert sch1 == sch2 + assert sch3 != sch4 + assert sch5 != sch6 + assert sch3 != sch5 + assert _SchemaBasePEP487("blue") == sch3 + assert _SchemaBasePEP487(color="red") == sch6 + with pytest.raises(AttributeError, match="_SchemaBasePEP487.+color"): + attempt = sch4.color is Undefined # noqa: F841 + + assert sch5.color == sch5["color"] == sch5._get("color") == "blue" + assert sch5._get("price") is Undefined + assert sch5._get("price", 999) == 999 + + assert _SchemaBasePEP487._class_is_valid_at_instantiation + sch6._class_is_valid_at_instantiation = False # type: ignore[misc] + assert ( + _SchemaBasePEP487._class_is_valid_at_instantiation + != sch6._class_is_valid_at_instantiation + ) + + with pytest.raises(TypeError, match="Test1PEP487.+ _schema"): + + class Test1PEP487(_SchemaBasePEP487): ... + + class Test2PEP487(_SchemaBasePEP487, schema={"type": "object"}): ... + + with pytest.raises( + TypeError, + match=r"`rootschema` must be provided if `schema` contains a `'\$ref'` and does not inherit one", + ): + + class Test3PEP487(_SchemaBasePEP487, schema={"$ref": "#/definitions/Bar"}): ... + + class RootParentPEP487(_SchemaBasePEP487, schema=dummy_rootschema): + @classmethod + def _default_wrapper_classes(cls) -> Iterator[type[Any]]: + return schemapi._subclasses(RootParentPEP487) + + class Root(RootParentPEP487): + """ + Root schema wrapper. + + A Vega-Lite top-level specification. This is the root class for all Vega-Lite + specifications. (The json schema is generated from this type.) 
+ """ + + def __init__(self, *args, **kwds) -> None: + super().__init__(*args, **kwds) + + assert ( + Root._schema + == Root._rootschema + == RootParentPEP487._schema + == RootParentPEP487._rootschema + ) + + class StringMapping(Root, schema={"$ref": "#/definitions/StringMapping"}): ... + + class StringArray(Root, schema={"$ref": "#/definitions/StringArray"}): ... + + with pytest.raises( + jsonschema.ValidationError, + match=r"5 is not of type 'string'", + ): + schemapi.validate_jsonschema( + ["one", "two", 5], StringArray._schema, StringArray._rootschema + ) + + with pytest.raises(Unresolvable): + schemapi.validate_jsonschema(["one", "two", "three"], StringArray._schema) + + schemapi.validate_jsonschema( + ["one", "two", "three"], StringArray._schema, StringArray._rootschema + ) + + class Expression(OperatorMixin, _SchemaBasePEP487, schema={"type": "string"}): + def to_dict(self, *args, **kwargs): + return repr(self) + + def __setattr__(self, attr, val) -> None: + # We don't need the setattr magic defined in SchemaBase + return object.__setattr__(self, attr, val) + + def __getitem__(self, val): + return GetItemExpression(self, val) + + non_ref_mixin = Expression( + Expression("some").to_dict() + Expression("more").to_dict() + ) + schemapi.validate_jsonschema( + non_ref_mixin.to_dict(), non_ref_mixin._schema, non_ref_mixin._rootschema + ) + with pytest.raises( + jsonschema.ValidationError, + match=r"is not of type 'array'", + ): + schemapi.validate_jsonschema( + non_ref_mixin.to_dict(), StringArray._schema, StringArray._rootschema + ) + + class _TestSchema(SchemaBase): @classmethod def _default_wrapper_classes(cls): @@ -922,8 +1069,6 @@ def test_chart_validation_benchmark( from itertools import chain, repeat if TYPE_CHECKING: - from typing import Iterator - from altair.typing import ChartType def _iter_charts() -> Iterator[ChartType]: From 003c7fb14f9066ac84b96888bc50e3d3ad68fd99 Mon Sep 17 00:00:00 2001 From: dangotbanned 
<125183946+dangotbanned@users.noreply.github.com> Date: Mon, 26 Aug 2024 21:07:10 +0100 Subject: [PATCH 58/92] fix: Fix backwards incompatible import https://github.com/vega/altair/actions/runs/10566291662/job/29272723977?pr=3547 --- tests/utils/test_schemapi.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index af4af8559..2c4c11ed1 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -10,6 +10,7 @@ import warnings from collections import deque from functools import partial +from importlib.metadata import version as importlib_version from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal, Sequence import jsonschema @@ -18,6 +19,7 @@ import pandas as pd import polars as pl import pytest +from packaging.version import Version import altair as alt from altair import load_schema @@ -74,7 +76,12 @@ def test_actual_json_schema_draft_is_same_as_hardcoded_default(): def test_init_subclasses_hierarchy(dummy_rootschema) -> None: - from referencing.exceptions import Unresolvable + if Version(importlib_version("jsonschema")) >= Version("4.18"): + from referencing.exceptions import Unresolvable + else: + from jsonschema.exceptions import ( # type: ignore[assignment] + RefResolutionError as Unresolvable, + ) from altair.expr.core import GetItemExpression, OperatorMixin from altair.utils.schemapi import _SchemaBasePEP487 From 5fad0889285ddb521960af799276e91073de5a40 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 27 Aug 2024 21:44:02 +0100 Subject: [PATCH 59/92] ci: Fix include pattern --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 05aade59b..3e1046bb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -455,7 +455,7 @@ pythonVersion="3.8" reportUnusedExpression="none" include=[ "./altair/**/*.py", - ".doc/*.py", 
+ "./doc/*.py", "./sphinxext/**/*.py", "./tests/**/*.py", "./tools/**/*.py", From 69d9f678b737607ae5a0e3f1e9e8bd0a20a47b99 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Wed, 28 Aug 2024 18:30:19 +0100 Subject: [PATCH 60/92] test: Add `SchemaBase.__init_subclass__` benchmark results Locally, did a full replacement of class hierarchy - but saw no observable performance improvement --- tests/utils/test_schemapi.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 2c4c11ed1..cba7e2b2c 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -1071,6 +1071,11 @@ def test_chart_validation_benchmark( 107.84s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] 50.43s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] 67.07s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] + + # Post-`SchemaBase.__init_subclass__` addition (3.12.3) + 108.24s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] + 50.33s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] + 66.51s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] ``` """ from itertools import chain, repeat From 307cb9bf96341f607189f005c9d8026aa6f16de3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:42:01 +0100 Subject: [PATCH 61/92] refactor(typing): Widen internal `rootschema|schema` types from `dict[str, Any]` No functional or user-facing change. 
This will be to support using types other than `dict`, which isn't strictly required here --- tools/generate_schema_wrapper.py | 6 +++--- tools/schemapi/schemapi.py | 35 +++++++++++--------------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/tools/generate_schema_wrapper.py b/tools/generate_schema_wrapper.py index f4b970902..62384f0a0 100644 --- a/tools/generate_schema_wrapper.py +++ b/tools/generate_schema_wrapper.py @@ -424,7 +424,7 @@ def _add_shorthand_property_to_field_encodings(schema: dict) -> dict: "description": "shorthand for field, aggregate, and type", } if "required" not in defschema: - defschema["required"] = ["shorthand"] + defschema["required"] = ["shorthand"] # type: ignore elif "shorthand" not in defschema["required"]: defschema["required"].append("shorthand") schema["definitions"][field_ref.split("/")[-1]] = defschema @@ -457,7 +457,7 @@ def recursive_dict_update(schema: dict, root: dict, def_dict: dict) -> None: if k in properties: def_dict[k] = definition else: - recursive_dict_update(next_schema, root, def_dict) + recursive_dict_update(next_schema, root, def_dict) # type: ignore elif "anyOf" in schema: for sub_schema in schema["anyOf"]: recursive_dict_update(sub_schema, root, def_dict) @@ -473,7 +473,7 @@ def get_field_datum_value_defs(propschema: SchemaInfo, root: dict) -> dict[str, msg = "Unexpected schema structure" raise ValueError(msg) else: - recursive_dict_update(schema, root, def_dict) + recursive_dict_update(schema, root, def_dict) # type: ignore return {i: j for i, j in def_dict.items() if j} diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 19326d81b..98590a938 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -141,9 +141,7 @@ def debug_mode(arg: bool) -> Iterator[None]: def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, + spec: _JsonParameter, schema: Map, rootschema: Map | None = None ) -> 
None: """ Validates ``spec`` against ``schema`` in the context of ``rootschema``. @@ -191,7 +189,7 @@ def validate_jsonschema( raise NotImplementedError(msg) -def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: +def _get_schema_dialect_uri(schema: Map, /) -> str: """ Return value of `$schema`_. @@ -204,7 +202,7 @@ def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: return schema.get("$schema", _DEFAULT_DIALECT_URI) -def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: +def _prepare_references(schema: Map, /) -> dict[str, Any]: """ Return a deep copy of ``schema`` w/ replaced uri(s). @@ -218,7 +216,7 @@ def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: return dict(_rec_refs(schema)) -def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: +def _rec_refs(m: Map, /) -> Iterator[tuple[str, Any]]: """ Recurse through a schema, yielding fresh copies of mutable containers. @@ -285,9 +283,7 @@ def specification_with(dialect_id: str, /) -> Specification[Any]: """ return _specification_with(dialect_id) - def _validator( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None - ) -> Validator: + def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: """ Constructs a `Validator`_ for future validation. @@ -309,7 +305,7 @@ def _validator( registry = _registry(rootschema or schema, uri) return validator(_prepare_references(schema), registry=registry) - def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: + def _registry(rootschema: Map, dialect_id: str) -> Registry[Any]: """ Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. 
@@ -333,16 +329,12 @@ def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: _REGISTRY_CACHE[cache_key] = registry return registry - def _registry_update( - root: dict[str, Any], dialect_id: str, resolver: Resolver[Any] - ): + def _registry_update(root: Map, dialect_id: str, resolver: Resolver[Any]) -> None: global _REGISTRY_CACHE cache_key = _registry_comp_key(root, dialect_id) _REGISTRY_CACHE[cache_key] = resolver._registry - def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] - ) -> dict[str, Any]: + def _resolve_references(schema: Map, rootschema: Map) -> Map: """ Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. @@ -391,9 +383,7 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: else: - def _validator( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None - ) -> Validator: + def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: """ Constructs a `Validator`_ for future validation. @@ -416,9 +406,7 @@ def _validator( ) return validator(schema, resolver=resolver) - def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] - ) -> dict[str, Any]: + def _resolve_references(schema: Map, rootschema: Map) -> Map: """ Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. 
@@ -1444,7 +1432,8 @@ def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, A ) raise TypeError(msg) else: - return _resolve_references(schema or cls._schema, rootschema=rootschema) + resolved = _resolve_references(schema or cls._schema, rootschema) + return cast("dict[str, Any]", resolved) @classmethod def validate_property( From 8fa8975163cd45aa1c713b7fe027d3619a75fd99 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:48:21 +0100 Subject: [PATCH 62/92] perf: Adds `resolve_references_rpds` See docstring for notes and plan --- tools/schemapi/schemapi.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 98590a938..5eb962c0b 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -262,6 +262,7 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: if TYPE_CHECKING: from referencing import Specification from referencing._core import Resolver + from rpds import HashTrieMap @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: @@ -379,6 +380,22 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: k1 = json.dumps(root, separators=(",", ":"), sort_keys=True) return k1, dialect_id + def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, Any]: + """ + **Experimental** `rust`-speed returned type. + + Directly wraps `_resolve_references`. 
+ + Idea + ---- + - Store the result of this when called from ``_FromDict.from_dict()`` once per unique call + - Reuse the resolved schema, since we don't mutate it after resolving + - Should reduce the cost of ``_FromDict.from_dict()``, when a schema has been seen before + """ + import rpds as rpds + + return rpds.HashTrieMap(_resolve_references(schema, rootschema)) + _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} else: From 5e93933789ab51b8e1f625d768046b0bd15c8034 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:48:59 +0100 Subject: [PATCH 63/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 52 ++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 12314089e..94a67527f 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -143,9 +143,7 @@ def debug_mode(arg: bool) -> Iterator[None]: def validate_jsonschema( - spec: _JsonParameter, - schema: dict[str, Any], - rootschema: dict[str, Any] | None = None, + spec: _JsonParameter, schema: Map, rootschema: Map | None = None ) -> None: """ Validates ``spec`` against ``schema`` in the context of ``rootschema``. @@ -193,7 +191,7 @@ def validate_jsonschema( raise NotImplementedError(msg) -def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: +def _get_schema_dialect_uri(schema: Map, /) -> str: """ Return value of `$schema`_. @@ -206,7 +204,7 @@ def _get_schema_dialect_uri(schema: dict[str, Any]) -> str: return schema.get("$schema", _DEFAULT_DIALECT_URI) -def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: +def _prepare_references(schema: Map, /) -> dict[str, Any]: """ Return a deep copy of ``schema`` w/ replaced uri(s). 
@@ -220,7 +218,7 @@ def _prepare_references(schema: dict[str, Any], /) -> dict[str, Any]: return dict(_rec_refs(schema)) -def _rec_refs(m: dict[str, Any], /) -> Iterator[tuple[str, Any]]: +def _rec_refs(m: Map, /) -> Iterator[tuple[str, Any]]: """ Recurse through a schema, yielding fresh copies of mutable containers. @@ -266,6 +264,7 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: if TYPE_CHECKING: from referencing import Specification from referencing._core import Resolver + from rpds import HashTrieMap @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: @@ -287,9 +286,7 @@ def specification_with(dialect_id: str, /) -> Specification[Any]: """ return _specification_with(dialect_id) - def _validator( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None - ) -> Validator: + def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: """ Constructs a `Validator`_ for future validation. @@ -311,7 +308,7 @@ def _validator( registry = _registry(rootschema or schema, uri) return validator(_prepare_references(schema), registry=registry) - def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: + def _registry(rootschema: Map, dialect_id: str) -> Registry[Any]: """ Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. 
@@ -335,16 +332,12 @@ def _registry(rootschema: dict[str, Any], dialect_id: str) -> Registry[Any]: _REGISTRY_CACHE[cache_key] = registry return registry - def _registry_update( - root: dict[str, Any], dialect_id: str, resolver: Resolver[Any] - ): + def _registry_update(root: Map, dialect_id: str, resolver: Resolver[Any]) -> None: global _REGISTRY_CACHE cache_key = _registry_comp_key(root, dialect_id) _REGISTRY_CACHE[cache_key] = resolver._registry - def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] - ) -> dict[str, Any]: + def _resolve_references(schema: Map, rootschema: Map) -> Map: """ Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. @@ -389,13 +382,27 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: k1 = json.dumps(root, separators=(",", ":"), sort_keys=True) return k1, dialect_id + def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, Any]: + """ + **Experimental** `rust`-speed returned type. + + Directly wraps `_resolve_references`. + + Idea + ---- + - Store the result of this when called from ``_FromDict.from_dict()`` once per unique call + - Reuse the resolved schema, since we don't mutate it after resolving + - Should reduce the cost of ``_FromDict.from_dict()``, when a schema has been seen before + """ + import rpds as rpds + + return rpds.HashTrieMap(_resolve_references(schema, rootschema)) + _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} else: - def _validator( - schema: dict[str, Any], rootschema: dict[str, Any] | None = None - ) -> Validator: + def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: """ Constructs a `Validator`_ for future validation. 
@@ -418,9 +425,7 @@ def _validator( ) return validator(schema, resolver=resolver) - def _resolve_references( - schema: dict[str, Any], rootschema: dict[str, Any] - ) -> dict[str, Any]: + def _resolve_references(schema: Map, rootschema: Map) -> Map: """ Resolve schema references until there is no ``"$ref"`` anymore in the top-level ``dict``. @@ -1446,7 +1451,8 @@ def resolve_references(cls, schema: dict[str, Any] | None = None) -> dict[str, A ) raise TypeError(msg) else: - return _resolve_references(schema or cls._schema, rootschema=rootschema) + resolved = _resolve_references(schema or cls._schema, rootschema) + return cast("dict[str, Any]", resolved) @classmethod def validate_property( From c322e79a4df7bb3607105fc1b545b8037e6cc4b8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 18:33:16 +0100 Subject: [PATCH 64/92] refactor: Rename `_rec_refs` -> `_recurse_refs` --- tools/schemapi/schemapi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 5eb962c0b..d259b63a7 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -213,10 +213,10 @@ def _prepare_references(schema: Map, /) -> dict[str, Any]: ----- ``copy.deepcopy`` is not needed as the iterator yields new objects. """ - return dict(_rec_refs(schema)) + return dict(_recurse_refs(schema)) -def _rec_refs(m: Map, /) -> Iterator[tuple[str, Any]]: +def _recurse_refs(m: Map, /) -> Iterator[tuple[str, Any]]: """ Recurse through a schema, yielding fresh copies of mutable containers. 
@@ -226,9 +226,9 @@ def _rec_refs(m: Map, /) -> Iterator[tuple[str, Any]]: if k == "$ref": yield k, f"{_VEGA_LITE_ROOT_URI}{v}" elif isinstance(v, dict): - yield k, dict(_rec_refs(v)) + yield k, dict(_recurse_refs(v)) elif isinstance(v, list): - yield k, [dict(_rec_refs(el)) if _is_dict(el) else el for el in v] + yield k, [dict(_recurse_refs(el)) if _is_dict(el) else el for el in v] else: yield k, v From 96eed9bfe17d9ef9bdf0aa4d0e35f6907e9c0212 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 21:01:16 +0100 Subject: [PATCH 65/92] perf: Adds `_FromDict.hash_resolved` --- tools/schemapi/schemapi.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index d259b63a7..8e9e0a7f1 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1597,6 +1597,13 @@ class _FromDict: https://en.wikipedia.org/wiki/Breadth-first_search """ + hash_resolved: ClassVar[dict[int, Map]] = {} + """ + Maps unique schemas to their reference-resolved equivalent. + + Ensures that ``_resolve_references`` is evaluated **at most once**, per hash. 
+ """ + def __init__(self, wrapper_classes: Iterator[type[SchemaBase]], /) -> None: cls = type(self) for tp in wrapper_classes: @@ -1665,24 +1672,30 @@ def from_dict( """Construct an object from a dict representation.""" target_tp: Any current_schema: dict[str, Any] + hash_schema: int if isinstance(dct, SchemaBase): return dct elif tp is not None: current_schema = tp._schema + hash_schema = _hash_schema(current_schema) root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: current_schema = schema + hash_schema = _hash_schema(current_schema) root_schema = rootschema or current_schema - matches = cls.hash_tps[_hash_schema(current_schema)] + matches = cls.hash_tps[hash_schema] target_tp = next(iter(matches), default_class) else: msg = "Must provide either `tp` or `schema`, but not both." raise ValueError(msg) from_dict = partial(cls.from_dict, rootschema=root_schema) - # Can also return a list? - resolved = _resolve_references(current_schema, root_schema) + if resolved := cls.hash_resolved.get(hash_schema): + ... + else: + resolved = _resolve_references(current_schema, root_schema) + cls.hash_resolved[hash_schema] = resolved if "anyOf" in resolved or "oneOf" in resolved: schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) for possible in schemas: From 3258f5681020374a9f8e45c29c3962f24b4a9191 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 21:05:02 +0100 Subject: [PATCH 66/92] perf: Remove unreachable `"oneOf"` keyword check Since `v2` this keyword has not been part of any schema. 
It has only been a property of what became `FieldOneOfPredicate` https://github.com/vega/schema/blob/ef61166f3f95154465c4b3ebdca88e3c2d25b005/vega-lite/v2.0.0.json#L4060 --- tools/schemapi/schemapi.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 8e9e0a7f1..9be14e299 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1696,9 +1696,8 @@ def from_dict( else: resolved = _resolve_references(current_schema, root_schema) cls.hash_resolved[hash_schema] = resolved - if "anyOf" in resolved or "oneOf" in resolved: - schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) - for possible in schemas: + if "anyOf" in resolved: + for possible in resolved["anyOf"]: # NOTE: Instead of raise/except/continue # Pre-"zero-cost" exceptions, this has a huge performance gain. # https://docs.python.org/3/whatsnew/3.11.html#misc From d6ce7497d00a21185d079d069d9e530e1795d959 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Thu, 29 Aug 2024 21:06:47 +0100 Subject: [PATCH 67/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 94a67527f..1ad304b52 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -215,10 +215,10 @@ def _prepare_references(schema: Map, /) -> dict[str, Any]: ----- ``copy.deepcopy`` is not needed as the iterator yields new objects. """ - return dict(_rec_refs(schema)) + return dict(_recurse_refs(schema)) -def _rec_refs(m: Map, /) -> Iterator[tuple[str, Any]]: +def _recurse_refs(m: Map, /) -> Iterator[tuple[str, Any]]: """ Recurse through a schema, yielding fresh copies of mutable containers. 
@@ -228,9 +228,9 @@ def _rec_refs(m: Map, /) -> Iterator[tuple[str, Any]]: if k == "$ref": yield k, f"{_VEGA_LITE_ROOT_URI}{v}" elif isinstance(v, dict): - yield k, dict(_rec_refs(v)) + yield k, dict(_recurse_refs(v)) elif isinstance(v, list): - yield k, [dict(_rec_refs(el)) if _is_dict(el) else el for el in v] + yield k, [dict(_recurse_refs(el)) if _is_dict(el) else el for el in v] else: yield k, v @@ -1599,6 +1599,13 @@ class _FromDict: https://en.wikipedia.org/wiki/Breadth-first_search """ + hash_resolved: ClassVar[dict[int, Map]] = {} + """ + Maps unique schemas to their reference-resolved equivalent. + + Ensures that ``_resolve_references`` is evaluated **at most once**, per hash. + """ + def __init__(self, wrapper_classes: Iterator[type[SchemaBase]], /) -> None: cls = type(self) for tp in wrapper_classes: @@ -1667,27 +1674,32 @@ def from_dict( """Construct an object from a dict representation.""" target_tp: Any current_schema: dict[str, Any] + hash_schema: int if isinstance(dct, SchemaBase): return dct elif tp is not None: current_schema = tp._schema + hash_schema = _hash_schema(current_schema) root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: current_schema = schema + hash_schema = _hash_schema(current_schema) root_schema = rootschema or current_schema - matches = cls.hash_tps[_hash_schema(current_schema)] + matches = cls.hash_tps[hash_schema] target_tp = next(iter(matches), default_class) else: msg = "Must provide either `tp` or `schema`, but not both." raise ValueError(msg) from_dict = partial(cls.from_dict, rootschema=root_schema) - # Can also return a list? - resolved = _resolve_references(current_schema, root_schema) - if "anyOf" in resolved or "oneOf" in resolved: - schemas = resolved.get("anyOf", []) + resolved.get("oneOf", []) - for possible in schemas: + if resolved := cls.hash_resolved.get(hash_schema): + ... 
+ else: + resolved = _resolve_references(current_schema, root_schema) + cls.hash_resolved[hash_schema] = resolved + if "anyOf" in resolved: + for possible in resolved["anyOf"]: # NOTE: Instead of raise/except/continue # Pre-"zero-cost" exceptions, this has a huge performance gain. # https://docs.python.org/3/whatsnew/3.11.html#misc From 89fcaf7390d326f691d49ab06d209be75f5bd085 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:03:55 +0100 Subject: [PATCH 68/92] perf: Optimize `dict` branch in `_FromDict.from_dict` - Bypass `dct.items()` when there are no properties -Reuse result of `props` lookup, rather than twice per hit --- tools/schemapi/schemapi.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 9be14e299..a23cdc082 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1661,7 +1661,7 @@ def from_dict( default_class: Any = ..., ) -> Never: ... 
@classmethod - def from_dict( + def from_dict( # noqa: C901 cls, dct: dict[str, Any] | list[dict[str, Any]] | TSchemaBase, tp: type[TSchemaBase] | None = None, @@ -1708,12 +1708,14 @@ def from_dict( if _is_dict(dct): # TODO: handle schemas for additionalProperties/patternProperties - props: dict[str, Any] = resolved.get("properties", {}) - kwds = { - k: (from_dict(v, schema=props[k]) if k in props else v) - for k, v in dct.items() - } - return target_tp(**kwds) + if props := resolved.get("properties"): + kwds = { + k: (from_dict(v, schema=sch) if (sch := props.get(k)) else v) + for k, v in dct.items() + } + return target_tp(**kwds) + else: + return target_tp(**dct) elif _is_list(dct): item_schema: dict[str, Any] = resolved.get("items", {}) return target_tp([from_dict(k, schema=item_schema) for k in dct]) From c9f9d8aae2b52b75555b70e84895c7edc291a42b Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:55:24 +0100 Subject: [PATCH 69/92] docs(perf): Adds benchmark result Very minor improvement --- tests/utils/test_schemapi.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index cba7e2b2c..1fce07009 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -1076,6 +1076,11 @@ def test_chart_validation_benchmark( 108.24s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] 50.33s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] 66.51s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] + + # Post-`dict` branch micro optimization in `_FromDict.from_dict` (3.12.3) + 107.90s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] + 49.63s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] + 66.87s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] ``` """ 
from itertools import chain, repeat From e9c8f857e2eb581540838afd877128b012510baa Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:56:12 +0100 Subject: [PATCH 70/92] chore(perf): Add note on next refactor candidate --- tools/schemapi/schemapi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index a23cdc082..25ca1ec49 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -213,6 +213,8 @@ def _prepare_references(schema: Map, /) -> dict[str, Any]: ----- ``copy.deepcopy`` is not needed as the iterator yields new objects. """ + # FIXME: The hottest function + it is recursive + # Should be done once per schema return dict(_recurse_refs(schema)) From 759a55641071f3aa0548e3d8775e4e3323672581 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:58:09 +0100 Subject: [PATCH 71/92] test: Add note on possibly outdated tests If the current schema doesn't trigger branches, but the test schema(s) do - it may indicate the tests need to be updated to better reflect actual --- tools/schemapi/schemapi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 25ca1ec49..0fdbf4c7f 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1680,6 +1680,8 @@ def from_dict( # noqa: C901 elif tp is not None: current_schema = tp._schema hash_schema = _hash_schema(current_schema) + # NOTE: the `current_schema` branch only triggered for mock schema tests: + # test_schemapi.py::[test_construct_multifaceted_schema, test_copy_method, test_round_trip, test_copy_module, test_from_dict, test_to_from_json, test_to_from_pickle] root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: From dace4883d50e8a1d782acd85985271f654d8842d Mon Sep 17 00:00:00 
2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 14:58:30 +0100 Subject: [PATCH 72/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 1ad304b52..f334a5397 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -215,6 +215,8 @@ def _prepare_references(schema: Map, /) -> dict[str, Any]: ----- ``copy.deepcopy`` is not needed as the iterator yields new objects. """ + # FIXME: The hottest function + it is recursive + # Should be done once per schema return dict(_recurse_refs(schema)) @@ -1663,7 +1665,7 @@ def from_dict( default_class: Any = ..., ) -> Never: ... @classmethod - def from_dict( + def from_dict( # noqa: C901 cls, dct: dict[str, Any] | list[dict[str, Any]] | TSchemaBase, tp: type[TSchemaBase] | None = None, @@ -1680,6 +1682,8 @@ def from_dict( elif tp is not None: current_schema = tp._schema hash_schema = _hash_schema(current_schema) + # NOTE: the `current_schema` branch only triggered for mock schema tests: + # test_schemapi.py::[test_construct_multifaceted_schema, test_copy_method, test_round_trip, test_copy_module, test_from_dict, test_to_from_json, test_to_from_pickle] root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: @@ -1710,12 +1714,14 @@ def from_dict( if _is_dict(dct): # TODO: handle schemas for additionalProperties/patternProperties - props: dict[str, Any] = resolved.get("properties", {}) - kwds = { - k: (from_dict(v, schema=props[k]) if k in props else v) - for k, v in dct.items() - } - return target_tp(**kwds) + if props := resolved.get("properties"): + kwds = { + k: (from_dict(v, schema=sch) if (sch := props.get(k)) else v) + for k, v in dct.items() + } + return target_tp(**kwds) + else: + return target_tp(**dct) elif _is_list(dct): item_schema: dict[str, 
Any] = resolved.get("items", {}) return target_tp([from_dict(k, schema=item_schema) for k in dct]) From 531aa7c51de5541e4d3f97d3622a494471f8db32 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 16:20:26 +0100 Subject: [PATCH 73/92] chore(perf): Add FIXME on recursive source --- tools/schemapi/schemapi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 0fdbf4c7f..fc0f70357 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1685,6 +1685,9 @@ def from_dict( # noqa: C901 root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: + # FIXME: This is the slow branch + # - Improving the perf of the `tp` one is too small scale + # - Every recursive `from_dict` call that isn't solved hits this current_schema = schema hash_schema = _hash_schema(current_schema) root_schema = rootschema or current_schema From 3277b432c72f0dfc77e8b234c37bbe7ebd4f22e8 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 19:04:39 +0100 Subject: [PATCH 74/92] refactor(perf): Crawl registry, init `Resolver` with `_VEGA_LITE_ROOT_URI` --- tools/schemapi/schemapi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index fc0f70357..c803063f5 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -328,7 +328,7 @@ def _registry(rootschema: Map, dialect_id: str) -> Registry[Any]: else: specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) - registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource) + registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource).crawl() _REGISTRY_CACHE[cache_key] = registry return registry @@ -354,9 +354,9 @@ def 
_resolve_references(schema: Map, rootschema: Map) -> Map: return schema uri = _get_schema_dialect_uri(rootschema) registry = _registry(root, uri) - resolver = registry.resolver() + resolver = registry.resolver(_VEGA_LITE_ROOT_URI) while "$ref" in schema: - resolved = resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]) + resolved = resolver.lookup(schema["$ref"]) schema = resolved.contents _registry_update(root, uri, resolved.resolver) return schema @@ -394,7 +394,7 @@ def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, An - Reuse the resolved schema, since we don't mutate it after resolving - Should reduce the cost of ``_FromDict.from_dict()``, when a schema has been seen before """ - import rpds as rpds + import rpds return rpds.HashTrieMap(_resolve_references(schema, rootschema)) From 96dde1832a507d415bd2a40f250e4f45bba78c6d Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 19:06:23 +0100 Subject: [PATCH 75/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index f334a5397..492eb78cb 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -330,7 +330,7 @@ def _registry(rootschema: Map, dialect_id: str) -> Registry[Any]: else: specification = specification_with(dialect_id) resource = specification.create_resource(rootschema) - registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource) + registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource).crawl() _REGISTRY_CACHE[cache_key] = registry return registry @@ -356,9 +356,9 @@ def _resolve_references(schema: Map, rootschema: Map) -> Map: return schema uri = _get_schema_dialect_uri(rootschema) registry = _registry(root, uri) - resolver = registry.resolver() + resolver = registry.resolver(_VEGA_LITE_ROOT_URI) while "$ref" in schema: - resolved = 
resolver.lookup(_VEGA_LITE_ROOT_URI + schema["$ref"]) + resolved = resolver.lookup(schema["$ref"]) schema = resolved.contents _registry_update(root, uri, resolved.resolver) return schema @@ -396,7 +396,7 @@ def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, An - Reuse the resolved schema, since we don't mutate it after resolving - Should reduce the cost of ``_FromDict.from_dict()``, when a schema has been seen before """ - import rpds as rpds + import rpds return rpds.HashTrieMap(_resolve_references(schema, rootschema)) @@ -1687,6 +1687,9 @@ def from_dict( # noqa: C901 root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: + # FIXME: This is the slow branch + # - Improving the perf of the `tp` one is too small scale + # - Every recursive `from_dict` call that isn't solved hits this current_schema = schema hash_schema = _hash_schema(current_schema) root_schema = rootschema or current_schema From ab20005a58b7dfcc92d202c7220a8e1ff5abc6da Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Fri, 30 Aug 2024 20:02:44 +0100 Subject: [PATCH 76/92] refactor: Reuse `JSONEncoder` for hashing --- altair/utils/schemapi.py | 6 ++++-- tools/schemapi/schemapi.py | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 492eb78cb..22599a4f0 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -259,6 +259,8 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: return tp +_HASH_ENCODER = json.JSONEncoder(sort_keys=True, separators=(",", ":")) + if Version(importlib_version("jsonschema")) >= Version("4.18"): from referencing import Registry from referencing.jsonschema import specification_with as _specification_with @@ -381,7 +383,7 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: elif len(root) == 1: k1 = "".join(f"{s!s}" 
for s in chain(*root.items())) else: - k1 = json.dumps(root, separators=(",", ":"), sort_keys=True) + k1 = _HASH_ENCODER.encode(root) return k1, dialect_id def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, Any]: @@ -1539,7 +1541,7 @@ def _hash_schema( """ if isinstance(schema, Mapping): schema = {k: v for k, v in schema.items() if k not in exclude} - return hash(json.dumps(schema, sort_keys=True)) + return hash(_HASH_ENCODER.encode(schema)) def _subclasses(cls: type[TSchemaBase]) -> Iterator[type[TSchemaBase]]: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index c803063f5..939c07c46 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -257,6 +257,8 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: return tp +_HASH_ENCODER = json.JSONEncoder(sort_keys=True, separators=(",", ":")) + if Version(importlib_version("jsonschema")) >= Version("4.18"): from referencing import Registry from referencing.jsonschema import specification_with as _specification_with @@ -379,7 +381,7 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: elif len(root) == 1: k1 = "".join(f"{s!s}" for s in chain(*root.items())) else: - k1 = json.dumps(root, separators=(",", ":"), sort_keys=True) + k1 = _HASH_ENCODER.encode(root) return k1, dialect_id def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, Any]: @@ -1537,7 +1539,7 @@ def _hash_schema( """ if isinstance(schema, Mapping): schema = {k: v for k, v in schema.items() if k not in exclude} - return hash(json.dumps(schema, sort_keys=True)) + return hash(_HASH_ENCODER.encode(schema)) def _subclasses(cls: type[TSchemaBase]) -> Iterator[type[TSchemaBase]]: From a419ede8cda4a53a2919c8e312780b0980884cf9 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sat, 31 Aug 2024 20:05:16 +0100 Subject: [PATCH 77/92] perf: Avoid exception handling in `Chart.from_dict` --- 
altair/utils/schemapi.py | 11 +++++++++++ altair/vegalite/v5/api.py | 11 +++++------ tools/schemapi/schemapi.py | 11 +++++++++++ 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 14f88889b..e7a3dd88b 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1530,6 +1530,17 @@ def _is_iterable( return not isinstance(obj, exclude) and isinstance(obj, Iterable) +def _is_valid(spec: _JsonParameter, tp: type[SchemaBase], /) -> bool: + """ + Return True if ``tp`` can be constructed from ``spec``. + + Notes + ----- + Don't use this if you need to know *details* of the errors in ``spec``.. + """ + return next(_validator(tp._schema, tp._rootschema).iter_errors(spec), None) is None + + def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: return args[0] if args else kwds diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index a2940ecb8..c9d9d8b09 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -22,7 +22,6 @@ ) from typing_extensions import TypeAlias -import jsonschema import narwhals.stable.v1 as nw from altair import utils @@ -37,6 +36,7 @@ ) from altair.utils.data import DataType from altair.utils.data import is_data_type as _is_data_type +from altair.utils.schemapi import _is_valid from .compiler import vegalite_compilers from .data import data_transformers @@ -3724,14 +3724,13 @@ def from_dict( jsonschema.ValidationError : If ``validate`` and ``dct`` does not conform to the schema """ + if not validate: + return super().from_dict(dct, validate=False) _tp: Any for tp in TopLevelMixin.__subclasses__(): _tp = super() if tp is Chart else tp - # FIXME: Hot try/except - try: - return _tp.from_dict(dct, validate=validate) - except jsonschema.ValidationError: - pass + if _is_valid(dct, _tp): + return _tp.from_dict(dct, validate=False) # As a last resort, try using the Root vegalite object return t.cast(_TSchemaBase, 
core.Root.from_dict(dct, validate)) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 5cc697e07..dea8805e5 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1528,6 +1528,17 @@ def _is_iterable( return not isinstance(obj, exclude) and isinstance(obj, Iterable) +def _is_valid(spec: _JsonParameter, tp: type[SchemaBase], /) -> bool: + """ + Return True if ``tp`` can be constructed from ``spec``. + + Notes + ----- + Don't use this if you need to know *details* of the errors in ``spec``.. + """ + return next(_validator(tp._schema, tp._rootschema).iter_errors(spec), None) is None + + def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: return args[0] if args else kwds From 4a7cc4e32433452499fcabeef64a671736968a88 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:21:17 +0100 Subject: [PATCH 78/92] chore(perf): Remove `# FIXME`(s) that had no effect --- altair/utils/core.py | 1 - altair/vegalite/v5/api.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/altair/utils/core.py b/altair/utils/core.py index a1d81c39e..8df22b154 100644 --- a/altair/utils/core.py +++ b/altair/utils/core.py @@ -830,7 +830,6 @@ def from_channels(cls, channels: ModuleType, /) -> _ChannelCache: @classmethod def from_cache(cls) -> _ChannelCache: global _CHANNEL_CACHE - # FIXME: Hot try/except try: cached = _CHANNEL_CACHE except NameError: diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index c9d9d8b09..ae7428960 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -1803,7 +1803,6 @@ def to_dict( # noqa: C901 copy = _top_schema_base(self).copy(deep=False) original_data = getattr(copy, "data", Undefined) if not utils.is_undefined(original_data): - # FIXME: Hot try/except try: data = _to_eager_narwhals_dataframe(original_data) except TypeError: @@ -3419,7 +3418,6 @@ def _repr_mimebundle_(self, *args, **kwds) -> MimeBundleType | 
None: # type:ign """Return a MIME bundle for display in Jupyter frontends.""" # Catch errors explicitly to get around issues in Jupyter frontend # see https://github.com/ipython/ipython/issues/11038 - # FIXME: Hot try/except try: dct = self.to_dict(context={"pre_transform": False}) except Exception: From 9bc3ccd16bc648d678ac0d8b5ce6afdc48dd737a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Sun, 1 Sep 2024 13:23:05 +0100 Subject: [PATCH 79/92] chore: Remove `# FIXME`(s) that won't be addressed Maybe revisit lazy imports in the future, but for now this is too unrelated to the PR --- altair/utils/_importers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/altair/utils/_importers.py b/altair/utils/_importers.py index 93e647f33..14085ebcf 100644 --- a/altair/utils/_importers.py +++ b/altair/utils/_importers.py @@ -76,7 +76,6 @@ def vl_version_for_vl_convert() -> str: def import_pyarrow_interchange() -> ModuleType: min_version = "11.0.0" - # FIXME: Hot try/except try: version = importlib_version("pyarrow") @@ -103,7 +102,6 @@ def import_pyarrow_interchange() -> ModuleType: def pyarrow_available() -> bool: - # FIXME: Hot try/except try: import_pyarrow_interchange() return True From af480f55a9e67e7f1e4f8e08be9a6e0fb166b2b3 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:01:38 +0100 Subject: [PATCH 80/92] refactor: Remove unused `_freeze` See 8ca426675379f4e65d025075c81bc099c6cdadb3 --- tools/schemapi/schemapi.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index dea8805e5..09c23d4ff 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -91,7 +91,8 @@ This URI is arbitrary and could be anything else. 
It just cannot be an empty string as we need to reference the schema registered in -the ``referencing.Registry``.""" +the ``referencing.Registry``. +""" _DEFAULT_DIALECT_URI: LiteralString = "http://json-schema.org/draft-07/schema#" """ @@ -1543,20 +1544,6 @@ def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: return args[0] if args else kwds -def _freeze(val): - # NOTE: No longer referenced - # - Previously only called during tests - # - Not during any library code - if isinstance(val, dict): - return frozenset((k, _freeze(v)) for k, v in val.items()) - elif isinstance(val, set): - return frozenset(_freeze(v) for v in val) - elif isinstance(val, (list, tuple)): - return tuple(_freeze(v) for v in val) - else: - return val - - def _hash_schema( schema: _JsonParameter, /, From dff817ba604270de6acf6ceb6904d4df83ebe839 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:21:00 +0100 Subject: [PATCH 81/92] chore: Remove comments Moving to discussion threads on the PR --- tools/schemapi/schemapi.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 09c23d4ff..5fd4c1385 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1366,8 +1366,6 @@ def from_dict( """ if validate: cls.validate(dct) - # NOTE: the breadth-first search occurs only once now - # `_FromDict` is purely ClassVar/classmethods converter: type[_FromDict] | _FromDict = ( _FromDict if _FromDict.hash_tps @@ -1707,14 +1705,9 @@ def from_dict( # noqa: C901 elif tp is not None: current_schema = tp._schema hash_schema = _hash_schema(current_schema) - # NOTE: the `current_schema` branch only triggered for mock schema tests: - # test_schemapi.py::[test_construct_multifaceted_schema, test_copy_method, test_round_trip, test_copy_module, test_from_dict, test_to_from_json, test_to_from_pickle] root_schema: dict[str, Any] = rootschema or tp._rootschema 
or current_schema target_tp = tp elif schema is not None: - # FIXME: This is the slow branch - # - Improving the perf of the `tp` one is too small scale - # - Every recursive `from_dict` call that isn't solved hits this current_schema = schema hash_schema = _hash_schema(current_schema) root_schema = rootschema or current_schema From 133aa01bd527555942446e719974d334e9cb5cda Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:22:19 +0100 Subject: [PATCH 82/92] build: run `generate-schema-wrapper` --- altair/utils/schemapi.py | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index e7a3dd88b..8a9876a53 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -93,7 +93,8 @@ This URI is arbitrary and could be anything else. It just cannot be an empty string as we need to reference the schema registered in -the ``referencing.Registry``.""" +the ``referencing.Registry``. 
+""" _DEFAULT_DIALECT_URI: LiteralString = "http://json-schema.org/draft-07/schema#" """ @@ -1367,8 +1368,6 @@ def from_dict( """ if validate: cls.validate(dct) - # NOTE: the breadth-first search occurs only once now - # `_FromDict` is purely ClassVar/classmethods converter: type[_FromDict] | _FromDict = ( _FromDict if _FromDict.hash_tps @@ -1545,20 +1544,6 @@ def _passthrough(*args: Any, **kwds: Any) -> Any | dict[str, Any]: return args[0] if args else kwds -def _freeze(val): - # NOTE: No longer referenced - # - Previously only called during tests - # - Not during any library code - if isinstance(val, dict): - return frozenset((k, _freeze(v)) for k, v in val.items()) - elif isinstance(val, set): - return frozenset(_freeze(v) for v in val) - elif isinstance(val, (list, tuple)): - return tuple(_freeze(v) for v in val) - else: - return val - - def _hash_schema( schema: _JsonParameter, /, @@ -1722,14 +1707,9 @@ def from_dict( # noqa: C901 elif tp is not None: current_schema = tp._schema hash_schema = _hash_schema(current_schema) - # NOTE: the `current_schema` branch only triggered for mock schema tests: - # test_schemapi.py::[test_construct_multifaceted_schema, test_copy_method, test_round_trip, test_copy_module, test_from_dict, test_to_from_json, test_to_from_pickle] root_schema: dict[str, Any] = rootschema or tp._rootschema or current_schema target_tp = tp elif schema is not None: - # FIXME: This is the slow branch - # - Improving the perf of the `tp` one is too small scale - # - Every recursive `from_dict` call that isn't solved hits this current_schema = schema hash_schema = _hash_schema(current_schema) root_schema = rootschema or current_schema From 141c8d14ee4fc07392c7a0ac89083404d4381d4e Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 16:45:51 +0100 Subject: [PATCH 83/92] refactor: Collapse `...` following `:=` Not sure why I wrote this with `...` originally. 
--- altair/utils/schemapi.py | 4 +--- tools/schemapi/schemapi.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 8a9876a53..7e790273d 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1720,9 +1720,7 @@ def from_dict( # noqa: C901 raise ValueError(msg) from_dict = partial(cls.from_dict, rootschema=root_schema) - if resolved := cls.hash_resolved.get(hash_schema): - ... - else: + if (resolved := cls.hash_resolved.get(hash_schema)) is None: resolved = _resolve_references(current_schema, root_schema) cls.hash_resolved[hash_schema] = resolved if "anyOf" in resolved: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 5fd4c1385..d1e43010f 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1718,9 +1718,7 @@ def from_dict( # noqa: C901 raise ValueError(msg) from_dict = partial(cls.from_dict, rootschema=root_schema) - if resolved := cls.hash_resolved.get(hash_schema): - ... - else: + if (resolved := cls.hash_resolved.get(hash_schema)) is None: resolved = _resolve_references(current_schema, root_schema) cls.hash_resolved[hash_schema] = resolved if "anyOf" in resolved: From fba3c46c0aa69a99d731c6a39a90ce396eae8863 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 17:18:38 +0100 Subject: [PATCH 84/92] chore: Remove temp notes from `_subclasses` --- altair/utils/schemapi.py | 26 +------------------------- tools/schemapi/schemapi.py | 26 +------------------------- 2 files changed, 2 insertions(+), 50 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 7e790273d..027e81556 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1568,31 +1568,7 @@ def _hash_schema( def _subclasses(cls: type[TSchemaBase]) -> Iterator[type[TSchemaBase]]: - """ - Breadth-first sequence of all classes which inherit from ``cls``. 
- - Notes - ----- - - `__subclasses__()` alone isn't helpful, as that is only immediate subclasses - - Deterministic - - Used for `SchemaBase` & `VegaLiteSchema` - - In practice, it provides an iterator over all classes in the schema below `VegaLiteSchema` - - The first one is `Root` - - The order itself, I don't think is important - - But probably important that it doesn't change - - Thinking they used an iterator so that the subclasses are evaluated after they have all been defined - - - `Chart` seems to try to avoid calling this - - Using `TopLevelMixin.__subclasses__()` first if possible - - It is always called during `Chart.encode()` - - Chart.encode() - - altair.utils.core.infer_encoding_types - - _ChannelCache.infer_encoding_types - - _ChannelCache._wrap_in_channel - - SchemaBase.from_dict (recursive, hot loop, validate =False, within a try/except) - - _FromDict(cls._default_wrapper_classes()) - - schemapi._subclasses(schema.core.VegaLiteSchema) - """ + """Breadth-first sequence of all classes which inherit from ``cls``.""" seen = set() current: set[type[TSchemaBase]] = {cls} while current: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index d1e43010f..e309b084c 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1566,31 +1566,7 @@ def _hash_schema( def _subclasses(cls: type[TSchemaBase]) -> Iterator[type[TSchemaBase]]: - """ - Breadth-first sequence of all classes which inherit from ``cls``. 
- - Notes - ----- - - `__subclasses__()` alone isn't helpful, as that is only immediate subclasses - - Deterministic - - Used for `SchemaBase` & `VegaLiteSchema` - - In practice, it provides an iterator over all classes in the schema below `VegaLiteSchema` - - The first one is `Root` - - The order itself, I don't think is important - - But probably important that it doesn't change - - Thinking they used an iterator so that the subclasses are evaluated after they have all been defined - - - `Chart` seems to try to avoid calling this - - Using `TopLevelMixin.__subclasses__()` first if possible - - It is always called during `Chart.encode()` - - Chart.encode() - - altair.utils.core.infer_encoding_types - - _ChannelCache.infer_encoding_types - - _ChannelCache._wrap_in_channel - - SchemaBase.from_dict (recursive, hot loop, validate =False, within a try/except) - - _FromDict(cls._default_wrapper_classes()) - - schemapi._subclasses(schema.core.VegaLiteSchema) - """ + """Breadth-first sequence of all classes which inherit from ``cls``.""" seen = set() current: set[type[TSchemaBase]] = {cls} while current: From 44f52274696019c8706ba24488bee9f924d488d1 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 17:22:38 +0100 Subject: [PATCH 85/92] chore: Remove outdated TODO --- altair/utils/schemapi.py | 2 -- tools/schemapi/schemapi.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 027e81556..14effd02b 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1143,8 +1143,6 @@ class SchemaBase: the _rootschema class attribute) which is used for validation. 
""" - # TODO: Implement `ClassVar` validation using https://peps.python.org/pep-0487/ - _schema: ClassVar[dict[str, Any] | Any] = None _rootschema: ClassVar[dict[str, Any] | Any] = None _class_is_valid_at_instantiation: ClassVar[bool] = True diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index e309b084c..f1f157901 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1141,8 +1141,6 @@ class SchemaBase: the _rootschema class attribute) which is used for validation. """ - # TODO: Implement `ClassVar` validation using https://peps.python.org/pep-0487/ - _schema: ClassVar[dict[str, Any] | Any] = None _rootschema: ClassVar[dict[str, Any] | Any] = None _class_is_valid_at_instantiation: ClassVar[bool] = True From 5d9fb651eff416b682a5d84bcbc30ccad611f45a Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 17:28:32 +0100 Subject: [PATCH 86/92] refactor: Remove unused `resolve_references_rpds` Wasn't able to demonstrate a performance improvement --- altair/utils/schemapi.py | 17 ----------------- tools/schemapi/schemapi.py | 17 ----------------- 2 files changed, 34 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 14effd02b..3ce87b92e 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -271,7 +271,6 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: if TYPE_CHECKING: from referencing import Specification from referencing._core import Resolver - from rpds import HashTrieMap @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: @@ -389,22 +388,6 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: k1 = _HASH_ENCODER.encode(root) return k1, dialect_id - def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, Any]: - """ - **Experimental** `rust`-speed returned type. - - Directly wraps `_resolve_references`. 
- - Idea - ---- - - Store the result of this when called from ``_FromDict.from_dict()`` once per unique call - - Reuse the resolved schema, since we don't mutate it after resolving - - Should reduce the cost of ``_FromDict.from_dict()``, when a schema has been seen before - """ - import rpds - - return rpds.HashTrieMap(_resolve_references(schema, rootschema)) - _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} else: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index f1f157901..483fb88e5 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -269,7 +269,6 @@ def _validator_for(uri: str, /) -> Callable[..., Validator]: if TYPE_CHECKING: from referencing import Specification from referencing._core import Resolver - from rpds import HashTrieMap @lru_cache(maxsize=None) def specification_with(dialect_id: str, /) -> Specification[Any]: @@ -387,22 +386,6 @@ def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: k1 = _HASH_ENCODER.encode(root) return k1, dialect_id - def resolve_references_rpds(schema: Map, rootschema: Map) -> HashTrieMap[str, Any]: - """ - **Experimental** `rust`-speed returned type. - - Directly wraps `_resolve_references`. 
- - Idea - ---- - - Store the result of this when called from ``_FromDict.from_dict()`` once per unique call - - Reuse the resolved schema, since we don't mutate it after resolving - - Should reduce the cost of ``_FromDict.from_dict()``, when a schema has been seen before - """ - import rpds - - return rpds.HashTrieMap(_resolve_references(schema, rootschema)) - _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} else: From a26b3f16a3a947227c0b86742b25bdcc281686cf Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Mon, 2 Sep 2024 18:34:08 +0100 Subject: [PATCH 87/92] refactor: Collect functions, global into `_Registry` --- altair/utils/schemapi.py | 129 +++++++++++++++++++++---------------- tools/schemapi/schemapi.py | 129 +++++++++++++++++++++---------------- 2 files changed, 146 insertions(+), 112 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 3ce87b92e..6f05ff19f 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -292,6 +292,74 @@ def specification_with(dialect_id: str, /) -> Specification[Any]: """ return _specification_with(dialect_id) + class _Registry: + """ + A cache of `Registry`_ (s). + + An instance named ``registry`` is used to wrap the `Registry`_ API, + with a managed cache. + + See Also + -------- + ``_Registry.__call__`` + + .. _Registry: + https://referencing.readthedocs.io/en/stable/api/#referencing.Registry + """ + + _cached: ClassVar[dict[tuple[str, str], Registry[Any]]] = {} + + @staticmethod + def compute_key(root: Map, dialect_id: str, /) -> tuple[str, str]: + """ + Generate a simple-minded hash to identify a registry. + + Notes + ----- + Why the strange hash? + - **All** generated schemas hit the ``"$ref"`` branch. + - ``api.Then`` hits the len(...) 1 branch w/ ``{"type": "object"}``. 
+ - Final branch is only hit by mock schemas in: + - `tests/utils/test_core.py::test_infer_encoding_types` + - `tests/utils/test_schemapi.py` + """ + if "$ref" in root: + k1 = root["$ref"] + elif len(root) == 1: + k1 = "".join(f"{s!s}" for s in chain(*root.items())) + else: + k1 = _HASH_ENCODER.encode(root) + return k1, dialect_id + + @classmethod + def update_cached( + cls, root: Map, dialect_id: str, resolver: Resolver[Any] + ) -> None: + cls._cached[cls.compute_key(root, dialect_id)] = resolver._registry + + def __call__(self, root: Map, dialect_id: str, /) -> Registry[Any]: + """ + Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. + + Requires at least ``jsonschema`` `v4.18.0a1`_. + + .. _Registry: + https://referencing.readthedocs.io/en/stable/api/#referencing.Registry + .. _Resource: + https://referencing.readthedocs.io/en/stable/api/#referencing.Resource + .. _v4.18.0a1: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + """ + cache_key = self.compute_key(root, dialect_id) + if (reg := self._cached.get(cache_key, None)) is not None: + return reg + resource = specification_with(dialect_id).create_resource(root) + reg = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource).crawl() + type(self)._cached[cache_key] = reg + return reg + + registry: _Registry = _Registry() + def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: """ Constructs a `Validator`_ for future validation. @@ -311,37 +379,9 @@ def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: # NOTE: This is the current version uri = _get_schema_dialect_uri(rootschema or schema) validator = _validator_for(uri) - registry = _registry(rootschema or schema, uri) - return validator(_prepare_references(schema), registry=registry) - - def _registry(rootschema: Map, dialect_id: str) -> Registry[Any]: - """ - Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. 
- - Requires at least ``jsonschema`` `v4.18.0a1`_. - - .. _Registry: - https://referencing.readthedocs.io/en/stable/api/#referencing.Registry - .. _Resource: - https://referencing.readthedocs.io/en/stable/api/#referencing.Resource - .. _v4.18.0a1: - https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 - """ - global _REGISTRY_CACHE - cache_key = _registry_comp_key(rootschema, dialect_id) - if (registry := _REGISTRY_CACHE.get(cache_key, None)) is not None: - return registry - else: - specification = specification_with(dialect_id) - resource = specification.create_resource(rootschema) - registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource).crawl() - _REGISTRY_CACHE[cache_key] = registry - return registry - - def _registry_update(root: Map, dialect_id: str, resolver: Resolver[Any]) -> None: - global _REGISTRY_CACHE - cache_key = _registry_comp_key(root, dialect_id) - _REGISTRY_CACHE[cache_key] = resolver._registry + return validator( + _prepare_references(schema), registry=registry(rootschema or schema, uri) + ) def _resolve_references(schema: Map, rootschema: Map) -> Map: """ @@ -359,36 +399,13 @@ def _resolve_references(schema: Map, rootschema: Map) -> Map: if ("$ref" not in root) or ("$ref" not in schema): return schema uri = _get_schema_dialect_uri(rootschema) - registry = _registry(root, uri) - resolver = registry.resolver(_VEGA_LITE_ROOT_URI) + resolver = registry(root, uri).resolver(_VEGA_LITE_ROOT_URI) while "$ref" in schema: resolved = resolver.lookup(schema["$ref"]) schema = resolved.contents - _registry_update(root, uri, resolved.resolver) + registry.update_cached(root, uri, resolved.resolver) return schema - def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: - """ - Generate a simple-minded hash to identify a registry. - - Notes - ----- - Why the strange hash? - - **All** generated schemas hit the ``"$ref"`` branch. - - ``api.Then`` hits the len(...) 1 branch w/ ``{"type": "object"}``. 
- - Final branch is only hit by mock schemas in: - - `tests/utils/test_core.py::test_infer_encoding_types` - - `tests/utils/test_schemapi.py` - """ - if "$ref" in root: - k1 = root["$ref"] - elif len(root) == 1: - k1 = "".join(f"{s!s}" for s in chain(*root.items())) - else: - k1 = _HASH_ENCODER.encode(root) - return k1, dialect_id - - _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} else: diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 483fb88e5..2320ceb1d 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -290,6 +290,74 @@ def specification_with(dialect_id: str, /) -> Specification[Any]: """ return _specification_with(dialect_id) + class _Registry: + """ + A cache of `Registry`_ (s). + + An instance named ``registry`` is used to wrap the `Registry`_ API, + with a managed cache. + + See Also + -------- + ``_Registry.__call__`` + + .. _Registry: + https://referencing.readthedocs.io/en/stable/api/#referencing.Registry + """ + + _cached: ClassVar[dict[tuple[str, str], Registry[Any]]] = {} + + @staticmethod + def compute_key(root: Map, dialect_id: str, /) -> tuple[str, str]: + """ + Generate a simple-minded hash to identify a registry. + + Notes + ----- + Why the strange hash? + - **All** generated schemas hit the ``"$ref"`` branch. + - ``api.Then`` hits the len(...) 1 branch w/ ``{"type": "object"}``. 
+ - Final branch is only hit by mock schemas in: + - `tests/utils/test_core.py::test_infer_encoding_types` + - `tests/utils/test_schemapi.py` + """ + if "$ref" in root: + k1 = root["$ref"] + elif len(root) == 1: + k1 = "".join(f"{s!s}" for s in chain(*root.items())) + else: + k1 = _HASH_ENCODER.encode(root) + return k1, dialect_id + + @classmethod + def update_cached( + cls, root: Map, dialect_id: str, resolver: Resolver[Any] + ) -> None: + cls._cached[cls.compute_key(root, dialect_id)] = resolver._registry + + def __call__(self, root: Map, dialect_id: str, /) -> Registry[Any]: + """ + Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. + + Requires at least ``jsonschema`` `v4.18.0a1`_. + + .. _Registry: + https://referencing.readthedocs.io/en/stable/api/#referencing.Registry + .. _Resource: + https://referencing.readthedocs.io/en/stable/api/#referencing.Resource + .. _v4.18.0a1: + https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 + """ + cache_key = self.compute_key(root, dialect_id) + if (reg := self._cached.get(cache_key, None)) is not None: + return reg + resource = specification_with(dialect_id).create_resource(root) + reg = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource).crawl() + type(self)._cached[cache_key] = reg + return reg + + registry: _Registry = _Registry() + def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: """ Constructs a `Validator`_ for future validation. @@ -309,37 +377,9 @@ def _validator(schema: Map, rootschema: Map | None = None, /) -> Validator: # NOTE: This is the current version uri = _get_schema_dialect_uri(rootschema or schema) validator = _validator_for(uri) - registry = _registry(rootschema or schema, uri) - return validator(_prepare_references(schema), registry=registry) - - def _registry(rootschema: Map, dialect_id: str) -> Registry[Any]: - """ - Constructs a `Registry`_, adding the `Resource`_ produced by ``rootschema``. 
- - Requires at least ``jsonschema`` `v4.18.0a1`_. - - .. _Registry: - https://referencing.readthedocs.io/en/stable/api/#referencing.Registry - .. _Resource: - https://referencing.readthedocs.io/en/stable/api/#referencing.Resource - .. _v4.18.0a1: - https://github.com/python-jsonschema/jsonschema/releases/tag/v4.18.0a1 - """ - global _REGISTRY_CACHE - cache_key = _registry_comp_key(rootschema, dialect_id) - if (registry := _REGISTRY_CACHE.get(cache_key, None)) is not None: - return registry - else: - specification = specification_with(dialect_id) - resource = specification.create_resource(rootschema) - registry = Registry().with_resource(_VEGA_LITE_ROOT_URI, resource).crawl() - _REGISTRY_CACHE[cache_key] = registry - return registry - - def _registry_update(root: Map, dialect_id: str, resolver: Resolver[Any]) -> None: - global _REGISTRY_CACHE - cache_key = _registry_comp_key(root, dialect_id) - _REGISTRY_CACHE[cache_key] = resolver._registry + return validator( + _prepare_references(schema), registry=registry(rootschema or schema, uri) + ) def _resolve_references(schema: Map, rootschema: Map) -> Map: """ @@ -357,36 +397,13 @@ def _resolve_references(schema: Map, rootschema: Map) -> Map: if ("$ref" not in root) or ("$ref" not in schema): return schema uri = _get_schema_dialect_uri(rootschema) - registry = _registry(root, uri) - resolver = registry.resolver(_VEGA_LITE_ROOT_URI) + resolver = registry(root, uri).resolver(_VEGA_LITE_ROOT_URI) while "$ref" in schema: resolved = resolver.lookup(schema["$ref"]) schema = resolved.contents - _registry_update(root, uri, resolved.resolver) + registry.update_cached(root, uri, resolved.resolver) return schema - def _registry_comp_key(root: Map, dialect_id: str, /) -> tuple[str, str]: - """ - Generate a simple-minded hash to identify a registry. - - Notes - ----- - Why the strange hash? - - **All** generated schemas hit the ``"$ref"`` branch. - - ``api.Then`` hits the len(...) 1 branch w/ ``{"type": "object"}``. 
- - Final branch is only hit by mock schemas in: - - `tests/utils/test_core.py::test_infer_encoding_types` - - `tests/utils/test_schemapi.py` - """ - if "$ref" in root: - k1 = root["$ref"] - elif len(root) == 1: - k1 = "".join(f"{s!s}" for s in chain(*root.items())) - else: - k1 = _HASH_ENCODER.encode(root) - return k1, dialect_id - - _REGISTRY_CACHE: dict[tuple[str, str], Registry[Any]] = {} else: From 82a81061a28e2554f45c40f4d3a902d84c9337fe Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:19:43 +0100 Subject: [PATCH 88/92] chore: Add `_is_valid` to `schemapi.__all__` Used in `Chart.from_dict` --- altair/utils/schemapi.py | 1 + tools/schemapi/schemapi.py | 1 + 2 files changed, 2 insertions(+) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 6f05ff19f..692e7a444 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -79,6 +79,7 @@ "SchemaBase", # altair.vegalite.v5.schema.core "Undefined", # altair.utils "UndefinedType", # altair.vegalite.v5.schema.core -> (side-effect relied on to propagate to alt.__init__) + "_is_valid", # altair.vegalite.v5.api "_resolve_references", # tools.schemapi.utils -> tools.generate_schema_wrapper "_subclasses", # altair.vegalite.v5.schema.core "is_undefined", # altair.typing diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index 2320ceb1d..f7ac217a4 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -77,6 +77,7 @@ "SchemaBase", # altair.vegalite.v5.schema.core "Undefined", # altair.utils "UndefinedType", # altair.vegalite.v5.schema.core -> (side-effect relied on to propagate to alt.__init__) + "_is_valid", # altair.vegalite.v5.api "_resolve_references", # tools.schemapi.utils -> tools.generate_schema_wrapper "_subclasses", # altair.vegalite.v5.schema.core "is_undefined", # altair.typing From af783b2031aedd12fb0c3074bbfd41d618be7236 Mon Sep 17 00:00:00 2001 From: dangotbanned 
<125183946+dangotbanned@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:38:41 +0100 Subject: [PATCH 89/92] chore: Remove more comments All were notes added earlier in PR, but not needed now --- altair/utils/schemapi.py | 14 -------------- tests/vegalite/v5/test_api.py | 3 --- tools/schemapi/schemapi.py | 14 -------------- 3 files changed, 31 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 692e7a444..41db4fee7 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1044,14 +1044,6 @@ def __init_subclass__( **kwds: Any, ) -> None: super().__init_subclass__(*args, **kwds) - # NOTE: `SchemaBase` itself would have no `_schema` or `_rootschema`, but won't be run through this - # FIXED: `VegaLiteSchema` has a `_rootschema` but no `_schema` - # FIXED: `Root` uses `VegaLiteSchema._rootschema`, for `_schema` and inherits the same for `_rootschema` - # FIXED: Both have only `_schema` - which is a type - # - `api.Then`: _schema = {"type": "object"} - # - `expr.core.Expression`: _schema = {"type": "string"} - # ---- - # All others either *only* define `_schema`, or inherit it when they are a channel if schema is None: if hasattr(cls, "_schema"): schema = cls._schema @@ -1061,7 +1053,6 @@ def __init_subclass__( "_schema class attribute is not defined." ) raise TypeError(msg) - if rootschema is None: if hasattr(cls, "_rootschema"): rootschema = cls._rootschema @@ -1070,11 +1061,6 @@ def __init_subclass__( else: msg = "`rootschema` must be provided if `schema` contains a `'$ref'` and does not inherit one." 
raise TypeError(msg) - - # NOTE: Inherit a `False`instead of overwriting with the default `True` - # - If a parent is not valid at init, then none of its subclasses can be - # - The current hierarchy does not support the inverse of this - # - Subclasses may declare they are not valid if valid_at_init is None: valid_at_init = cls._class_is_valid_at_instantiation cls._schema = schema diff --git a/tests/vegalite/v5/test_api.py b/tests/vegalite/v5/test_api.py index 48cbe0367..3278f8f62 100644 --- a/tests/vegalite/v5/test_api.py +++ b/tests/vegalite/v5/test_api.py @@ -1228,9 +1228,6 @@ def test_themes(): assert "config" not in chart.to_dict() -# TODO: Investigate alternative to looped try/except/pass -# - AFAIK it would speed up `Chart.from_dict()` -# - but maybe not central enough to have general impact def test_chart_from_dict() -> None: base = alt.Chart("data.csv").mark_point().encode(x="x:Q", y="y:Q") diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index f7ac217a4..ec5a6d21f 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1042,14 +1042,6 @@ def __init_subclass__( **kwds: Any, ) -> None: super().__init_subclass__(*args, **kwds) - # NOTE: `SchemaBase` itself would have no `_schema` or `_rootschema`, but won't be run through this - # FIXED: `VegaLiteSchema` has a `_rootschema` but no `_schema` - # FIXED: `Root` uses `VegaLiteSchema._rootschema`, for `_schema` and inherits the same for `_rootschema` - # FIXED: Both have only `_schema` - which is a type - # - `api.Then`: _schema = {"type": "object"} - # - `expr.core.Expression`: _schema = {"type": "string"} - # ---- - # All others either *only* define `_schema`, or inherit it when they are a channel if schema is None: if hasattr(cls, "_schema"): schema = cls._schema @@ -1059,7 +1051,6 @@ def __init_subclass__( "_schema class attribute is not defined." 
) raise TypeError(msg) - if rootschema is None: if hasattr(cls, "_rootschema"): rootschema = cls._rootschema @@ -1068,11 +1059,6 @@ def __init_subclass__( else: msg = "`rootschema` must be provided if `schema` contains a `'$ref'` and does not inherit one." raise TypeError(msg) - - # NOTE: Inherit a `False`instead of overwriting with the default `True` - # - If a parent is not valid at init, then none of its subclasses can be - # - The current hierarchy does not support the inverse of this - # - Subclasses may declare they are not valid if valid_at_init is None: valid_at_init = cls._class_is_valid_at_instantiation cls._schema = schema From 9a48448e334c53d2c431bb9a79eb397bc7c0e7d7 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 3 Sep 2024 15:52:11 +0100 Subject: [PATCH 90/92] refactor: Remove `_SchemaBasePEP487` & test suite WIll put this on another branch, won't improve performance so not relevant here https://github.com/vega/altair/pull/3547#issuecomment-2315914787 --- altair/utils/schemapi.py | 112 ------------------------- tests/utils/test_schemapi.py | 154 ----------------------------------- tools/schemapi/schemapi.py | 112 ------------------------- 3 files changed, 378 deletions(-) diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index 41db4fee7..37335ee0d 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -1010,118 +1010,6 @@ def _deep_copy(obj: _CopyImpl | Any, by_ref: set[str]) -> _CopyImpl | Any: return obj -class _SchemaBasePEP487: - """Minimal demo for testing feasibility of `__init_subclass__`.""" - - _schema: ClassVar[dict[str, Any]] - _rootschema: ClassVar[dict[str, Any]] - _class_is_valid_at_instantiation: ClassVar[bool] = True - - def __init__(self, *args: Any, **kwds: Any) -> None: - if (kwds and args) or len(args) > 1: - name = type(self).__name__ - _args = ", ".join(f"{a!r}" for a in args) - _kwds = ", ".join(f"{k}={v!r}" for k, v in kwds.items()) - msg = 
( - f"Expected either:\n" - f" - a single arg with no kwds, for, e.g. {{'type': 'string'}}\n" - f" - zero args with zero or more kwds for {{'type': 'object'}}\n\n" - f"but got: {name}({_args}, {_kwds})" - ) - raise AssertionError(msg) - # use object.__setattr__ because we override setattr below. - self._args: tuple[Any, ...] - self._kwds: dict[str, Any] - object.__setattr__(self, "_args", args) - object.__setattr__(self, "_kwds", kwds) - - def __init_subclass__( - cls, - *args: Any, - schema: dict[str, Any] | None = None, - rootschema: dict[str, Any] | None = None, - valid_at_init: bool | None = None, - **kwds: Any, - ) -> None: - super().__init_subclass__(*args, **kwds) - if schema is None: - if hasattr(cls, "_schema"): - schema = cls._schema - else: - msg = ( - f"Cannot instantiate object of type {cls}: " - "_schema class attribute is not defined." - ) - raise TypeError(msg) - if rootschema is None: - if hasattr(cls, "_rootschema"): - rootschema = cls._rootschema - elif "$ref" not in schema: - rootschema = schema - else: - msg = "`rootschema` must be provided if `schema` contains a `'$ref'` and does not inherit one." - raise TypeError(msg) - if valid_at_init is None: - valid_at_init = cls._class_is_valid_at_instantiation - cls._schema = schema - cls._rootschema = rootschema - cls._class_is_valid_at_instantiation = valid_at_init - - @overload - def _get(self, attr: str, default: Optional = ...) -> Any | UndefinedType: ... - @overload - def _get(self, attr: str, default: T) -> Any | T: ... 
- def _get(self, attr: str, default: Optional[T] = Undefined) -> Any | T: - """Get an attribute, returning default if not present.""" - if (item := self._kwds.get(attr, Undefined)) is not Undefined: - return item - else: - return default - - def __dir__(self) -> list[str]: - return sorted(chain(super().__dir__(), self._kwds)) - - def __eq__(self, other: Any) -> bool: - return ( - type(self) is type(other) - and self._args == other._args - and self._kwds == other._kwds - ) - - def __getattr__(self, attr: str): - # reminder: getattr is called after the normal lookups - if attr == "_kwds": - raise AttributeError() - if attr in self._kwds: - return self._kwds[attr] - else: - return getattr(super(), "__getattr__", super().__getattribute__)(attr) - - def __getitem__(self, item: str) -> Any: - return self._kwds[item] - - def __setattr__(self, item: str, val: Any) -> None: - if item.startswith("_"): - # Setting an instances copy of a ClassVar modify that - # By default, this makes **another** copy and places in _kwds - object.__setattr__(self, item, val) - else: - self._kwds[item] = val - - def __setitem__(self, item: str, val: Any) -> None: - self._kwds[item] = val - - def __repr__(self) -> str: - name = type(self).__name__ - if kwds := self._kwds: - it = (f"{k}: {v!r}" for k, v in sorted(kwds.items()) if v is not Undefined) - args = ",\n".join(it).replace("\n", "\n ") - LB, RB = "{", "}" - return f"{name}({LB}\n {args}\n{RB})" - else: - return f"{name}({self._args[0]!r})" - - class SchemaBase: """ Base class for schema wrappers. 
diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 1fce07009..3bc42a328 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -10,7 +10,6 @@ import warnings from collections import deque from functools import partial -from importlib.metadata import version as importlib_version from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal, Sequence import jsonschema @@ -19,7 +18,6 @@ import pandas as pd import polars as pl import pytest -from packaging.version import Version import altair as alt from altair import load_schema @@ -37,34 +35,6 @@ # try to use SchemaBase objects defined elsewhere as wrappers. -@pytest.fixture -def dummy_rootschema() -> dict[str, Any]: - return { - "$schema": _JSON_SCHEMA_DRAFT_URL, - "definitions": { - "StringMapping": { - "type": "object", - "additionalProperties": {"type": "string"}, - }, - "StringArray": {"type": "array", "items": {"type": "string"}}, - }, - "properties": { - "a": {"$ref": "#/definitions/StringMapping"}, - "a2": {"type": "object", "additionalProperties": {"type": "number"}}, - "b": {"$ref": "#/definitions/StringArray"}, - "b2": {"type": "array", "items": {"type": "number"}}, - "c": {"type": ["string", "number"]}, - "d": { - "anyOf": [ - {"$ref": "#/definitions/StringMapping"}, - {"$ref": "#/definitions/StringArray"}, - ] - }, - "e": {"items": [{"type": "string"}, {"type": "string"}]}, - }, - } - - def test_actual_json_schema_draft_is_same_as_hardcoded_default(): # See comments next to definition of `_DEFAULT_DIALECT_URI` # for details why we need this test @@ -75,130 +45,6 @@ def test_actual_json_schema_draft_is_same_as_hardcoded_default(): ) -def test_init_subclasses_hierarchy(dummy_rootschema) -> None: - if Version(importlib_version("jsonschema")) >= Version("4.18"): - from referencing.exceptions import Unresolvable - else: - from jsonschema.exceptions import ( # type: ignore[assignment] - RefResolutionError as Unresolvable, - ) - - from 
altair.expr.core import GetItemExpression, OperatorMixin - from altair.utils.schemapi import _SchemaBasePEP487 - - sch1 = _SchemaBasePEP487() - sch2 = _SchemaBasePEP487() - sch3 = _SchemaBasePEP487("blue") - sch4 = _SchemaBasePEP487("red") - sch5 = _SchemaBasePEP487(color="blue") - sch6 = _SchemaBasePEP487(color="red") - - with pytest.raises( - AssertionError, match=r"_SchemaBasePEP487\('blue', color='red'\)" - ): - _SchemaBasePEP487("blue", color="red") - - assert sch1 == sch2 - assert sch3 != sch4 - assert sch5 != sch6 - assert sch3 != sch5 - assert _SchemaBasePEP487("blue") == sch3 - assert _SchemaBasePEP487(color="red") == sch6 - with pytest.raises(AttributeError, match="_SchemaBasePEP487.+color"): - attempt = sch4.color is Undefined # noqa: F841 - - assert sch5.color == sch5["color"] == sch5._get("color") == "blue" - assert sch5._get("price") is Undefined - assert sch5._get("price", 999) == 999 - - assert _SchemaBasePEP487._class_is_valid_at_instantiation - sch6._class_is_valid_at_instantiation = False # type: ignore[misc] - assert ( - _SchemaBasePEP487._class_is_valid_at_instantiation - != sch6._class_is_valid_at_instantiation - ) - - with pytest.raises(TypeError, match="Test1PEP487.+ _schema"): - - class Test1PEP487(_SchemaBasePEP487): ... - - class Test2PEP487(_SchemaBasePEP487, schema={"type": "object"}): ... - - with pytest.raises( - TypeError, - match=r"`rootschema` must be provided if `schema` contains a `'\$ref'` and does not inherit one", - ): - - class Test3PEP487(_SchemaBasePEP487, schema={"$ref": "#/definitions/Bar"}): ... - - class RootParentPEP487(_SchemaBasePEP487, schema=dummy_rootschema): - @classmethod - def _default_wrapper_classes(cls) -> Iterator[type[Any]]: - return schemapi._subclasses(RootParentPEP487) - - class Root(RootParentPEP487): - """ - Root schema wrapper. - - A Vega-Lite top-level specification. This is the root class for all Vega-Lite - specifications. (The json schema is generated from this type.) 
- """ - - def __init__(self, *args, **kwds) -> None: - super().__init__(*args, **kwds) - - assert ( - Root._schema - == Root._rootschema - == RootParentPEP487._schema - == RootParentPEP487._rootschema - ) - - class StringMapping(Root, schema={"$ref": "#/definitions/StringMapping"}): ... - - class StringArray(Root, schema={"$ref": "#/definitions/StringArray"}): ... - - with pytest.raises( - jsonschema.ValidationError, - match=r"5 is not of type 'string'", - ): - schemapi.validate_jsonschema( - ["one", "two", 5], StringArray._schema, StringArray._rootschema - ) - - with pytest.raises(Unresolvable): - schemapi.validate_jsonschema(["one", "two", "three"], StringArray._schema) - - schemapi.validate_jsonschema( - ["one", "two", "three"], StringArray._schema, StringArray._rootschema - ) - - class Expression(OperatorMixin, _SchemaBasePEP487, schema={"type": "string"}): - def to_dict(self, *args, **kwargs): - return repr(self) - - def __setattr__(self, attr, val) -> None: - # We don't need the setattr magic defined in SchemaBase - return object.__setattr__(self, attr, val) - - def __getitem__(self, val): - return GetItemExpression(self, val) - - non_ref_mixin = Expression( - Expression("some").to_dict() + Expression("more").to_dict() - ) - schemapi.validate_jsonschema( - non_ref_mixin.to_dict(), non_ref_mixin._schema, non_ref_mixin._rootschema - ) - with pytest.raises( - jsonschema.ValidationError, - match=r"is not of type 'array'", - ): - schemapi.validate_jsonschema( - non_ref_mixin.to_dict(), StringArray._schema, StringArray._rootschema - ) - - class _TestSchema(SchemaBase): @classmethod def _default_wrapper_classes(cls): diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index ec5a6d21f..68a19b44a 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -1008,118 +1008,6 @@ def _deep_copy(obj: _CopyImpl | Any, by_ref: set[str]) -> _CopyImpl | Any: return obj -class _SchemaBasePEP487: - """Minimal demo for testing feasibility of 
`__init_subclass__`.""" - - _schema: ClassVar[dict[str, Any]] - _rootschema: ClassVar[dict[str, Any]] - _class_is_valid_at_instantiation: ClassVar[bool] = True - - def __init__(self, *args: Any, **kwds: Any) -> None: - if (kwds and args) or len(args) > 1: - name = type(self).__name__ - _args = ", ".join(f"{a!r}" for a in args) - _kwds = ", ".join(f"{k}={v!r}" for k, v in kwds.items()) - msg = ( - f"Expected either:\n" - f" - a single arg with no kwds, for, e.g. {{'type': 'string'}}\n" - f" - zero args with zero or more kwds for {{'type': 'object'}}\n\n" - f"but got: {name}({_args}, {_kwds})" - ) - raise AssertionError(msg) - # use object.__setattr__ because we override setattr below. - self._args: tuple[Any, ...] - self._kwds: dict[str, Any] - object.__setattr__(self, "_args", args) - object.__setattr__(self, "_kwds", kwds) - - def __init_subclass__( - cls, - *args: Any, - schema: dict[str, Any] | None = None, - rootschema: dict[str, Any] | None = None, - valid_at_init: bool | None = None, - **kwds: Any, - ) -> None: - super().__init_subclass__(*args, **kwds) - if schema is None: - if hasattr(cls, "_schema"): - schema = cls._schema - else: - msg = ( - f"Cannot instantiate object of type {cls}: " - "_schema class attribute is not defined." - ) - raise TypeError(msg) - if rootschema is None: - if hasattr(cls, "_rootschema"): - rootschema = cls._rootschema - elif "$ref" not in schema: - rootschema = schema - else: - msg = "`rootschema` must be provided if `schema` contains a `'$ref'` and does not inherit one." - raise TypeError(msg) - if valid_at_init is None: - valid_at_init = cls._class_is_valid_at_instantiation - cls._schema = schema - cls._rootschema = rootschema - cls._class_is_valid_at_instantiation = valid_at_init - - @overload - def _get(self, attr: str, default: Optional = ...) -> Any | UndefinedType: ... - @overload - def _get(self, attr: str, default: T) -> Any | T: ... 
- def _get(self, attr: str, default: Optional[T] = Undefined) -> Any | T: - """Get an attribute, returning default if not present.""" - if (item := self._kwds.get(attr, Undefined)) is not Undefined: - return item - else: - return default - - def __dir__(self) -> list[str]: - return sorted(chain(super().__dir__(), self._kwds)) - - def __eq__(self, other: Any) -> bool: - return ( - type(self) is type(other) - and self._args == other._args - and self._kwds == other._kwds - ) - - def __getattr__(self, attr: str): - # reminder: getattr is called after the normal lookups - if attr == "_kwds": - raise AttributeError() - if attr in self._kwds: - return self._kwds[attr] - else: - return getattr(super(), "__getattr__", super().__getattribute__)(attr) - - def __getitem__(self, item: str) -> Any: - return self._kwds[item] - - def __setattr__(self, item: str, val: Any) -> None: - if item.startswith("_"): - # Setting an instances copy of a ClassVar modify that - # By default, this makes **another** copy and places in _kwds - object.__setattr__(self, item, val) - else: - self._kwds[item] = val - - def __setitem__(self, item: str, val: Any) -> None: - self._kwds[item] = val - - def __repr__(self) -> str: - name = type(self).__name__ - if kwds := self._kwds: - it = (f"{k}: {v!r}" for k, v in sorted(kwds.items()) if v is not Undefined) - args = ",\n".join(it).replace("\n", "\n ") - LB, RB = "{", "}" - return f"{name}({LB}\n {args}\n{RB})" - else: - return f"{name}({self._args[0]!r})" - - class SchemaBase: """ Base class for schema wrappers. 
From 67642f06d7a1c4c28a05ac8c5dda78dd2f01a1d5 Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:50:13 +0100 Subject: [PATCH 91/92] ci: Remove debugging `hatch` script --- pyproject.toml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 91fdb0218..bbfce770f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -138,13 +138,6 @@ test-slow = [ "pytest -p no:randomly -n logical --numprocesses=logical --doctest-modules tests altair -m \"slow\" {args}" ] -# Much more isolated, focused purely on a faster `schemapi.py` rebuild/test loop. -validation = [ - "mypy tools/schemapi/schemapi.py", - "python tools/generate_schema_wrapper.py", - "pytest -k test_schemapi tests {args}", -] - [tool.hatch.envs.hatch-test] # https://hatch.pypa.io/latest/tutorials/testing/overview/ features = ["all", "dev", "doc"] From 8002dab1f4ebea29183921ba9cd02a21e870f16c Mon Sep 17 00:00:00 2001 From: dangotbanned <125183946+dangotbanned@users.noreply.github.com> Date: Tue, 3 Sep 2024 16:54:40 +0100 Subject: [PATCH 92/92] test: Remove `test_chart_validation_benchmark` Feeling I've squeezed out all the performance I can for now. 
Will add in a collpased comment on the PR for reference --- tests/utils/test_schemapi.py | 92 +----------------------------------- 1 file changed, 1 insertion(+), 91 deletions(-) diff --git a/tests/utils/test_schemapi.py b/tests/utils/test_schemapi.py index 3bc42a328..25f483753 100644 --- a/tests/utils/test_schemapi.py +++ b/tests/utils/test_schemapi.py @@ -10,7 +10,7 @@ import warnings from collections import deque from functools import partial -from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Literal, Sequence +from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence import jsonschema import jsonschema.exceptions @@ -882,96 +882,6 @@ def test_chart_validation_errors(chart_func, expected_error_message): chart.to_dict() -_SKIP_SLOW_BENCHMARKS: bool = True -_REPEAT_TIMES = 1000 - - -@pytest.mark.parametrize("to_or_from", ["to_dict-validate", "to_dict", "from_dict"]) -@pytest.mark.filterwarnings("ignore:.*:UserWarning") -@pytest.mark.skipif( - _SKIP_SLOW_BENCHMARKS, - reason="Should only be run in isolation to test single threaded performance.", -) -def test_chart_validation_benchmark( - to_or_from: Literal["to_dict-validate", "to_dict", "from_dict"], -) -> None: - """ - Intended to isolate `Chart.(to|from)_dict.` calls. - - Repeated ``_REPEAT_TIMES`` times, non-parametric: - - in an attempt to limit the potential overhead of ``pytest`` - - but enforce ``1`` thread, like a user-code would be. 
- - Results - ------- - ``` - _REPEAT_TIMES = 1000 - pytest -k test_chart_validation_benchmark --numprocesses=3 --durations=3 tests - - # Pre-`SchemaBase.from_dict` refactor (3.12.3) - 108.16s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] - 84.62s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] - 66.71s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] - - # Post-`SchemaBase.from_dict` refactor (3.12.3) - 107.84s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] - 50.43s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] - 67.07s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] - - # Post-`SchemaBase.__init_subclass__` addition (3.12.3) - 108.24s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] - 50.33s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] - 66.51s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] - - # Post-`dict` branch micro optimization in `_FromDict.from_dict` (3.12.3) - 107.90s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict-validate] - 49.63s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[from_dict] - 66.87s call tests/utils/test_schemapi.py::test_chart_validation_benchmark[to_dict] - ``` - """ - from itertools import chain, repeat - - if TYPE_CHECKING: - from altair.typing import ChartType - - def _iter_charts() -> Iterator[ChartType]: - """ - Ensures only len(chart_funcs_error_message) actual charts are constructed. - - The `to_dict` calls are what gets multiplied - """ - charts: list[ChartType] = [fn() for fn, _ in chart_funcs_error_message] - yield from chain.from_iterable(repeat(charts, times=_REPEAT_TIMES)) - - def _iter_chart_factory() -> Iterator[ChartType]: - """ - Validation not the bottleneck, but encode is. 
- - Ensures at least `times` * len(chart_funcs_error_message) .encode calls are made. - """ - chart_funcs: list[Callable[[], ChartType]] = [ - fn for fn, _ in chart_funcs_error_message - ] - for fn in chain.from_iterable(repeat(chart_funcs, times=_REPEAT_TIMES)): - yield fn() - - def _to_dict(validate: bool) -> None: - if validate: - for chart in _iter_charts(): - with pytest.raises(schemapi.SchemaValidationError): - chart.to_dict(validate=validate) - else: - for chart in _iter_charts(): - chart.to_dict(validate=validate) - - if to_or_from == "to_dict": - _to_dict(validate=False) - elif to_or_from == "to_dict-validate": - _to_dict(validate=True) - else: - assert list(_iter_chart_factory()) - - def test_multiple_field_strings_in_condition(): selection = alt.selection_point() expected_error_message = "A field cannot be used for both the `if_true` and `if_false` values of a condition. One of them has to specify a `value` or `datum` definition."