From 0ea77def7ad3b506c3168920b3e177f974ac82d6 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 03:29:42 +0200 Subject: [PATCH 01/29] docs(enums): add docs to Enum (#1271) --- openfisca_core/indexed_enums/__init__.py | 34 +++++++----------- openfisca_core/indexed_enums/enum.py | 44 +++++++++++++++--------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 9c4ff7dd6..70e2d3549 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,25 +1,15 @@ -# Transitional imports to ensure non-breaking changes. -# Could be deprecated in the next major release. -# -# How imports are being used today: -# -# >>> from openfisca_core.module import symbol -# -# The previous example provokes cyclic dependency problems -# that prevent us from modularizing the different components -# of the library so to make them easier to test and to maintain. -# -# How could them be used after the next major release: -# -# >>> from openfisca_core import module -# >>> module.symbol() -# -# And for classes: -# -# >>> from openfisca_core.module import Symbol -# >>> Symbol() -# -# See: https://www.python.org/dev/peps/pep-0008/#imports +"""Enumerations for variables with a limited set of possible values. + +These include: + * Highest academic level: high school, associate degree, bachelor's degree, + master's degree, doctorate… + * A household housing occupancy status: owner, tenant, free-lodger, + homeless… + * The main occupation of a person: employee, freelancer, retired, student, + unemployed… + * Etc. + +""" from . 
import types from .config import ENUM_ARRAY_DTYPE diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a6fd5d7f9..d76c9f9e2 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -8,22 +8,34 @@ class Enum(t.Enum): - """Enum based on `enum34 `_, whose items - have an index. + """Enum based on `enum34 `_. + + Its items have an :any:`int` index. This is useful and performant when + running simulations on large populations. + """ - # Tweak enums to add an index attribute to each enum item - def __init__(self, name: str) -> None: - # When the enum item is initialized, self._member_names_ contains the - # names of the previously initialized items, so its length is the index - # of this item. + #: The ``index`` of the ``Enum`` member. + index: int + + def __init__(self, *__args: object, **__kwargs: object) -> None: + """Tweak :any:`~enum.Enum` to add an index to each enum item. + + When the enum is initialised, ``_member_names_`` contains the names of + the already initialized items, so its length is the index of this item. + + Args: + *__args: Positional arguments. + **__kwargs: Keyword arguments. + + """ + self.index = len(self._member_names_) - # Bypass the slow Enum.__eq__ + #: Bypass the slow :any:`~enum.Enum.__eq__` method. __eq__ = object.__eq__ - # In Python 3, __hash__ must be defined if __eq__ is defined to stay - # hashable. + #: :meth:`.__hash__` must also be defined so as to stay hashable. __hash__ = object.__hash__ @classmethod @@ -31,15 +43,13 @@ def encode( cls, array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, ) -> EnumArray: - """Encode a string numpy array, an enum item numpy array, or an int numpy - array into an :any:`EnumArray`. See :any:`EnumArray.decode` for - decoding. + """Encode an encodable array into an ``EnumArray``. - :param numpy.ndarray array: Array of string identifiers, or of enum - items, to encode. + Args: + array: Array to encode. 
- :returns: An :any:`EnumArray` encoding the input array values. - :rtype: :any:`EnumArray` + Returns: + EnumArray: An ``EnumArray`` with the encoded input values. For instance: From 91beee1221089b4917f16bd6d42364f2b5f8a7c2 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 14:32:55 +0200 Subject: [PATCH 02/29] docs(enums): add docs to EnumArray (#1271) --- openfisca_core/indexed_enums/enum_array.py | 77 ++++++++++++++++++---- setup.cfg | 14 +++- 2 files changed, 78 insertions(+), 13 deletions(-) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index a1479d5b8..dc63071da 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -9,41 +9,84 @@ class EnumArray(t.EnumArray): - """NumPy array subclass representing an array of enum items. + """Subclass of :any:`numpy.ndarray` representing an array of ``Enum``. + + ``Enum`` arrays are encoded as :any:`int` arrays to improve performance. + + Note: + Subclassing :any:`numpy.ndarray` is a little tricky™. To read more + about the :meth:`.__new__` and :meth:`.__array_finalize__` methods + below, see `Subclassing ndarray`_. + + .. _Subclassing ndarray: + https://numpy.org/doc/stable/user/basics.subclassing.html - EnumArrays are encoded as ``int`` arrays to improve performance """ - # Subclassing ndarray is a little tricky. - # To read more about the two following methods, see: - # https://docs.scipy.org/doc/numpy-1.13.0/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array. 
def __new__( cls, input_array: t.Array[t.DTypeEnum], possible_values: None | type[t.Enum] = None, ) -> Self: + """See comment above.""" obj = numpy.asarray(input_array).view(cls) obj.possible_values = possible_values return obj - # See previous comment def __array_finalize__(self, obj: numpy.int32 | None) -> None: + """See comment above.""" if obj is None: return self.possible_values = getattr(obj, "possible_values", None) def __eq__(self, other: object) -> bool: - # When comparing to an item of self.possible_values, use the item index - # to speed up the comparison. + """Compare equality with the item index. + + When comparing to an item of :attr:`.possible_values`, use the item + index to speed up the comparison. + + Whenever possible, use :any:`numpy.ndarray.view` so that the result is + a classic :any:`numpy.ndarray`, not an :obj:`.EnumArray`. + + Args: + other: Another object to compare to. + + Returns: + bool: When ??? + numpy.ndarray[numpy.bool_]: When ??? + + Note: + This breaks the `Liskov substitution principle`_. + + .. _Liskov substitution principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + """ + if other.__class__.__name__ is self.possible_values.__name__: - # Use view(ndarray) so that the result is a classic ndarray, not an - # EnumArray. return self.view(numpy.ndarray) == other.index return self.view(numpy.ndarray) == other def __ne__(self, other: object) -> bool: + """Inequality… + + Args: + other: Another object to compare to. + + Returns: + bool: When ??? + numpy.ndarray[numpy.bool_]: When ??? + + Note: + This breaks the `Liskov substitution principle`_. + + .. 
_Liskov substitution principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + """ + return numpy.logical_not(self == other) def _forbidden_operation(self, other: Any) -> NoReturn: @@ -65,7 +108,10 @@ def _forbidden_operation(self, other: Any) -> NoReturn: __or__ = _forbidden_operation def decode(self) -> numpy.object_: - """Return the array of enum items corresponding to self. + """Decode itself to a normal array. + + Returns: + numpy.ndarray[t.Enum]: The enum items of the ``EnumArray``. For instance: @@ -76,14 +122,19 @@ def decode(self) -> numpy.object_: Decoded value: enum item + """ + return numpy.select( [self == item.index for item in self.possible_values], list(self.possible_values), ) def decode_to_str(self) -> numpy.str_: - """Return the array of string identifiers corresponding to self. + """Decode itself to an array of strings. + + Returns: + numpy.ndarray[numpy.str_]: The string values of the ``EnumArray``. For instance: @@ -92,7 +143,9 @@ def decode_to_str(self) -> numpy.str_: >>> 2 # Encoded value >>> enum_array.decode_to_str()[0] 'free_lodger' # String identifier + """ + return numpy.select( [self == item.index for item in self.possible_values], [item.name for item in self.possible_values], diff --git a/setup.cfg b/setup.cfg index 9b8ce699b..fd18e5ab3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,7 +11,19 @@ convention = google docstring_style = google extend-ignore = D -ignore = B019, E203, E501, F405, E701, E704, RST212, RST213, RST301, RST306, W503 +ignore = + B019, + E203, + E501, + F405, + E701, + E704, + RST210, + RST212, + RST213, + RST301, + RST306, + W503 in-place = true include-in-doctest = openfisca_core/commons From 15c67c07c17af3a97676251b83d58b9c5ae1c69e Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 15:53:14 +0200 Subject: [PATCH 03/29] docs(enums): fix sphinx markup (#1271) --- openfisca_core/indexed_enums/enum.py | 19 +++++++++------- openfisca_core/indexed_enums/enum_array.py | 25 
++++++++++++---------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index d76c9f9e2..a67cd317c 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -10,16 +10,16 @@ class Enum(t.Enum): """Enum based on `enum34 `_. - Its items have an :any:`int` index. This is useful and performant when - running simulations on large populations. + Its items have an :class:`int` index, useful and performant when running + :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. """ - #: The ``index`` of the ``Enum`` member. + #: The :attr:`index` of the :class:`.Enum` member. index: int def __init__(self, *__args: object, **__kwargs: object) -> None: - """Tweak :any:`~enum.Enum` to add an index to each enum item. + """Tweak :class:`enum.Enum` to add an :attr:`.index` to each enum item. When the enum is initialised, ``_member_names_`` contains the names of the already initialized items, so its length is the index of this item. @@ -28,11 +28,14 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: *__args: Positional arguments. **__kwargs: Keyword arguments. + Note: + ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. + """ self.index = len(self._member_names_) - #: Bypass the slow :any:`~enum.Enum.__eq__` method. + #: Bypass the slow :meth:`enum.Enum.__eq__` method. __eq__ = object.__eq__ #: :meth:`.__hash__` must also be defined so as to stay hashable. @@ -43,13 +46,13 @@ def encode( cls, array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, ) -> EnumArray: - """Encode an encodable array into an ``EnumArray``. + """Encode an encodable array into an :class:`.EnumArray`. Args: - array: Array to encode. + array: :class:`~numpy.ndarray` to encode. Returns: - EnumArray: An ``EnumArray`` with the encoded input values. + EnumArray: An :class:`.EnumArray` with the encoded input values. 
For instance: diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index dc63071da..a04e28cdb 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -9,12 +9,12 @@ class EnumArray(t.EnumArray): - """Subclass of :any:`numpy.ndarray` representing an array of ``Enum``. + """A subclass of :class:`~numpy.ndarray` of :class:`.Enum`. - ``Enum`` arrays are encoded as :any:`int` arrays to improve performance. + :class:`.Enum` arrays are encoded as :class:`int` to improve performance. Note: - Subclassing :any:`numpy.ndarray` is a little tricky™. To read more + Subclassing :class:`~numpy.ndarray` is a little tricky™. To read more about the :meth:`.__new__` and :meth:`.__array_finalize__` methods below, see `Subclassing ndarray`_. @@ -23,6 +23,9 @@ class EnumArray(t.EnumArray): """ + #: Enum type of the array items. + possible_values: None | type[t.Enum] = None + def __new__( cls, input_array: t.Array[t.DTypeEnum], @@ -41,16 +44,16 @@ def __array_finalize__(self, obj: numpy.int32 | None) -> None: self.possible_values = getattr(obj, "possible_values", None) def __eq__(self, other: object) -> bool: - """Compare equality with the item index. + """Compare equality with the item's :attr:`~.Enum.index`. - When comparing to an item of :attr:`.possible_values`, use the item - index to speed up the comparison. + When comparing to an item of :attr:`.possible_values`, use the + item's :attr:`~.Enum.index`. to speed up the comparison. Whenever possible, use :any:`numpy.ndarray.view` so that the result is - a classic :any:`numpy.ndarray`, not an :obj:`.EnumArray`. + a classic :class:`~numpy.ndarray`, not an :obj:`.EnumArray`. Args: - other: Another object to compare to. + other: Another :class:`object` to compare to. Returns: bool: When ??? @@ -73,7 +76,7 @@ def __ne__(self, other: object) -> bool: """Inequality… Args: - other: Another object to compare to. 
+ other: Another :class:`object` to compare to. Returns: bool: When ??? @@ -111,7 +114,7 @@ def decode(self) -> numpy.object_: """Decode itself to a normal array. Returns: - numpy.ndarray[t.Enum]: The enum items of the ``EnumArray``. + numpy.ndarray[t.Enum]: The items of the :obj:`.EnumArray`. For instance: @@ -134,7 +137,7 @@ def decode_to_str(self) -> numpy.str_: """Decode itself to an array of strings. Returns: - numpy.ndarray[numpy.str_]: The string values of the ``EnumArray``. + numpy.ndarray[numpy.str_]: The string values of the :obj:`.EnumArray`. For instance: From 6300df6286f2598af791fdb6ab206a70c7646263 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 17:29:47 +0200 Subject: [PATCH 04/29] style(enums): fix linter issues (#1271) --- openfisca_core/indexed_enums/config.py | 3 +++ openfisca_core/indexed_enums/enum.py | 4 +++- openfisca_core/indexed_enums/enum_array.py | 12 ++++++------ setup.py | 2 +- stubs/numexpr/__init__.pyi | 1 + 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/openfisca_core/indexed_enums/config.py b/openfisca_core/indexed_enums/config.py index f7da69b84..abb8817de 100644 --- a/openfisca_core/indexed_enums/config.py +++ b/openfisca_core/indexed_enums/config.py @@ -1,3 +1,6 @@ import numpy ENUM_ARRAY_DTYPE = numpy.int16 + + +__all__ = ["ENUM_ARRAY_DTYPE"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a67cd317c..56f007941 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -32,7 +32,6 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. """ - self.index = len(self._member_names_) #: Bypass the slow :meth:`enum.Enum.__eq__` method. 
@@ -100,3 +99,6 @@ def encode( ).astype(ENUM_ARRAY_DTYPE) return EnumArray(array, cls) + + +__all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index a04e28cdb..b40ba0401 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -66,14 +66,13 @@ def __eq__(self, other: object) -> bool: https://en.wikipedia.org/wiki/Liskov_substitution_principle """ - if other.__class__.__name__ is self.possible_values.__name__: return self.view(numpy.ndarray) == other.index return self.view(numpy.ndarray) == other def __ne__(self, other: object) -> bool: - """Inequality… + """Inequality. Args: other: Another :class:`object` to compare to. @@ -89,10 +88,10 @@ def __ne__(self, other: object) -> bool: https://en.wikipedia.org/wiki/Liskov_substitution_principle """ - return numpy.logical_not(self == other) - def _forbidden_operation(self, other: Any) -> NoReturn: + @staticmethod + def _forbidden_operation(other: Any) -> NoReturn: msg = ( "Forbidden operation. The only operations allowed on EnumArrays " "are '==' and '!='." 
@@ -127,7 +126,6 @@ def decode(self) -> numpy.object_: Decoded value: enum item """ - return numpy.select( [self == item.index for item in self.possible_values], list(self.possible_values), @@ -148,7 +146,6 @@ def decode_to_str(self) -> numpy.str_: 'free_lodger' # String identifier """ - return numpy.select( [self == item.index for item in self.possible_values], [item.name for item in self.possible_values], @@ -159,3 +156,6 @@ def __repr__(self) -> str: def __str__(self) -> str: return str(self.decode_to_str()) + + +__all__ = ["EnumArray"] diff --git a/setup.py b/setup.py index 202e5e449..d342bb9f4 100644 --- a/setup.py +++ b/setup.py @@ -62,7 +62,7 @@ "pylint >=3.3.1, <4.0", "pylint-per-file-ignores >=1.3.2, <2.0", "pyright >=1.1.382, <2.0", - "ruff >=0.6.7, <1.0", + "ruff >=0.6.9, <1.0", "ruff-lsp >=0.0.57, <1.0", "xdoctest >=1.2.0, <2.0", *api_requirements, diff --git a/stubs/numexpr/__init__.pyi b/stubs/numexpr/__init__.pyi index f9ada73c3..931d47ddb 100644 --- a/stubs/numexpr/__init__.pyi +++ b/stubs/numexpr/__init__.pyi @@ -4,6 +4,7 @@ import numpy def evaluate( __ex: str, + /, *__args: object, **__kwargs: object, ) -> NDArray[numpy.bool_] | NDArray[numpy.int32] | NDArray[numpy.float32]: ... 
From 8be6ae431666ef466daf31250a8b45262f3c7457 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 18:53:07 +0200 Subject: [PATCH 05/29] style(sphinx): fix more doc markup errors (#1271) --- openfisca_core/commons/__init__.py | 53 +------------------ openfisca_core/commons/formulas.py | 14 ++--- openfisca_core/commons/misc.py | 8 ++- openfisca_core/commons/rates.py | 12 ++--- openfisca_core/commons/tests/test_dummy.py | 1 - openfisca_core/commons/tests/test_formulas.py | 7 --- openfisca_core/commons/tests/test_rates.py | 2 - openfisca_core/data_storage/__init__.py | 23 +------- .../data_storage/in_memory_storage.py | 25 ++++----- .../data_storage/on_disk_storage.py | 24 ++++----- openfisca_core/indexed_enums/__init__.py | 13 +---- openfisca_core/indexed_enums/enum_array.py | 8 +-- openfisca_tasks/lint.mk | 1 + 13 files changed, 40 insertions(+), 151 deletions(-) diff --git a/openfisca_core/commons/__init__.py b/openfisca_core/commons/__init__.py index 1a3d065ee..550088141 100644 --- a/openfisca_core/commons/__init__.py +++ b/openfisca_core/commons/__init__.py @@ -1,55 +1,4 @@ -"""Common tools for contributors and users. - -The tools in this sub-package are intended, to help both contributors -to OpenFisca Core and to country packages. - -Official Public API: - * :func:`.apply_thresholds` - * :func:`.average_rate` - * :func:`.concat` - * :func:`.empty_clone` - * :func:`.eval_expression` - * :func:`.marginal_rate` - * :func:`.stringify_array` - * :func:`.switch` - -Deprecated: - * :class:`.Dummy` - -Note: - The ``deprecated`` imports are transitional, in order to ensure non-breaking - changes, and could be removed from the codebase in the next - major release. 
- -Note: - How imports are being used today:: - - from openfisca_core.commons import * # Bad - from openfisca_core.commons.formulas import switch # Bad - from openfisca_core.commons.decorators import deprecated # Bad - - - The previous examples provoke cyclic dependency problems, that prevent us - from modularizing the different components of the library, which would make - them easier to test and to maintain. - - How they could be used in a future release:: - - from openfisca_core import commons - from openfisca_core.commons import deprecated - - deprecated() # Good: import classes as publicly exposed - commons.switch() # Good: use functions as publicly exposed - - .. seealso:: `PEP8#Imports`_ and `OpenFisca's Styleguide`_. - - .. _PEP8#Imports: - https://www.python.org/dev/peps/pep-0008/#imports - - .. _OpenFisca's Styleguide: - https://github.com/openfisca/openfisca-core/blob/master/STYLEGUIDE.md - -""" +"""Common tools for contributors and users.""" from . import types from .dummy import Dummy diff --git a/openfisca_core/commons/formulas.py b/openfisca_core/commons/formulas.py index a184ad2dc..d83f187e2 100644 --- a/openfisca_core/commons/formulas.py +++ b/openfisca_core/commons/formulas.py @@ -24,10 +24,7 @@ def apply_thresholds( choices: A list of the possible values to choose from. Returns: - Array[numpy.float32]: A list of the values chosen. - - Raises: - AssertionError: When thresholds and choices are incompatible. + ndarray[float32]: A list of the values chosen. Examples: >>> input = numpy.array([4, 5, 6, 7, 8]) @@ -37,7 +34,6 @@ def apply_thresholds( array([10, 10, 15, 15, 20]) """ - condlist: list[t.Array[numpy.bool_] | bool] condlist = [input <= threshold for threshold in thresholds] @@ -66,7 +62,7 @@ def concat( that: Another array to concatenate. Returns: - Array[numpy.str_]: An array with the concatenated values. + ndarray[str_]: An array with the concatenated values. 
Examples: >>> this = ["this", "that"] @@ -75,7 +71,6 @@ def concat( array(['this1.0', 'that2.5']...) """ - if not isinstance(this, numpy.ndarray): this = numpy.array(this) @@ -105,10 +100,7 @@ def switch( value_by_condition: Values to replace for each condition. Returns: - Array: An array with the replaced values. - - Raises: - AssertionError: When ``value_by_condition`` is empty. + ndarray[float32]: An array with the replaced values. Examples: >>> conditions = numpy.array([1, 1, 1, 2]) diff --git a/openfisca_core/commons/misc.py b/openfisca_core/commons/misc.py index ba9687619..e3e55948d 100644 --- a/openfisca_core/commons/misc.py +++ b/openfisca_core/commons/misc.py @@ -13,7 +13,7 @@ def empty_clone(original: object) -> object: original: An object to clone. Returns: - The cloned, empty, object. + object: The cloned, empty, object. Examples: >>> Foo = type("Foo", (list,), {}) @@ -50,7 +50,7 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str: array: An array. Returns: - str: "None" if the ``array`` is None. + str: ``"None"`` if the ``array`` is ``None``. str: The stringified ``array`` otherwise. Examples: @@ -71,7 +71,6 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str: "[, {}, >> target = numpy.array([1, 2, 3]) @@ -37,7 +37,6 @@ def average_rate( array([ nan, 0. , -0.5]) """ - if not isinstance(varying, numpy.ndarray): varying = numpy.array(varying, dtype=numpy.float32) @@ -79,9 +78,9 @@ def marginal_rate( trim: The lower and upper bounds of the marginal rate. Returns: - Array[numpy.float32]: The marginal rate for each target. When ``trim`` - is provided, values that are out of the provided bounds are replaced by - :any:`numpy.nan`. + ndarray[float32]: The marginal rate for each target. When ``trim`` + is provided, values that are out of the provided bounds are + replaced by :class:`numpy.nan`. 
Examples: >>> target = numpy.array([1, 2, 3]) @@ -91,7 +90,6 @@ def marginal_rate( array([nan, 0.5]) """ - if not isinstance(varying, numpy.ndarray): varying = numpy.array(varying, dtype=numpy.float32) diff --git a/openfisca_core/commons/tests/test_dummy.py b/openfisca_core/commons/tests/test_dummy.py index dfe04b3e4..4dd13eaba 100644 --- a/openfisca_core/commons/tests/test_dummy.py +++ b/openfisca_core/commons/tests/test_dummy.py @@ -5,6 +5,5 @@ def test_dummy_deprecation() -> None: """Dummy throws a deprecation warning when instantiated.""" - with pytest.warns(DeprecationWarning): assert Dummy() diff --git a/openfisca_core/commons/tests/test_formulas.py b/openfisca_core/commons/tests/test_formulas.py index 130df9505..6fa98a7c2 100644 --- a/openfisca_core/commons/tests/test_formulas.py +++ b/openfisca_core/commons/tests/test_formulas.py @@ -7,7 +7,6 @@ def test_apply_thresholds_when_several_inputs() -> None: """Make a choice for any given input.""" - input_ = numpy.array([4, 5, 6, 7, 8, 9, 10]) thresholds = [5, 7, 9] choices = [10, 15, 20, 25] @@ -19,7 +18,6 @@ def test_apply_thresholds_when_several_inputs() -> None: def test_apply_thresholds_when_too_many_thresholds() -> None: """Raise an AssertionError when thresholds > choices.""" - input_ = numpy.array([6]) thresholds = [5, 7, 9, 11] choices = [10, 15, 20] @@ -30,7 +28,6 @@ def test_apply_thresholds_when_too_many_thresholds() -> None: def test_apply_thresholds_when_too_many_choices() -> None: """Raise an AssertionError when thresholds < choices - 1.""" - input_ = numpy.array([6]) thresholds = [5, 7] choices = [10, 15, 20, 25] @@ -41,7 +38,6 @@ def test_apply_thresholds_when_too_many_choices() -> None: def test_concat_when_this_is_array_not_str() -> None: """Cast ``this`` to ``str`` when it is a NumPy array other than string.""" - this = numpy.array([1, 2]) that = numpy.array(["la", "o"]) @@ -52,7 +48,6 @@ def test_concat_when_this_is_array_not_str() -> None: def test_concat_when_that_is_array_not_str() -> 
None: """Cast ``that`` to ``str`` when it is a NumPy array other than string.""" - this = numpy.array(["ho", "cha"]) that = numpy.array([1, 2]) @@ -63,7 +58,6 @@ def test_concat_when_that_is_array_not_str() -> None: def test_concat_when_args_not_str_array_like() -> None: """Cast ``this`` and ``that`` to a NumPy array or strings.""" - this = (1, 2) that = (3, 4) @@ -74,7 +68,6 @@ def test_concat_when_args_not_str_array_like() -> None: def test_switch_when_values_are_empty() -> None: """Raise an AssertionError when the values are empty.""" - conditions = [1, 1, 1, 2] value_by_condition = {} diff --git a/openfisca_core/commons/tests/test_rates.py b/openfisca_core/commons/tests/test_rates.py index c266582fc..fbee4cc83 100644 --- a/openfisca_core/commons/tests/test_rates.py +++ b/openfisca_core/commons/tests/test_rates.py @@ -8,7 +8,6 @@ def test_average_rate_when_varying_is_zero() -> None: """Yield infinity when the varying gross income crosses zero.""" - target = numpy.array([1, 2, 3]) varying = [0, 0, 0] @@ -19,7 +18,6 @@ def test_average_rate_when_varying_is_zero() -> None: def test_marginal_rate_when_varying_is_zero() -> None: """Yield infinity when the varying gross income crosses zero.""" - target = numpy.array([1, 2, 3]) varying = numpy.array([0, 0, 0]) diff --git a/openfisca_core/data_storage/__init__.py b/openfisca_core/data_storage/__init__.py index 9f63047fb..4dbbb8954 100644 --- a/openfisca_core/data_storage/__init__.py +++ b/openfisca_core/data_storage/__init__.py @@ -1,25 +1,4 @@ -# Transitional imports to ensure non-breaking changes. -# Could be deprecated in the next major release. -# -# How imports are being used today: -# -# >>> from openfisca_core.module import symbol -# -# The previous example provokes cyclic dependency problems -# that prevent us from modularizing the different components -# of the library so to make them easier to test and to maintain. 
-# -# How could them be used after the next major release: -# -# >>> from openfisca_core import module -# >>> module.symbol() -# -# And for classes: -# -# >>> from openfisca_core.module import Symbol -# >>> Symbol() -# -# See: https://www.python.org/dev/peps/pep-0008/#imports +"""Different storage backends for the data of a simulation.""" from . import types from .in_memory_storage import InMemoryStorage diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index 18387ff64..cd8f9ef08 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -29,13 +29,15 @@ def __init__(self, is_eternal: bool = False) -> None: self.is_eternal = is_eternal def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: - """Retrieve the data for the specified period from memory. + """Retrieve the data for the specified :obj:`.Period` from memory. Args: - period: The period for which data should be retrieved. + period: The :obj:`.Period` for which data should be retrieved. Returns: - The data for the specified period, or None if no data is available. + None: If no data is available. + EnumArray: The data for the specified :obj:`.Period`. + ndarray[generic]: The data for the specified :obj:`.Period`. Examples: >>> import numpy @@ -53,7 +55,6 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: array([1, 2, 3]) """ - if self.is_eternal: period = periods.period(DateUnit.ETERNITY) period = periods.period(period) @@ -64,11 +65,11 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: return values def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: - """Store the specified data in memory for the specified period. + """Store the specified data in memory for the specified :obj:`.Period`. 
Args: value: The data to store - period: The period for which the data should be stored. + period: The :obj:`.Period` for which the data should be stored. Examples: >>> import numpy @@ -86,7 +87,6 @@ def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: array(['1', '2', 'salary'], dtype=' None: self._arrays[period] = value def delete(self, period: None | t.Period = None) -> None: - """Delete the data for the specified period from memory. + """Delete the data for the specified :obj:`.Period` from memory. Args: - period: The period for which data should be deleted. + period: The :obj:`.Period` for which data should be deleted. Note: If ``period`` is specified, all data will be deleted. @@ -128,7 +128,6 @@ def delete(self, period: None | t.Period = None) -> None: >>> storage.get(period) """ - if period is None: self._arrays = {} return @@ -147,7 +146,7 @@ def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: - A sequence containing the storage's known periods. + KeysView[Period]: A sequence containing the storage's known periods. Examples: >>> from openfisca_core import data_storage, periods @@ -164,14 +163,13 @@ def get_known_periods(self) -> KeysView[t.Period]: dict_keys([Period(('year', Instant((2017, 1, 1)), 1))]) """ - return self._arrays.keys() def get_memory_usage(self) -> t.MemoryUsage: """Memory usage of the storage. Returns: - A dictionary representing the storage's memory usage. + MemoryUsage: A dictionary representing the storage's memory usage. 
Examples: >>> from openfisca_core import data_storage @@ -181,7 +179,6 @@ def get_memory_usage(self) -> t.MemoryUsage: {'nb_arrays': 0, 'total_nb_bytes': 0, 'cell_size': nan} """ - if not self._arrays: return { "nb_arrays": 0, diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index d1b8e2c4e..3d0ef7fc1 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -33,7 +33,7 @@ class OnDiskStorage: #: Whether to preserve the storage directory. preserve_storage_dir: bool - #: Mapping of file paths to possible Enum values. + #: Mapping of file paths to possible :class:`.Enum` values. _enums: MutableMapping[str, type[t.Enum]] #: Mapping of periods to file paths. @@ -52,17 +52,18 @@ def __init__( self.storage_dir = storage_dir def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: - """Decode a file by loading its contents as a ``numpy`` array. + """Decode a file by loading its contents as a :mod:`numpy` array. Args: file: Path to the file to be decoded. Returns: - ``numpy`` array or ``EnumArray`` representing the data in the file. + EnumArray: Representing the data in the file. + ndarray[generic]: Representing the data in the file. Note: - If the file is associated with ``Enum`` values, the array is - converted back to an ``EnumArray`` object. + If the file is associated with :class:`~indexed_enums.Enum` values, the + array is converted back to an :obj:`~indexed_enums.EnumArray` object. Examples: >>> import tempfile @@ -89,7 +90,6 @@ def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: EnumArray([]) """ - enum = self._enums.get(file) if enum is not None: @@ -106,8 +106,9 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: period: The period for which data should be retrieved. Returns: - A ``numpy`` array or ``EnumArray`` representing the vector for the - specified period, or ``None`` if no vector is stored. 
+ None: If no data is available. + EnumArray: Representing the data for the specified period. + ndarray[generic]: Representing the data for the specified period. Examples: >>> import tempfile @@ -127,7 +128,6 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: array([1, 2, 3]) """ - if self.is_eternal: period = periods.period(DateUnit.ETERNITY) period = periods.period(period) @@ -162,7 +162,6 @@ def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: array(['1', '2', 'salary'], dtype=' None: ... storage.get(period) """ - if period is None: self._files = {} return @@ -231,7 +229,7 @@ def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: - A sequence containing the storage's known periods. + KeysView[Period]: A sequence containing the storage's known periods. Examples: >>> import tempfile @@ -255,7 +253,6 @@ def get_known_periods(self) -> KeysView[t.Period]: dict_keys([Period(('year', Instant((2017, 1, 1)), 1))]) """ - return self._files.keys() def restore(self) -> None: @@ -289,7 +286,6 @@ def restore(self) -> None: >>> directory.cleanup() """ - self._files = files = {} # Restore self._files from content of storage_dir. for filename in os.listdir(self.storage_dir): diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 70e2d3549..a6a452511 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,15 +1,4 @@ -"""Enumerations for variables with a limited set of possible values. - -These include: - * Highest academic level: high school, associate degree, bachelor's degree, - master's degree, doctorate… - * A household housing occupancy status: owner, tenant, free-lodger, - homeless… - * The main occupation of a person: employee, freelancer, retired, student, - unemployed… - * Etc. - -""" +"""Enumerations for variables with a limited set of possible values.""" from . 
import types from .config import ENUM_ARRAY_DTYPE diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index b40ba0401..93c848698 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -57,7 +57,7 @@ def __eq__(self, other: object) -> bool: Returns: bool: When ??? - numpy.ndarray[numpy.bool_]: When ??? + ndarray[bool_]: When ??? Note: This breaks the `Liskov substitution principle`_. @@ -79,7 +79,7 @@ def __ne__(self, other: object) -> bool: Returns: bool: When ??? - numpy.ndarray[numpy.bool_]: When ??? + ndarray[bool_]: When ??? Note: This breaks the `Liskov substitution principle`_. @@ -113,7 +113,7 @@ def decode(self) -> numpy.object_: """Decode itself to a normal array. Returns: - numpy.ndarray[t.Enum]: The items of the :obj:`.EnumArray`. + ndarray[Enum]: The items of the :obj:`.EnumArray`. For instance: @@ -135,7 +135,7 @@ def decode_to_str(self) -> numpy.str_: """Decode itself to an array of strings. Returns: - numpy.ndarray[numpy.str_]: The string values of the :obj:`.EnumArray`. + ndarray[str_]: The string values of the :obj:`.EnumArray`. For instance: diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index a3f5a8e45..f5fdbc7ce 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -21,6 +21,7 @@ lint-doc: \ lint-doc-commons \ lint-doc-data_storage \ lint-doc-entities \ + lint-doc-indexed_enums \ ; ## Run linters to check for syntax and style errors in the doc. 
From 6783cb6ec7d50da2f51b17097a57b90f247cda5c Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 5 Oct 2024 18:56:16 +0200 Subject: [PATCH 06/29] chore: version bump (fixes #1271) --- CHANGELOG.md | 6 ++++++ setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f900c330..74f86b175 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### 42.0.8 [#1272](https://github.com/openfisca/openfisca-core/pull/1272) + +#### Documentation + +- Add documentation to the `indexed_enums` module + ### 42.0.7 [#1264](https://github.com/openfisca/openfisca-core/pull/1264) #### Technical changes diff --git a/setup.py b/setup.py index d342bb9f4..491479ccb 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="42.0.7", + version="42.0.8", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[ From 00120b1d3e74096da4f0e9875b2da51b76b4563a Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sun, 6 Oct 2024 15:30:52 +0200 Subject: [PATCH 07/29] test(enums): fix doctests (#1267) --- openfisca_core/indexed_enums/enum.py | 170 ++++++++++++++++-- openfisca_core/indexed_enums/enum_array.py | 136 ++++++++++++-- .../indexed_enums/tests/__init__.py | 0 .../indexed_enums/tests/test_enum.py | 152 ++++++++++++++++ .../indexed_enums/tests/test_enum_array.py | 30 ++++ 5 files changed, 462 insertions(+), 26 deletions(-) create mode 100644 openfisca_core/indexed_enums/tests/__init__.py create mode 100644 openfisca_core/indexed_enums/tests/test_enum.py create mode 100644 openfisca_core/indexed_enums/tests/test_enum_array.py diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 56f007941..3aa633eb3 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -13,6 +13,69 @@ class Enum(t.Enum): Its items have an :class:`int` index, useful and performant when running 
:mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> repr(Housing) + "" + + >>> repr(Housing.TENANT) + "" + + >>> str(Housing.TENANT) + 'Housing.TENANT' + + >>> dict([(Housing.TENANT, Housing.TENANT.value)]) + {: 'Tenant'} + + >>> list(Housing) + [, , ...] + + >>> Housing["TENANT"] + + + >>> Housing("Tenant") + + + >>> Housing.TENANT in Housing + True + + >>> len(Housing) + 4 + + >>> Housing.TENANT == Housing.TENANT + True + + >>> Housing.TENANT != Housing.TENANT + False + + >>> Housing.TENANT > Housing.TENANT + False + + >>> Housing.TENANT < Housing.TENANT + False + + >>> Housing.TENANT >= Housing.TENANT + True + + >>> Housing.TENANT <= Housing.TENANT + True + + >>> Housing.TENANT.index + 1 + + >>> Housing.TENANT.name + 'TENANT' + + >>> Housing.TENANT.value + 'Tenant' + """ #: The :attr:`index` of the :class:`.Enum` member. @@ -28,14 +91,61 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: *__args: Positional arguments. **__kwargs: Keyword arguments. + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Housing = enum.Enum("Housing", "owner tenant") + >>> Housing.tenant.index + 1 + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.TENANT.index + 1 + + >>> array = numpy.array([[1, 2], [3, 4]]) + >>> array[Housing.TENANT.index] + array([3, 4]) + Note: ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. """ self.index = len(self._member_names_) - #: Bypass the slow :meth:`enum.Enum.__eq__` method. 
- __eq__ = object.__eq__ + def __eq__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index == other.index + + def __ne__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index != other.index + + def __lt__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index < other.index + + def __le__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index <= other.index + + def __gt__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index > other.index + + def __ge__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index >= other.index #: :meth:`.__hash__` must also be defined so as to stay hashable. __hash__ = object.__hash__ @@ -53,19 +163,53 @@ def encode( Returns: EnumArray: An :class:`.EnumArray` with the encoded input values. - For instance: + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... 
TENANT = "Tenant" + + # EnumArray + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> Housing.encode(enum_array) + EnumArray([]) + + # Array of Enum + + >>> array = numpy.array([Housing.TENANT]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True + + # Array of integers + + >>> array = numpy.array([1]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True + + # Array of bytes + + >>> array = numpy.array([b"TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True + + # Array of strings + + >>> array = numpy.array(["TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array[0] == Housing.TENANT.index + True - >>> string_identifier_array = asarray(["free_lodger", "owner"]) - >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) - >>> encoded_array[0] - 2 # Encoded value + .. seealso:: + :meth:`.EnumArray.decode` for decoding. - >>> free_lodger = HousingOccupancyStatus.free_lodger - >>> owner = HousingOccupancyStatus.owner - >>> enum_item_array = asarray([free_lodger, owner]) - >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) - >>> encoded_array[0] - 2 # Encoded value """ if isinstance(array, EnumArray): return array diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 93c848698..334083dd3 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -18,6 +18,52 @@ class EnumArray(t.EnumArray): about the :meth:`.__new__` and :meth:`.__array_finalize__` methods below, see `Subclassing ndarray`_. + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum, variables + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... 
HOMELESS = "Homeless" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> repr(enum.EnumArray) + "" + + >>> repr(enum_array) + "EnumArray([])" + + >>> str(enum_array) + "['TENANT']" + + >>> list(enum_array) + [1] + + >>> enum_array[0] + 1 + + >>> enum_array[0] in enum_array + True + + >>> len(enum_array) + 1 + + >>> enum_array = enum.EnumArray(list(Housing), Housing) + >>> enum_array[Housing.TENANT.index] + + + >>> class OccupancyStatus(variables.Variable): + ... value_type = enum.Enum + ... possible_values = Housing + + >>> enum.EnumArray(array, OccupancyStatus.possible_values) + EnumArray([]) + .. _Subclassing ndarray: https://numpy.org/doc/stable/user/basics.subclassing.html @@ -59,6 +105,33 @@ def __eq__(self, other: object) -> bool: bool: When ??? ndarray[bool_]: When ??? + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> enum_array == 1 + array([ True]) + + >>> enum_array == [1] + array([ True]) + + >>> enum_array == [2] + array([False]) + + >>> enum_array == "1" + array([False]) + + >>> enum_array is None + False + Note: This breaks the `Liskov substitution principle`_. @@ -81,6 +154,33 @@ def __ne__(self, other: object) -> bool: bool: When ??? ndarray[bool_]: When ??? + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> enum_array != 1 + array([False]) + + >>> enum_array != [1] + array([False]) + + >>> enum_array != [2] + array([ True]) + + >>> enum_array != "1" + array([ True]) + + >>> enum_array is not None + True + Note: This breaks the `Liskov substitution principle`_. 
@@ -115,15 +215,19 @@ def decode(self) -> numpy.object_: Returns: ndarray[Enum]: The items of the :obj:`.EnumArray`. - For instance: + Examples: + >>> import numpy - >>> enum_array = household("housing_occupancy_status", period) - >>> enum_array[0] - >>> 2 # Encoded value - >>> enum_array.decode()[0] - + >>> from openfisca_core import indexed_enums as enum - Decoded value: enum item + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array.decode() + array([], dtype=object) """ return numpy.select( @@ -137,13 +241,19 @@ def decode_to_str(self) -> numpy.str_: Returns: ndarray[str_]: The string values of the :obj:`.EnumArray`. - For instance: + Examples: + >>> import numpy - >>> enum_array = household("housing_occupancy_status", period) - >>> enum_array[0] - >>> 2 # Encoded value - >>> enum_array.decode_to_str()[0] - 'free_lodger' # String identifier + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... 
TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array.decode_to_str() + array(['TENANT'], dtype=' Date: Mon, 7 Oct 2024 15:37:55 +0200 Subject: [PATCH 08/29] test(enums): fix Enum.decode (#1267) --- openfisca_core/indexed_enums/enum.py | 93 ++++++++---- .../indexed_enums/tests/test_enum.py | 140 ++++++++---------- openfisca_core/indexed_enums/types.py | 22 ++- 3 files changed, 148 insertions(+), 107 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 3aa633eb3..b3f7f1dbb 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -153,7 +153,16 @@ def __ge__(self, other: object) -> bool: @classmethod def encode( cls, - array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, + array: ( + EnumArray + | t.Array[t.DTypeStr] + | t.Array[t.DTypeInt] + | t.Array[t.DTypeEnum] + | t.Array[t.DTypeObject] + | t.ArrayLike[str] + | t.ArrayLike[int] + | t.ArrayLike[t.Enum] + ), ) -> EnumArray: """Encode an encodable array into an :class:`.EnumArray`. @@ -163,6 +172,11 @@ def encode( Returns: EnumArray: An :class:`.EnumArray` with the encoded input values. + Raises: + TypeError: If ``array`` is a scalar :class:`~numpy.ndarray`. + TypeError: If ``array`` is of a diffent :class:`.Enum` type. + NotImplementedError: If ``array`` is of an unsupported type. 
+ Examples: >>> import numpy @@ -183,29 +197,29 @@ def encode( >>> array = numpy.array([Housing.TENANT]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + >>> enum_array == Housing.TENANT + array([ True]) # Array of integers >>> array = numpy.array([1]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + >>> enum_array == Housing.TENANT + array([ True]) - # Array of bytes + # Array of strings - >>> array = numpy.array([b"TENANT"]) + >>> array = numpy.array(["TENANT"]) >>> enum_array = Housing.encode(array) >>> enum_array[0] == Housing.TENANT.index True - # Array of strings + # Array of bytes - >>> array = numpy.array(["TENANT"]) + >>> array = numpy.array([b"TENANT"]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + Traceback (most recent call last): + NotImplementedError: Unsupported encoding: bytes48. .. seealso:: :meth:`.EnumArray.decode` for decoding. @@ -214,15 +228,36 @@ def encode( if isinstance(array, EnumArray): return array + if not isinstance(array, numpy.ndarray): + return cls.encode(numpy.array(array)) + + if array.size == 0: + return EnumArray(array, cls) + + if array.ndim == 0: + msg = ( + "Scalar arrays are not supported: expecting a vector array, " + f"instead. Please try again with `numpy.array([{array}])`." 
+ ) + raise TypeError(msg) + + # Enum data type array + if numpy.issubdtype(array.dtype, t.DTypeEnum): + indexes = numpy.array([item.index for item in cls], t.DTypeEnum) + return EnumArray(indexes[array[array < indexes.size]], cls) + + # Integer array + if numpy.issubdtype(array.dtype, int): + array = numpy.array(array, dtype=t.DTypeEnum) + return cls.encode(array) + # String array - if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: - array = numpy.select( - [array == item.name for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) + if numpy.issubdtype(array.dtype, t.DTypeStr): + enums = [cls.__members__[key] for key in array if key in cls.__members__] + return cls.encode(enums) # Enum items arrays - elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": + if numpy.issubdtype(array.dtype, t.DTypeObject): # Ensure we are comparing the comparable. The problem this fixes: # On entering this method "cls" will generally come from # variable.possible_values, while the array values may come from @@ -234,15 +269,21 @@ def encode( # So, instead of relying on the "cls" passed in, we use only its # name to check that the values in the array, if non-empty, are of # the right type. - if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: - cls = array[0].__class__ - - array = numpy.select( - [array == item for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - return EnumArray(array, cls) + if cls.__name__ is array[0].__class__.__name__: + array = numpy.select( + [array == item for item in array[0].__class__], + [item.index for item in array[0].__class__], + ).astype(ENUM_ARRAY_DTYPE) + return EnumArray(array, cls) + + msg = ( + f"Diverging enum types are not supported: expected {cls.__name__}, " + f"but got {array[0].__class__.__name__} instead." + ) + raise TypeError(msg) + + msg = f"Unsupported encoding: {array.dtype.name}." 
+ raise NotImplementedError(msg) __all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 55aa8208a..059918f7d 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -1,4 +1,5 @@ import numpy +import pytest from openfisca_core import indexed_enums as enum @@ -8,145 +9,126 @@ class Animal(enum.Enum): DOG = b"Dog" +class Colour(enum.Enum): + INCARNADINE = "incarnadine" + TURQUOISE = "turquoise" + AMARANTH = "amaranth" + + # Arrays of Enum -def test_enum_encode_with_enum_scalar_array(): - """Encode when called with an enum scalar array.""" - array = numpy.array(Animal.DOG) +def test_enum_encode_with_array_of_enum(): + """Does encode when called with an array of enums.""" + array = numpy.array([Animal.DOG]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index + assert enum_array == Animal.DOG def test_enum_encode_with_enum_sequence(): - """Does not encode when called with an enum sequence.""" + """Does encode when called with an enum sequence.""" sequence = list(Animal) enum_array = Animal.encode(sequence) - assert enum_array[0] != Animal.DOG.index - - -def test_enum_encode_with_enum_scalar(): - """Does not encode when called with an enum scalar.""" - scalar = Animal.DOG - enum_array = Animal.encode(scalar) - assert enum_array != Animal.DOG.index - - -# Arrays of int + assert Animal.DOG in enum_array -def test_enum_encode_with_int_scalar_array(): - """Does not encode when called with an int scalar array (noop).""" - array = numpy.array(1) - enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index +def test_enum_encode_with_enum_scalar_array(): + """Does not encode when called with an enum scalar array.""" + array = numpy.array(Animal.DOG) + with pytest.raises(TypeError): + Animal.encode(array) -def test_enum_encode_with_int_sequence(): - """Does not encode when called with an int sequence 
(noop).""" - sequence = range(1, 2) - enum_array = Animal.encode(sequence) - assert enum_array[0] == Animal.DOG.index - +def test_enum_encode_with_enum_with_bad_value(): + """Does not encode when called with a value not in an Enum.""" + array = numpy.array([Colour.AMARANTH]) + with pytest.raises(TypeError): + Animal.encode(array) -def test_enum_encode_with_int_scalar(): - """Does not encode when called with an int scalar (noop).""" - scalar = 1 - enum_array = Animal.encode(scalar) - assert enum_array == Animal.DOG.index +# Arrays of int -# Arrays of bytes -def test_enum_encode_with_bytes_scalar_array(): - """Encode when called with a bytes scalar array.""" - array = numpy.array(b"DOG") +def test_enum_encode_with_array_of_int(): + """Does encode when called with an array of int.""" + array = numpy.array([1]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index + assert enum_array == Animal.DOG -def test_enum_encode_with_bytes_sequence(): - """Does not encode when called with a bytes sequence.""" - sequence = bytearray(b"DOG") +def test_enum_encode_with_int_sequence(): + """Does encode when called with an int sequence.""" + sequence = (1, 2) enum_array = Animal.encode(sequence) - assert enum_array[0] != Animal.DOG.index + assert Animal.DOG in enum_array -def test_enum_encode_with_bytes_scalar(): - """Does not encode when called with a bytes scalar.""" - scalar = b"DOG" - enum_array = Animal.encode(scalar) - assert enum_array != Animal.DOG.index +def test_enum_encode_with_int_scalar_array(): + """Does not encode when called with an int scalar array.""" + array = numpy.array(1) + with pytest.raises(TypeError): + Animal.encode(array) -def test_enum_encode_with_bytes_with_bad_value(): +def test_enum_encode_with_int_with_bad_value(): """Does not encode when called with a value not in an Enum.""" - array = numpy.array([b"IGUANA"]) + array = numpy.array([2]) enum_array = Animal.encode(array) - assert enum_array != Animal.CAT.index - assert enum_array != 
Animal.DOG.index + assert len(enum_array) == 0 # Arrays of strings -def test_enum_encode_with_str_scalar_array(): - """Encode when called with a str scalar array.""" - array = numpy.array("DOG") +def test_enum_encode_with_array_of_string(): + """Does encode when called with an array of string.""" + array = numpy.array(["DOG"]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG.index + assert enum_array == Animal.DOG def test_enum_encode_with_str_sequence(): - """Does not encode when called with a str sequence.""" + """Does encode when called with a str sequence.""" sequence = ("DOG",) enum_array = Animal.encode(sequence) - assert enum_array[0] != Animal.DOG.index + assert Animal.DOG in enum_array -def test_enum_encode_with_str_scalar(): - """Does not encode when called with a str scalar.""" - scalar = "DOG" - enum_array = Animal.encode(scalar) - assert enum_array != Animal.DOG.index +def test_enum_encode_with_str_scalar_array(): + """Does not encode when called with a str scalar array.""" + array = numpy.array("DOG") + with pytest.raises(TypeError): + Animal.encode(array) def test_enum_encode_with_str_with_bad_value(): """Does not encode when called with a value not in an Enum.""" array = numpy.array(["JAIBA"]) enum_array = Animal.encode(array) - assert enum_array != Animal.CAT.index - assert enum_array != Animal.DOG.index + assert len(enum_array) == 0 # Unsupported encodings def test_enum_encode_with_any_array(): - """Does not encode when called with unsupported types (noop).""" + """Does not encode when called with unsupported types.""" value = {"animal": "dog"} array = numpy.array([value]) - enum_array = Animal.encode(array) - assert enum_array[0] == value + with pytest.raises(TypeError): + Animal.encode(array) def test_enum_encode_with_any_scalar_array(): - """Does not encode when called with unsupported types (noop).""" + """Does not encode when called with unsupported types.""" value = 1.5 array = numpy.array(value) - enum_array = 
Animal.encode(array) - assert enum_array == value + with pytest.raises(TypeError): + Animal.encode(array) def test_enum_encode_with_any_sequence(): - """Does not encode when called with unsupported types (noop).""" + """Does not encode when called with unsupported types.""" sequence = memoryview(b"DOG") - enum_array = Animal.encode(sequence) - assert enum_array[0] == sequence[0] - - -def test_enum_encode_with_anything(): - """Does not encode when called with unsupported types (noop).""" - anything = {object()} - enum_array = Animal.encode(anything) - assert enum_array == anything + with pytest.raises(NotImplementedError): + Animal.encode(sequence) diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index d69eb098a..ab4283b75 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,3 +1,21 @@ -from openfisca_core.types import Array, DTypeEnum, Enum, EnumArray +from openfisca_core.types import ( + Array, + ArrayLike, + DTypeEnum, + DTypeInt, + DTypeObject, + DTypeStr, + Enum, + EnumArray, +) -__all__ = ["Array", "DTypeEnum", "Enum", "EnumArray"] +__all__ = [ + "Array", + "ArrayLike", + "DTypeEnum", + "DTypeInt", + "DTypeObject", + "DTypeStr", + "Enum", + "EnumArray", +] From 0195451bc67426bee0077d7a5f0499c4b81f863d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Mon, 7 Oct 2024 15:56:51 +0200 Subject: [PATCH 09/29] refactor(enums): remove magic methods (#1267) --- openfisca_core/indexed_enums/enum.py | 32 ---------------------------- setup.cfg | 2 +- 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index b3f7f1dbb..3909e38ea 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -55,18 +55,6 @@ class Enum(t.Enum): >>> Housing.TENANT != Housing.TENANT False - >>> Housing.TENANT > Housing.TENANT - False - - >>> Housing.TENANT < Housing.TENANT - False - - 
>>> Housing.TENANT >= Housing.TENANT - True - - >>> Housing.TENANT <= Housing.TENANT - True - >>> Housing.TENANT.index 1 @@ -127,26 +115,6 @@ def __ne__(self, other: object) -> bool: return NotImplemented return self.index != other.index - def __lt__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index < other.index - - def __le__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index <= other.index - - def __gt__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index > other.index - - def __ge__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index >= other.index - #: :meth:`.__hash__` must also be defined so as to stay hashable. __hash__ = object.__hash__ diff --git a/setup.cfg b/setup.cfg index fd18e5ab3..9664127cf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,7 +49,7 @@ disable = all enable = C0115, C0116, R0401 per-file-ignores = types.py:C0115,C0116 - /tests/:C0116 + /tests/:C0115,C0116 score = no [isort] From b5b79685046a72b3375b68eb71b0a8b803e2138b Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 02:38:27 +0200 Subject: [PATCH 10/29] fix(enums): do actual indexing (#1267) --- openfisca_core/indexed_enums/__init__.py | 2 + openfisca_core/indexed_enums/_enum_type.py | 113 ++++++++++++++++++ openfisca_core/indexed_enums/_type_guards.py | 68 +++++++++++ openfisca_core/indexed_enums/enum.py | 91 ++++++-------- openfisca_core/indexed_enums/enum_array.py | 2 +- .../indexed_enums/tests/test_enum.py | 8 +- openfisca_core/indexed_enums/types.py | 51 ++++++-- openfisca_core/types.py | 10 +- tests/core/tools/test_assert_near.py | 2 +- 9 files changed, 279 insertions(+), 68 deletions(-) create mode 100644 openfisca_core/indexed_enums/_enum_type.py create mode 100644 openfisca_core/indexed_enums/_type_guards.py diff --git 
a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index a6a452511..10bbd3d98 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,6 +1,7 @@ """Enumerations for variables with a limited set of possible values.""" from . import types +from ._enum_type import EnumType from .config import ENUM_ARRAY_DTYPE from .enum import Enum from .enum_array import EnumArray @@ -9,5 +10,6 @@ "ENUM_ARRAY_DTYPE", "Enum", "EnumArray", + "EnumType", "types", ] diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py new file mode 100644 index 000000000..0b2f26b35 --- /dev/null +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +from typing import final + +import numpy + +from . import types as t + + +def _item_list(enum_class: type[t.Enum]) -> t.ItemList: + """Return the non-vectorised list of enum items.""" + return [ + (index, name, value) + for index, (name, value) in enumerate(enum_class.__members__.items()) + ] + + +def _item_dtype(enum_class: type[t.Enum]) -> t.RecDType: + """Return the dtype of the indexed enum's items.""" + size = max(map(len, enum_class.__members__.keys())) + return numpy.dtype( + ( + numpy.generic, + { + "index": (t.EnumDType, 0), + "name": (f"U{size}", 2), + "enum": (enum_class, 2 + size * 4), + }, + ) + ) + + +def _item_array(enum_class: type[t.Enum]) -> t.RecArray: + """Return the indexed enum's items.""" + items = _item_list(enum_class) + dtype = _item_dtype(enum_class) + array = numpy.array(items, dtype=dtype) + return array.view(numpy.recarray) + + +@final +class EnumType(t.EnumType): + """Meta class for creating an indexed :class:`.Enum`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Enum(enum.Enum, metaclass=enum.EnumType): + ... 
pass + + >>> Enum.items + Traceback (most recent call last): + AttributeError: type object 'Enum' has no attribute 'items' + + >>> class Housing(Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.items + rec.array([(0, 'OWNER', ), ...]) + + >>> Housing.indices + array([0, 1], dtype=int16) + + >>> Housing.names + array(['OWNER', 'TENANT'], dtype='>> Housing.enums + array([, ], dtype...) + + """ + + #: The items of the indexed enum class. + items: t.RecArray + + @property + def indices(cls) -> t.IndexArray: + """Return the indices of the indexed enum class.""" + return cls.items.index + + @property + def names(cls) -> t.StrArray: + """Return the names of the indexed enum class.""" + return cls.items.name + + @property + def enums(cls) -> t.ObjArray: + """Return the members of the indexed enum class.""" + return cls.items.enum + + def __new__( + metacls, + cls: str, + bases: tuple[type, ...], + classdict: t.EnumDict, + **kwds: object, + ) -> t.EnumType: + """Create a new indexed enum class.""" + # Create the enum class. + enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) + + # If the enum class has no members, return it as is. + if not enum_class.__members__: + return enum_class + + # Add the items attribute to the enum class. + enum_class.items = _item_array(enum_class) + + # Return the modified enum class. + return enum_class + + def __dir__(cls) -> list[str]: + return sorted({"items", "indices", "names", "enums", *super().__dir__()}) diff --git a/openfisca_core/indexed_enums/_type_guards.py b/openfisca_core/indexed_enums/_type_guards.py new file mode 100644 index 000000000..3caf1859b --- /dev/null +++ b/openfisca_core/indexed_enums/_type_guards.py @@ -0,0 +1,68 @@ +from __future__ import annotations + +from typing_extensions import TypeIs + +import numpy + +from . 
import types as t + + +def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray | t.IntArray]: + """Narrow the type of a given array to an array of :obj:`numpy.integer`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.integer`, False otherwise. + + Examples: + >>> import numpy + + >>> array = numpy.array([1], dtype=numpy.int16) + >>> _is_int_array(array) + True + + >>> array = numpy.array([1], dtype=numpy.int32) + >>> _is_int_array(array) + True + + >>> array = numpy.array([1.0]) + >>> _is_int_array(array) + False + + """ + return numpy.issubdtype(array.dtype, numpy.integer) + + +def _is_str_array(array: t.AnyArray) -> TypeIs[t.StrArray]: + """Narrow the type of a given array to an array of :obj:`numpy.str_`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.str_`, False otherwise. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... TENANT = "tenant" + + >>> array = numpy.array([Housing.OWNER]) + >>> _is_str_array(array) + False + + >>> array = numpy.array(["owner"]) + >>> _is_str_array(array) + True + + """ + return numpy.issubdtype(array.dtype, str) + + +__all__ = ["_is_int_array", "_is_str_array"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 3909e38ea..069cc8fa9 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -3,11 +3,12 @@ import numpy from . import types as t -from .config import ENUM_ARRAY_DTYPE +from ._enum_type import EnumType +from ._type_guards import _is_int_array, _is_str_array from .enum_array import EnumArray -class Enum(t.Enum): +class Enum(t.Enum, metaclass=EnumType): """Enum based on `enum34 `_. 
Its items have an :class:`int` index, useful and performant when running @@ -115,20 +116,19 @@ def __ne__(self, other: object) -> bool: return NotImplemented return self.index != other.index - #: :meth:`.__hash__` must also be defined so as to stay hashable. - __hash__ = object.__hash__ + def __hash__(self) -> int: + return hash(self.index) @classmethod def encode( cls, array: ( EnumArray - | t.Array[t.DTypeStr] - | t.Array[t.DTypeInt] - | t.Array[t.DTypeEnum] - | t.Array[t.DTypeObject] - | t.ArrayLike[str] + | t.IntArray + | t.StrArray + | t.ObjArray | t.ArrayLike[int] + | t.ArrayLike[str] | t.ArrayLike[t.Enum] ), ) -> EnumArray: @@ -143,7 +143,6 @@ def encode( Raises: TypeError: If ``array`` is a scalar :class:`~numpy.ndarray`. TypeError: If ``array`` is of a diffent :class:`.Enum` type. - NotImplementedError: If ``array`` is of an unsupported type. Examples: >>> import numpy @@ -187,7 +186,7 @@ def encode( >>> array = numpy.array([b"TENANT"]) >>> enum_array = Housing.encode(array) Traceback (most recent call last): - NotImplementedError: Unsupported encoding: bytes48. + TypeError: Failed to encode "[b'TENANT']" of type 'bytes_', as i... .. seealso:: :meth:`.EnumArray.decode` for decoding. 
@@ -200,7 +199,7 @@ def encode( return cls.encode(numpy.array(array)) if array.size == 0: - return EnumArray(array, cls) + return EnumArray(numpy.array([]), cls) if array.ndim == 0: msg = ( @@ -209,49 +208,37 @@ def encode( ) raise TypeError(msg) - # Enum data type array - if numpy.issubdtype(array.dtype, t.DTypeEnum): - indexes = numpy.array([item.index for item in cls], t.DTypeEnum) - return EnumArray(indexes[array[array < indexes.size]], cls) - # Integer array - if numpy.issubdtype(array.dtype, int): - array = numpy.array(array, dtype=t.DTypeEnum) - return cls.encode(array) + if _is_int_array(array): + indices = numpy.array(array[array < len(cls.items)], dtype=t.EnumDType) + return EnumArray(indices, cls) # String array - if numpy.issubdtype(array.dtype, t.DTypeStr): - enums = [cls.__members__[key] for key in array if key in cls.__members__] - return cls.encode(enums) - - # Enum items arrays - if numpy.issubdtype(array.dtype, t.DTypeObject): - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. - if cls.__name__ is array[0].__class__.__name__: - array = numpy.select( - [array == item for item in array[0].__class__], - [item.index for item in array[0].__class__], - ).astype(ENUM_ARRAY_DTYPE) - return EnumArray(array, cls) - - msg = ( - f"Diverging enum types are not supported: expected {cls.__name__}, " - f"but got {array[0].__class__.__name__} instead." - ) - raise TypeError(msg) - - msg = f"Unsupported encoding: {array.dtype.name}." 
- raise NotImplementedError(msg) + if _is_str_array(array): + indices = cls.items[numpy.isin(cls.names, array)].index + return EnumArray(indices, cls) + + # Ensure we are comparing the comparable. The problem this fixes: + # On entering this method "cls" will generally come from + # variable.possible_values, while the array values may come from + # directly importing a module containing an Enum class. However, + # variables (and hence their possible_values) are loaded by a call + # to load_module, which gives them a different identity from the + # ones imported in the usual way. + # + # So, instead of relying on the "cls" passed in, we use only its + # name to check that the values in the array, if non-empty, are of + # the right type. + if cls.__name__ is array[0].__class__.__name__: + indices = cls.items[numpy.isin(cls.enums, array)].index + return EnumArray(indices, cls) + + msg = ( + f"Failed to encode \"{array}\" of type '{array[0].__class__.__name__}', " + "as it is not supported. Please, try again with an array of " + f"'{int.__name__}', '{str.__name__}', or '{cls.__name__}'." 
+ ) + raise TypeError(msg) __all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 334083dd3..06fc1fbc9 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -74,7 +74,7 @@ class EnumArray(t.EnumArray): def __new__( cls, - input_array: t.Array[t.DTypeEnum], + input_array: t.IndexArray, possible_values: None | type[t.Enum] = None, ) -> Self: """See comment above.""" diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 059918f7d..c77b9ddac 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -27,7 +27,7 @@ def test_enum_encode_with_array_of_enum(): def test_enum_encode_with_enum_sequence(): """Does encode when called with an enum sequence.""" - sequence = list(Animal) + sequence = list(Animal) + list(Colour) enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -89,7 +89,7 @@ def test_enum_encode_with_array_of_string(): def test_enum_encode_with_str_sequence(): """Does encode when called with a str sequence.""" - sequence = ("DOG",) + sequence = ("DOG", "JAIBA") enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -130,5 +130,5 @@ def test_enum_encode_with_any_scalar_array(): def test_enum_encode_with_any_sequence(): """Does not encode when called with unsupported types.""" sequence = memoryview(b"DOG") - with pytest.raises(NotImplementedError): - Animal.encode(sequence) + enum_array = Animal.encode(sequence) + assert len(enum_array) == 0 diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index ab4283b75..a16b03750 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,21 +1,56 @@ +from typing import Any +from typing_extensions import TypeAlias + from openfisca_core.types import ( Array, ArrayLike, 
- DTypeEnum, - DTypeInt, - DTypeObject, - DTypeStr, + DTypeEnum as EnumDType, + DTypeGeneric as AnyDType, + DTypeInt as IntDType, + DTypeLike, + DTypeObject as ObjDType, + DTypeStr as StrDType, Enum, EnumArray, + EnumType, ) +import enum + +import numpy + +#: Type for enum dicts. +EnumDict: TypeAlias = enum._EnumDict # noqa: SLF001 + +#: Type for the non-vectorised list of enum items. +ItemList: TypeAlias = list[tuple[int, str, Enum]] + +#: Type for record arrays data type. +RecDType: TypeAlias = numpy.dtype[numpy.void] + +#: Type for record arrays. +RecArray: TypeAlias = numpy.recarray[object, Any] + +#: Type for enum indices arrays. +IndexArray: TypeAlias = Array[EnumDType] + +#: Type for int arrays. +IntArray: TypeAlias = Array[IntDType] + +#: Type for str arrays. +StrArray: TypeAlias = Array[StrDType] + +#: Type for object arrays. +ObjArray: TypeAlias = Array[ObjDType] + +#: Type for generic arrays. +AnyArray: TypeAlias = Array[AnyDType] + __all__ = [ "Array", "ArrayLike", - "DTypeEnum", - "DTypeInt", - "DTypeObject", - "DTypeStr", + "DTypeLike", "Enum", "EnumArray", + "EnumType", ] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index b922cde09..d81a0789a 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Iterable, Sequence, Sized -from numpy.typing import NDArray +from numpy.typing import DTypeLike, NDArray from typing import Any, NewType, TypeVar, Union from typing_extensions import Protocol, Self, TypeAlias @@ -108,7 +108,10 @@ def plural(self, /) -> None | RolePlural: ... # Indexed enums -class Enum(enum.Enum, metaclass=enum.EnumMeta): +class EnumType(enum.EnumMeta): ... + + +class Enum(enum.Enum, metaclass=EnumType): index: int @@ -239,3 +242,6 @@ def __call__( class Params(Protocol): def __call__(self, instant: Instant, /) -> ParameterNodeAtInstant: ... 
+ + +__all__ = ["DTypeLike"] diff --git a/tests/core/tools/test_assert_near.py b/tests/core/tools/test_assert_near.py index c351be0f9..bdcb589b4 100644 --- a/tests/core/tools/test_assert_near.py +++ b/tests/core/tools/test_assert_near.py @@ -21,5 +21,5 @@ def test_enum_2(tax_benefit_system) -> None: "housing_occupancy_status" ].possible_values value = possible_values.encode(numpy.array(["tenant", "owner"])) - expected_value = ["tenant", "owner"] + expected_value = ["owner", "tenant"] assert_near(value, expected_value) From cbc7fdb19cb257e6b244479a611ac5b81f26db34 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 02:52:18 +0200 Subject: [PATCH 11/29] build(test): add enums to test path (#1267) --- openfisca_tasks/test_code.mk | 1 + setup.cfg | 1 + 2 files changed, 2 insertions(+) diff --git a/openfisca_tasks/test_code.mk b/openfisca_tasks/test_code.mk index ed2d435ed..f2ab7247a 100644 --- a/openfisca_tasks/test_code.mk +++ b/openfisca_tasks/test_code.mk @@ -40,6 +40,7 @@ test-core: $(shell git ls-files "*test_*.py") openfisca_core/data_storage \ openfisca_core/entities \ openfisca_core/holders \ + openfisca_core/indexed_enums \ openfisca_core/periods \ openfisca_core/projectors @PYTEST_ADDOPTS="$${PYTEST_ADDOPTS} ${pytest_args}" \ diff --git a/setup.cfg b/setup.cfg index 9664127cf..60ac8faf0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ include-in-doctest = openfisca_core/commons openfisca_core/entities openfisca_core/holders + openfisca_core/indexed_enums openfisca_core/periods openfisca_core/projectors max-line-length = 88 From cd6ca32964fbdb2fd501f6e8d92e639c3cf35a30 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 03:37:01 +0200 Subject: [PATCH 12/29] fix(enums): failing doctest when AttributeError (#1267) --- openfisca_core/indexed_enums/_enum_type.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 
0b2f26b35..777d611ba 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -50,7 +50,7 @@ class EnumType(t.EnumType): >>> Enum.items Traceback (most recent call last): - AttributeError: type object 'Enum' has no attribute 'items' + AttributeError: ... >>> class Housing(Enum): ... OWNER = "Owner" From 38f99e0cd11170431fdaad144935a5959bcd47d4 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 03:54:43 +0200 Subject: [PATCH 13/29] fix(enums): ensure __eq__ gives a numpy array (#1267) --- openfisca_core/indexed_enums/enum_array.py | 6 ++++-- openfisca_core/indexed_enums/types.py | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 06fc1fbc9..2e9ebf148 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -141,8 +141,10 @@ def __eq__(self, other: object) -> bool: """ if other.__class__.__name__ is self.possible_values.__name__: return self.view(numpy.ndarray) == other.index - - return self.view(numpy.ndarray) == other + is_eq = self.view(numpy.ndarray) == other + if isinstance(is_eq, numpy.ndarray): + return is_eq + return numpy.array([is_eq], dtype=t.BoolDType) def __ne__(self, other: object) -> bool: """Inequality. 
diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index a16b03750..ffc2cc9f2 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -4,6 +4,7 @@ from openfisca_core.types import ( Array, ArrayLike, + DTypeBool as BoolDType, DTypeEnum as EnumDType, DTypeGeneric as AnyDType, DTypeInt as IntDType, @@ -49,6 +50,7 @@ __all__ = [ "Array", "ArrayLike", + "BoolDType", "DTypeLike", "Enum", "EnumArray", From 8074af4bb88af3d25c6aea83fc53bc084dafeea0 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 04:22:03 +0200 Subject: [PATCH 14/29] fix(enums): fix __repr__ (#1267) --- openfisca_core/data_storage/on_disk_storage.py | 2 +- openfisca_core/indexed_enums/_enum_type.py | 4 ++-- openfisca_core/indexed_enums/enum.py | 15 +++++++++------ openfisca_core/indexed_enums/enum_array.py | 11 ++++++----- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 3d0ef7fc1..99cfe56dd 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -87,7 +87,7 @@ def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: ... storage = data_storage.OnDiskStorage(directory) ... storage.put(value, period) ... storage._decode_file(storage._files[period]) - EnumArray([]) + EnumArray(Housing.TENANT) """ enum = self._enums.get(file) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 777d611ba..0152595ea 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -57,7 +57,7 @@ class EnumType(t.EnumType): ... TENANT = "Tenant" >>> Housing.items - rec.array([(0, 'OWNER', ), ...]) + rec.array([(0, 'OWNER', Housing.OWNER), (1, 'TENANT', Housing.TENAN...) 
>>> Housing.indices array([0, 1], dtype=int16) @@ -66,7 +66,7 @@ class EnumType(t.EnumType): array(['OWNER', 'TENANT'], dtype='>> Housing.enums - array([, ], dtype...) + array([Housing.OWNER, Housing.TENANT], dtype=object) """ diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 069cc8fa9..938335bcb 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -27,22 +27,22 @@ class Enum(t.Enum, metaclass=EnumType): "" >>> repr(Housing.TENANT) - "" + 'Housing.TENANT' >>> str(Housing.TENANT) 'Housing.TENANT' >>> dict([(Housing.TENANT, Housing.TENANT.value)]) - {: 'Tenant'} + {Housing.TENANT: 'Tenant'} >>> list(Housing) - [, , ...] + [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] >>> Housing["TENANT"] - + Housing.TENANT >>> Housing("Tenant") - + Housing.TENANT >>> Housing.TENANT in Housing True @@ -106,6 +106,9 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: """ self.index = len(self._member_names_) + def __repr__(self) -> str: + return f"{self.__class__.__name__}.{self.name}" + def __eq__(self, other: object) -> bool: if not isinstance(other, Enum): return NotImplemented @@ -158,7 +161,7 @@ def encode( >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) >>> Housing.encode(enum_array) - EnumArray([]) + EnumArray(Housing.TENANT) # Array of Enum diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 2e9ebf148..807be2ec5 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -36,7 +36,7 @@ class EnumArray(t.EnumArray): "" >>> repr(enum_array) - "EnumArray([])" + 'EnumArray(Housing.TENANT)' >>> str(enum_array) "['TENANT']" @@ -55,14 +55,14 @@ class EnumArray(t.EnumArray): >>> enum_array = enum.EnumArray(list(Housing), Housing) >>> enum_array[Housing.TENANT.index] - + Housing.TENANT >>> class 
OccupancyStatus(variables.Variable): ... value_type = enum.Enum ... possible_values = Housing >>> enum.EnumArray(array, OccupancyStatus.possible_values) - EnumArray([]) + EnumArray(Housing.TENANT) .. _Subclassing ndarray: https://numpy.org/doc/stable/user/basics.subclassing.html @@ -229,7 +229,7 @@ def decode(self) -> numpy.object_: >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) >>> enum_array.decode() - array([], dtype=object) + array([Housing.TENANT], dtype=object) """ return numpy.select( @@ -264,7 +264,8 @@ def decode_to_str(self) -> numpy.str_: ) def __repr__(self) -> str: - return f"{self.__class__.__name__}({self.decode()!s})" + items = ", ".join(str(item) for item in self.decode()) + return f"{self.__class__.__name__}({items})" def __str__(self) -> str: return str(self.decode_to_str()) From 52592246e6e0824ab8f56e1659363379bb3ddf1d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 04:31:41 +0200 Subject: [PATCH 15/29] test(enums): update str eq test (#1267) --- openfisca_core/indexed_enums/enum.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 938335bcb..a291acbd6 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -181,8 +181,8 @@ def encode( >>> array = numpy.array(["TENANT"]) >>> enum_array = Housing.encode(array) - >>> enum_array[0] == Housing.TENANT.index - True + >>> enum_array == Housing.TENANT + array([ True]) # Array of bytes From 6dfc93f4127943f7453bc2720f0f9ed23f9aad9d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 03:10:16 +0200 Subject: [PATCH 16/29] chore: version bump (fixes #1267) --- CHANGELOG.md | 47 ++++++++++++++++++++++ openfisca_core/indexed_enums/_enum_type.py | 3 ++ setup.py | 2 +- 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74f86b175..e2a2f70cd 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -1,5 +1,52 @@ # Changelog +## 42.1.0 [#1273](https://github.com/openfisca/openfisca-core/pull/1273) + +#### New features + +- Introduce `indexed_enums.EnumType` + - Allows for actually fancy indexing `indexed_enums.Enum` + +#### Technical changes + +- Fix doctests + - Now `pytest openfisca_core/indexed_enums` runs without errors +- Fix bug in `Enum.encode` when passing a scalar + - Still raises `TypeError` but with an explanation of why it fails +- Fix bug in `Enum.encode` when encoding values not present in the enum + - When encoding values not present in an enum, `Enum.encode` always encoded + the first item of the enum + - Now, it correctly encodes only the values requested that exist in the enum + +##### Before + +```python +from openfisca_core import indexed_enums as enum + +class TestEnum(enum.Enum): + ONE = "one" + TWO = "two" + +TestEnum.encode([2]) +# EnumArray([0]) +``` + +##### After + +```python +from openfisca_core import indexed_enums as enum + +class TestEnum(enum.Enum): + ONE = "one" + TWO = "two" + +TestEnum.encode([2]) +# EnumArray([]) + +TestEnum.encode([0,1,2,5]) +# EnumArray([<TestEnum.ONE: 'one'> <TestEnum.TWO: 'two'>]) +``` + ### 42.0.8 [#1272](https://github.com/openfisca/openfisca-core/pull/1272) #### Documentation diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 0152595ea..4208ab3ce 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -111,3 +111,6 @@ def __new__( def __dir__(cls) -> list[str]: return sorted({"items", "indices", "names", "enums", *super().__dir__()}) + + +__all__ = ["EnumType"] diff --git a/setup.py b/setup.py index 491479ccb..8cdaa6173 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="42.0.8", + version="42.1.0", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[ From 803680cb2709bb4f62d04d8b23cfdfe65377de9e Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: 
Wed, 9 Oct 2024 18:35:04 +0200 Subject: [PATCH 17/29] docs(enums): add types (#1233) --- openfisca_core/indexed_enums/enum_array.py | 110 ++++++++++++------ .../indexed_enums/tests/test_enum_array.py | 6 +- openfisca_core/indexed_enums/types.py | 9 +- openfisca_core/types.py | 11 +- tests/core/test_simulation_builder.py | 7 +- 5 files changed, 100 insertions(+), 43 deletions(-) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 807be2ec5..aa3db3f07 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, NoReturn +from typing import NoReturn from typing_extensions import Self import numpy @@ -29,7 +29,7 @@ class EnumArray(t.EnumArray): ... FREE_LODGER = "Free lodger" ... HOMELESS = "Homeless" - >>> array = numpy.array([1]) + >>> array = numpy.array([1], dtype=numpy.int16) >>> enum_array = enum.EnumArray(array, Housing) >>> repr(enum.EnumArray) @@ -41,10 +41,10 @@ class EnumArray(t.EnumArray): >>> str(enum_array) "['TENANT']" - >>> list(enum_array) + >>> list(map(int, enum_array)) [1] - >>> enum_array[0] + >>> int(enum_array[0]) 1 >>> enum_array[0] in enum_array @@ -54,8 +54,8 @@ class EnumArray(t.EnumArray): 1 >>> enum_array = enum.EnumArray(list(Housing), Housing) - >>> enum_array[Housing.TENANT.index] - Housing.TENANT + Traceback (most recent call last): + TypeError: int() argument must be a string, a bytes-like object or a... >>> class OccupancyStatus(variables.Variable): ... 
value_type = enum.Enum @@ -74,22 +74,27 @@ class EnumArray(t.EnumArray): def __new__( cls, - input_array: t.IndexArray, + input_array: object, possible_values: None | type[t.Enum] = None, ) -> Self: """See comment above.""" - obj = numpy.asarray(input_array).view(cls) + if not isinstance(input_array, numpy.ndarray): + return cls.__new__(cls, numpy.asarray(input_array), possible_values) + if input_array.ndim == 0: + return cls.__new__(cls, input_array.reshape(1), possible_values) + obj = input_array.astype(t.EnumDType).view(cls) obj.possible_values = possible_values return obj - def __array_finalize__(self, obj: numpy.int32 | None) -> None: + def __array_finalize__(self, obj: None | t.EnumArray | t.ObjArray) -> None: """See comment above.""" if obj is None: return + if isinstance(obj, EnumArray): + self.possible_values = obj.possible_values + return - self.possible_values = getattr(obj, "possible_values", None) - - def __eq__(self, other: object) -> bool: + def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] """Compare equality with the item's :attr:`~.Enum.index`. 
When comparing to an item of :attr:`.possible_values`, use the @@ -117,6 +122,12 @@ def __eq__(self, other: object) -> bool: >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array == Housing + array([False, True]) + + >>> enum_array == Housing.TENANT + array([ True]) + >>> enum_array == 1 array([ True]) @@ -139,14 +150,27 @@ def __eq__(self, other: object) -> bool: https://en.wikipedia.org/wiki/Liskov_substitution_principle """ - if other.__class__.__name__ is self.possible_values.__name__: + if self.possible_values is None: + return NotImplemented + if other is None: + return NotImplemented + if ( + isinstance(other, type(t.Enum)) + and other.__name__ is self.possible_values.__name__ + ): + return self.view(numpy.ndarray) == other.indices[other.indices <= max(self)] + if ( + isinstance(other, t.Enum) + and other.__class__.__name__ is self.possible_values.__name__ + ): return self.view(numpy.ndarray) == other.index - is_eq = self.view(numpy.ndarray) == other - if isinstance(is_eq, numpy.ndarray): - return is_eq - return numpy.array([is_eq], dtype=t.BoolDType) + # For NumPy >=1.26.x. + if isinstance(is_equal := self.view(numpy.ndarray) == other, numpy.ndarray): + return is_equal + # For NumPy <1.26.x. + return numpy.array([is_equal], dtype=t.BoolDType) - def __ne__(self, other: object) -> bool: + def __ne__(self, other: object) -> t.BoolArray: # type: ignore[override] """Inequality. 
Args: @@ -168,6 +192,12 @@ def __ne__(self, other: object) -> bool: >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array != Housing + array([ True, False]) + + >>> enum_array != Housing.TENANT + array([False]) + >>> enum_array != 1 array([False]) @@ -193,14 +223,12 @@ def __ne__(self, other: object) -> bool: return numpy.logical_not(self == other) @staticmethod - def _forbidden_operation(other: Any) -> NoReturn: + def _forbidden_operation(*__args: object, **__kwds: object) -> NoReturn: msg = ( "Forbidden operation. The only operations allowed on EnumArrays " "are '==' and '!='." ) - raise TypeError( - msg, - ) + raise TypeError(msg) __add__ = _forbidden_operation __mul__ = _forbidden_operation @@ -211,12 +239,15 @@ def _forbidden_operation(other: Any) -> NoReturn: __and__ = _forbidden_operation __or__ = _forbidden_operation - def decode(self) -> numpy.object_: + def decode(self) -> t.ObjArray: """Decode itself to a normal array. Returns: ndarray[Enum]: The items of the :obj:`.EnumArray`. + Raises: + TypeError: When the :attr:`.possible_values` is not defined. + Examples: >>> import numpy @@ -232,17 +263,25 @@ def decode(self) -> numpy.object_: array([Housing.TENANT], dtype=object) """ - return numpy.select( - [self == item.index for item in self.possible_values], - list(self.possible_values), - ) - - def decode_to_str(self) -> numpy.str_: + if self.possible_values is None: + msg = ( + f"The possible values of the {self.__class__.__name__} are " + f"not defined." + ) + raise TypeError(msg) + arr = self.astype(t.EnumDType) + arr = arr.reshape(1) if arr.ndim == 0 else arr + return self.possible_values.items[arr.astype(t.EnumDType)].enum + + def decode_to_str(self) -> t.StrArray: """Decode itself to an array of strings. Returns: ndarray[str_]: The string values of the :obj:`.EnumArray`. + Raises: + TypeError: When the :attr:`.possible_values` is not defined. 
+ Examples: >>> import numpy @@ -258,10 +297,15 @@ def decode_to_str(self) -> numpy.str_: array(['TENANT'], dtype=' str: items = ", ".join(str(item) for item in self.decode()) diff --git a/openfisca_core/indexed_enums/tests/test_enum_array.py b/openfisca_core/indexed_enums/tests/test_enum_array.py index 2add52bab..1ab247468 100644 --- a/openfisca_core/indexed_enums/tests/test_enum_array.py +++ b/openfisca_core/indexed_enums/tests/test_enum_array.py @@ -11,17 +11,17 @@ class Fruit(enum.Enum): @pytest.fixture def enum_array(): - return enum.EnumArray([numpy.array(1)], Fruit) + return enum.EnumArray(numpy.array([1]), Fruit) def test_enum_array_eq_operation(enum_array): """The equality operation is permitted.""" - assert enum_array == enum.EnumArray([numpy.array(1)], Fruit) + assert enum_array == enum.EnumArray(numpy.array([1]), Fruit) def test_enum_array_ne_operation(enum_array): """The non-equality operation is permitted.""" - assert enum_array != enum.EnumArray([numpy.array(0)], Fruit) + assert enum_array != enum.EnumArray(numpy.array([0]), Fruit) def test_enum_array_any_other_operation(enum_array): diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index ffc2cc9f2..72703d825 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,4 +1,3 @@ -from typing import Any from typing_extensions import TypeAlias from openfisca_core.types import ( @@ -14,6 +13,7 @@ Enum, EnumArray, EnumType, + RecArray, ) import enum @@ -29,12 +29,12 @@ #: Type for record arrays data type. RecDType: TypeAlias = numpy.dtype[numpy.void] -#: Type for record arrays. -RecArray: TypeAlias = numpy.recarray[object, Any] - #: Type for enum indices arrays. IndexArray: TypeAlias = Array[EnumDType] +#: Type for boolean arrays. +BoolArray: TypeAlias = Array[BoolDType] + #: Type for int arrays. 
IntArray: TypeAlias = Array[IntDType] @@ -55,4 +55,5 @@ "Enum", "EnumArray", "EnumType", + "RecArray", ] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index d81a0789a..02d012687 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -28,6 +28,9 @@ #: Type representing an array-like object. ArrayLike: TypeAlias = Sequence[_L] +#: Type for record arrays. +RecArray: TypeAlias = numpy.recarray[object, Any] # type: ignore[misc] + #: Type for bool arrays. DTypeBool: TypeAlias = numpy.bool_ @@ -108,11 +111,17 @@ def plural(self, /) -> None | RolePlural: ... # Indexed enums -class EnumType(enum.EnumMeta): ... +class EnumType(enum.EnumMeta): + items: RecArray + + @property + @abc.abstractmethod + def indices(cls) -> Array[DTypeEnum]: ... class Enum(enum.Enum, metaclass=EnumType): index: int + _member_names_: list[str] class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): diff --git a/tests/core/test_simulation_builder.py b/tests/core/test_simulation_builder.py index b905b29b8..507d10e70 100644 --- a/tests/core/test_simulation_builder.py +++ b/tests/core/test_simulation_builder.py @@ -44,14 +44,17 @@ def __init__(self) -> None: @pytest.fixture def enum_variable(): + class _TestEnum(Enum): + foo = "bar" + class TestEnum(Variable): definition_period = DateUnit.ETERNITY value_type = Enum dtype = "O" - default_value = "0" + default_value = _TestEnum.foo is_neutralized = False set_input = None - possible_values = Enum("foo", "bar") + possible_values = _TestEnum name = "enum" def __init__(self) -> None: From b8efde7d92ab812051cb46697e391399954a04b5 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 19:51:14 +0200 Subject: [PATCH 18/29] test(enums): fix mypy errors (#1233) --- openfisca_core/indexed_enums/_enum_type.py | 17 ++++++---- openfisca_core/indexed_enums/enum.py | 2 +- openfisca_core/indexed_enums/enum_array.py | 20 +++++++++--- openfisca_core/indexed_enums/types.py | 24 +++++++------- 
openfisca_core/simulations/simulation.py | 6 +++- openfisca_core/types.py | 38 ++++++++++++++-------- 6 files changed, 68 insertions(+), 39 deletions(-) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 4208ab3ce..1af4b153c 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -7,15 +7,15 @@ from . import types as t -def _item_list(enum_class: type[t.Enum]) -> t.ItemList: +def _item_list(enum_class: t.EnumType) -> t.ItemList: """Return the non-vectorised list of enum items.""" - return [ + return [ # type: ignore[var-annotated] (index, name, value) for index, (name, value) in enumerate(enum_class.__members__.items()) ] -def _item_dtype(enum_class: type[t.Enum]) -> t.RecDType: +def _item_dtype(enum_class: t.EnumType) -> t.RecDType: """Return the dtype of the indexed enum's items.""" size = max(map(len, enum_class.__members__.keys())) return numpy.dtype( @@ -30,7 +30,7 @@ def _item_dtype(enum_class: type[t.Enum]) -> t.RecDType: ) -def _item_array(enum_class: type[t.Enum]) -> t.RecArray: +def _item_array(enum_class: t.EnumType) -> t.RecArray: """Return the indexed enum's items.""" items = _item_list(enum_class) dtype = _item_dtype(enum_class) @@ -76,17 +76,20 @@ class EnumType(t.EnumType): @property def indices(cls) -> t.IndexArray: """Return the indices of the indexed enum class.""" - return cls.items.index + indices: t.IndexArray = cls.items.index + return indices @property def names(cls) -> t.StrArray: """Return the names of the indexed enum class.""" - return cls.items.name + names: t.StrArray = cls.items.name + return names @property def enums(cls) -> t.ObjArray: """Return the members of the indexed enum class.""" - return cls.items.enum + enums: t.ObjArray = cls.items.enum + return enums def __new__( metacls, diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a291acbd6..15bed5878 100644 --- 
a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -217,7 +217,7 @@ def encode( return EnumArray(indices, cls) # String array - if _is_str_array(array): + if _is_str_array(array): # type: ignore[unreachable] indices = cls.items[numpy.isin(cls.names, array)].index return EnumArray(indices, cls) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index aa3db3f07..aa613315f 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -150,6 +150,8 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] https://en.wikipedia.org/wiki/Liskov_substitution_principle """ + result: t.BoolArray + if self.possible_values is None: return NotImplemented if other is None: @@ -158,12 +160,16 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] isinstance(other, type(t.Enum)) and other.__name__ is self.possible_values.__name__ ): - return self.view(numpy.ndarray) == other.indices[other.indices <= max(self)] + result = ( + self.view(numpy.ndarray) == other.indices[other.indices <= max(self)] + ) + return result if ( isinstance(other, t.Enum) and other.__class__.__name__ is self.possible_values.__name__ ): - return self.view(numpy.ndarray) == other.index + result = self.view(numpy.ndarray) == other.index + return result # For NumPy >=1.26.x. 
if isinstance(is_equal := self.view(numpy.ndarray) == other, numpy.ndarray): return is_equal @@ -263,6 +269,8 @@ def decode(self) -> t.ObjArray: array([Housing.TENANT], dtype=object) """ + result: t.ObjArray + if self.possible_values is None: msg = ( f"The possible values of the {self.__class__.__name__} are " @@ -271,7 +279,8 @@ def decode(self) -> t.ObjArray: raise TypeError(msg) arr = self.astype(t.EnumDType) arr = arr.reshape(1) if arr.ndim == 0 else arr - return self.possible_values.items[arr.astype(t.EnumDType)].enum + result = self.possible_values.items[arr.astype(t.EnumDType)].enum + return result def decode_to_str(self) -> t.StrArray: """Decode itself to an array of strings. @@ -297,6 +306,8 @@ def decode_to_str(self) -> t.StrArray: array(['TENANT'], dtype=' t.StrArray: raise TypeError(msg) arr = self.astype(t.EnumDType) arr = arr.reshape(1) if arr.ndim == 0 else arr - return self.possible_values.items[arr.astype(t.EnumDType)].name + result = self.possible_values.items[arr.astype(t.EnumDType)].name + return result def __repr__(self) -> str: items = ", ".join(str(item) for item in self.decode()) diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index 72703d825..b43cdc1e5 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -3,28 +3,27 @@ from openfisca_core.types import ( Array, ArrayLike, - DTypeBool as BoolDType, - DTypeEnum as EnumDType, - DTypeGeneric as AnyDType, - DTypeInt as IntDType, DTypeLike, - DTypeObject as ObjDType, - DTypeStr as StrDType, Enum, EnumArray, EnumType, RecArray, ) -import enum +from enum import _EnumDict as EnumDict # noqa: PLC2701 import numpy - -#: Type for enum dicts. -EnumDict: TypeAlias = enum._EnumDict # noqa: SLF001 +from numpy import ( + bool_ as BoolDType, + generic as AnyDType, + int16 as EnumDType, + int32 as IntDType, + object_ as ObjDType, + str_ as StrDType, +) #: Type for the non-vectorised list of enum items. 
-ItemList: TypeAlias = list[tuple[int, str, Enum]] +ItemList: TypeAlias = list[tuple[int, str, EnumType]] #: Type for record arrays data type. RecDType: TypeAlias = numpy.dtype[numpy.void] @@ -48,12 +47,11 @@ AnyArray: TypeAlias = Array[AnyDType] __all__ = [ - "Array", "ArrayLike", - "BoolDType", "DTypeLike", "Enum", "EnumArray", + "EnumDict", "EnumType", "RecArray", ] diff --git a/openfisca_core/simulations/simulation.py b/openfisca_core/simulations/simulation.py index c32fea22a..b7d20fa97 100644 --- a/openfisca_core/simulations/simulation.py +++ b/openfisca_core/simulations/simulation.py @@ -3,7 +3,11 @@ from collections.abc import Mapping from typing import NamedTuple -from openfisca_core.types import Population, TaxBenefitSystem, Variable +from openfisca_core.types import ( + CorePopulation as Population, + TaxBenefitSystem, + Variable, +) import tempfile import warnings diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 02d012687..b1d2a2710 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -137,8 +137,8 @@ def __new__( class Holder(Protocol): - def clone(self, population: Any, /) -> Holder: ... - def get_memory_usage(self, /) -> Any: ... + def clone(self, population: CorePopulation, /) -> Holder: ... + def get_memory_usage(self, /) -> dict[str, object]: ... # Parameters @@ -198,27 +198,39 @@ def offset(self, offset: str | int, unit: None | DateUnit = None, /) -> Period: # Populations -class Population(Protocol): - entity: Any +class CorePopulation(Protocol): ... - def get_holder(self, variable_name: VariableName, /) -> Any: ... + +class SinglePopulation(CorePopulation, Protocol): + entity: SingleEntity + + def get_holder(self, variable_name: VariableName, /) -> Holder: ... + + +class GroupPopulation(CorePopulation, Protocol): ... # Simulations class Simulation(Protocol): - def calculate(self, variable_name: VariableName, period: Any, /) -> Any: ... 
- def calculate_add(self, variable_name: VariableName, period: Any, /) -> Any: ... - def calculate_divide(self, variable_name: VariableName, period: Any, /) -> Any: ... - def get_population(self, plural: None | str, /) -> Any: ... + def calculate( + self, variable_name: VariableName, period: Period, / + ) -> Array[DTypeGeneric]: ... + def calculate_add( + self, variable_name: VariableName, period: Period, / + ) -> Array[DTypeGeneric]: ... + def calculate_divide( + self, variable_name: VariableName, period: Period, / + ) -> Array[DTypeGeneric]: ... + def get_population(self, plural: None | str, /) -> CorePopulation: ... # Tax-Benefit systems class TaxBenefitSystem(Protocol): - person_entity: Any + person_entity: SingleEntity def get_variable( self, @@ -235,18 +247,18 @@ def get_variable( class Variable(Protocol): - entity: Any + entity: CoreEntity name: VariableName class Formula(Protocol): def __call__( self, - population: Population, + population: CorePopulation, instant: Instant, params: Params, /, - ) -> Array[Any]: ... + ) -> Array[DTypeGeneric]: ... 
class Params(Protocol): From c54d3a56626068e19d13384a1dffd41b945f2fe9 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 9 Oct 2024 19:55:43 +0200 Subject: [PATCH 19/29] chore: version bump (fixes #1233) --- CHANGELOG.md | 6 ++++++ setup.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2a2f70cd..b7a2683e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +### 42.1.1 [#1224](https://github.com/openfisca/openfisca-core/pull/1224) + +#### Technical changes + +- Fix type definitions in the enums module + ## 42.1.0 [#1273](https://github.com/openfisca/openfisca-core/pull/1273) #### New features diff --git a/setup.py b/setup.py index 8cdaa6173..913d08a59 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ setup( name="OpenFisca-Core", - version="42.1.0", + version="42.1.1", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[ From 62c29b707283cae7ccb4d9f14a85726613febcdd Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Thu, 10 Oct 2024 12:46:12 +0200 Subject: [PATCH 20/29] fix(enums): preserve order in encode (#1233) --- .../data_storage/on_disk_storage.py | 2 +- openfisca_core/indexed_enums/_enum_type.py | 2 +- openfisca_core/indexed_enums/enum.py | 20 ++++++++++--------- openfisca_core/indexed_enums/enum_array.py | 9 +++------ .../indexed_enums/tests/test_enum.py | 13 ++++++------ openfisca_core/indexed_enums/types.py | 2 +- openfisca_core/types.py | 2 +- setup.py | 4 ++-- tests/core/test_tracers.py | 2 +- tests/core/tools/test_assert_near.py | 2 +- 10 files changed, 29 insertions(+), 29 deletions(-) diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index 99cfe56dd..818cec22e 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -87,7 +87,7 @@ def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: ... 
storage = data_storage.OnDiskStorage(directory) ... storage.put(value, period) ... storage._decode_file(storage._files[period]) - EnumArray(Housing.TENANT) + EnumArray([Housing.TENANT]) """ enum = self._enums.get(file) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 1af4b153c..9cce3b47f 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -60,7 +60,7 @@ class EnumType(t.EnumType): rec.array([(0, 'OWNER', Housing.OWNER), (1, 'TENANT', Housing.TENAN...) >>> Housing.indices - array([0, 1], dtype=int16) + array([0, 1], dtype=uint8) >>> Housing.names array(['OWNER', 'TENANT'], dtype=' int: def encode( cls, array: ( - EnumArray + t.EnumArray | t.IntArray | t.StrArray | t.ObjArray @@ -134,7 +134,7 @@ def encode( | t.ArrayLike[str] | t.ArrayLike[t.Enum] ), - ) -> EnumArray: + ) -> t.EnumArray: """Encode an encodable array into an :class:`.EnumArray`. Args: @@ -161,7 +161,7 @@ def encode( >>> array = numpy.array([1]) >>> enum_array = enum.EnumArray(array, Housing) >>> Housing.encode(enum_array) - EnumArray(Housing.TENANT) + EnumArray([Housing.TENANT]) # Array of Enum @@ -213,13 +213,14 @@ def encode( # Integer array if _is_int_array(array): - indices = numpy.array(array[array < len(cls.items)], dtype=t.EnumDType) - return EnumArray(indices, cls) + indices = numpy.array(array[array < cls.indices.size]) + return EnumArray(indices.astype(t.EnumDType), cls) # String array if _is_str_array(array): # type: ignore[unreachable] - indices = cls.items[numpy.isin(cls.names, array)].index - return EnumArray(indices, cls) + names = array[numpy.isin(array, cls.names)] + indices = numpy.array([cls[name].index for name in names]) + return EnumArray(indices.astype(t.EnumDType), cls) # Ensure we are comparing the comparable. 
The problem this fixes: # On entering this method "cls" will generally come from @@ -233,8 +234,9 @@ def encode( # name to check that the values in the array, if non-empty, are of # the right type. if cls.__name__ is array[0].__class__.__name__: - indices = cls.items[numpy.isin(cls.enums, array)].index - return EnumArray(indices, cls) + enums = array[numpy.isin(array, cls.enums)] + indices = numpy.array([enum.index for enum in enums]) + return EnumArray(indices.astype(t.EnumDType), cls) msg = ( f"Failed to encode \"{array}\" of type '{array[0].__class__.__name__}', " diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index aa613315f..fe25d6586 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -36,7 +36,7 @@ class EnumArray(t.EnumArray): "" >>> repr(enum_array) - 'EnumArray(Housing.TENANT)' + 'EnumArray([Housing.TENANT])' >>> str(enum_array) "['TENANT']" @@ -62,7 +62,7 @@ class EnumArray(t.EnumArray): ... possible_values = Housing >>> enum.EnumArray(array, OccupancyStatus.possible_values) - EnumArray(Housing.TENANT) + EnumArray([Housing.TENANT]) .. 
_Subclassing ndarray: https://numpy.org/doc/stable/user/basics.subclassing.html @@ -270,7 +270,6 @@ def decode(self) -> t.ObjArray: """ result: t.ObjArray - if self.possible_values is None: msg = ( f"The possible values of the {self.__class__.__name__} are " @@ -307,7 +306,6 @@ def decode_to_str(self) -> t.StrArray: """ result: t.StrArray - if self.possible_values is None: msg = ( f"The possible values of the {self.__class__.__name__} are " @@ -320,8 +318,7 @@ def decode_to_str(self) -> t.StrArray: return result def __repr__(self) -> str: - items = ", ".join(str(item) for item in self.decode()) - return f"{self.__class__.__name__}({items})" + return f"{self.__class__.__name__}({self.decode()!s})" def __str__(self) -> str: return str(self.decode_to_str()) diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index c77b9ddac..5d4957490 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -1,5 +1,6 @@ import numpy import pytest +from numpy.testing import assert_array_equal from openfisca_core import indexed_enums as enum @@ -20,9 +21,9 @@ class Colour(enum.Enum): def test_enum_encode_with_array_of_enum(): """Does encode when called with an array of enums.""" - array = numpy.array([Animal.DOG]) + array = numpy.array([Animal.DOG, Animal.DOG, Animal.CAT, Colour.AMARANTH]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG + assert_array_equal(enum_array, numpy.array([1, 1, 0])) def test_enum_encode_with_enum_sequence(): @@ -51,9 +52,9 @@ def test_enum_encode_with_enum_with_bad_value(): def test_enum_encode_with_array_of_int(): """Does encode when called with an array of int.""" - array = numpy.array([1]) + array = numpy.array([1, 1, 0, 2]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG + assert_array_equal(enum_array, numpy.array([1, 1, 0])) def test_enum_encode_with_int_sequence(): @@ -82,9 +83,9 @@ def 
test_enum_encode_with_int_with_bad_value(): def test_enum_encode_with_array_of_string(): """Does encode when called with an array of string.""" - array = numpy.array(["DOG"]) + array = numpy.array(["DOG", "DOG", "CAT", "AMARANTH"]) enum_array = Animal.encode(array) - assert enum_array == Animal.DOG + assert_array_equal(enum_array, numpy.array([1, 1, 0])) def test_enum_encode_with_str_sequence(): diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index b43cdc1e5..b545d0bb6 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -16,10 +16,10 @@ from numpy import ( bool_ as BoolDType, generic as AnyDType, - int16 as EnumDType, int32 as IntDType, object_ as ObjDType, str_ as StrDType, + uint8 as EnumDType, ) #: Type for the non-vectorised list of enum items. diff --git a/openfisca_core/types.py b/openfisca_core/types.py index b1d2a2710..38bd57c6a 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -47,7 +47,7 @@ DTypeBytes: TypeAlias = numpy.bytes_ #: Type for Enum arrays. -DTypeEnum: TypeAlias = numpy.int16 +DTypeEnum: TypeAlias = numpy.uint8 #: Type for date arrays. 
DTypeDate: TypeAlias = numpy.datetime64 diff --git a/setup.py b/setup.py index 913d08a59..c404c804c 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,8 @@ "PyYAML >=6.0, <7.0", "StrEnum >=0.4.8, <0.5.0", # 3.11.x backport "dpath >=2.1.4, <3.0", - "numexpr >=2.8.4, <3.0", - "numpy >=1.24.2, <2.0", + "numexpr >=2.10.0, <2.10.1", + "numpy >=1.24.2, <1.26.4", "pendulum >=3.0.0, <4.0.0", "psutil >=5.9.4, <6.0", "pytest >=8.3.3, <9.0", diff --git a/tests/core/test_tracers.py b/tests/core/test_tracers.py index 178b957ec..c9af9ecee 100644 --- a/tests/core/test_tracers.py +++ b/tests/core/test_tracers.py @@ -452,7 +452,7 @@ def test_log_aggregate_with_enum(tracer) -> None: assert ( lines[0] - == " A<2017> >> {'avg': EnumArray(HousingOccupancyStatus.tenant), 'max': EnumArray(HousingOccupancyStatus.tenant), 'min': EnumArray(HousingOccupancyStatus.tenant)}" + == " A<2017> >> {'avg': EnumArray([HousingOccupancyStatus.tenant]), 'max': EnumArray([HousingOccupancyStatus.tenant]), 'min': EnumArray([HousingOccupancyStatus.tenant])}" ) diff --git a/tests/core/tools/test_assert_near.py b/tests/core/tools/test_assert_near.py index bdcb589b4..c351be0f9 100644 --- a/tests/core/tools/test_assert_near.py +++ b/tests/core/tools/test_assert_near.py @@ -21,5 +21,5 @@ def test_enum_2(tax_benefit_system) -> None: "housing_occupancy_status" ].possible_values value = possible_values.encode(numpy.array(["tenant", "owner"])) - expected_value = ["owner", "tenant"] + expected_value = ["tenant", "owner"] assert_near(value, expected_value) From a69cb0a9be78d52c9efb293a7c893bb49cc75cf8 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Thu, 10 Oct 2024 16:53:03 +0200 Subject: [PATCH 21/29] fix(enums): do really proper indexing (#1233) --- openfisca_core/indexed_enums/__init__.py | 2 + .../{_type_guards.py => _guards.py} | 31 ++- openfisca_core/indexed_enums/_utils.py | 216 ++++++++++++++++++ openfisca_core/indexed_enums/enum.py | 63 +++-- .../indexed_enums/tests/test_enum.py | 32 +-- 
openfisca_core/types.py | 6 + openfisca_tasks/test_code.mk | 2 +- setup.py | 5 +- 8 files changed, 314 insertions(+), 43 deletions(-) rename openfisca_core/indexed_enums/{_type_guards.py => _guards.py} (64%) create mode 100644 openfisca_core/indexed_enums/_utils.py diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 10bbd3d98..6268b8348 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -2,6 +2,7 @@ from . import types from ._enum_type import EnumType +from ._errors import EnumMemberNotFoundError from .config import ENUM_ARRAY_DTYPE from .enum import Enum from .enum_array import EnumArray @@ -10,6 +11,7 @@ "ENUM_ARRAY_DTYPE", "Enum", "EnumArray", + "EnumMemberNotFoundError", "EnumType", "types", ] diff --git a/openfisca_core/indexed_enums/_type_guards.py b/openfisca_core/indexed_enums/_guards.py similarity index 64% rename from openfisca_core/indexed_enums/_type_guards.py rename to openfisca_core/indexed_enums/_guards.py index 3caf1859b..9220a72bb 100644 --- a/openfisca_core/indexed_enums/_type_guards.py +++ b/openfisca_core/indexed_enums/_guards.py @@ -7,7 +7,7 @@ from . import types as t -def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray | t.IntArray]: +def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray]: """Narrow the type of a given array to an array of :obj:`numpy.integer`. Args: @@ -35,6 +35,33 @@ def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray | t.IntArray]: return numpy.issubdtype(array.dtype, numpy.integer) +def _is_obj_array(array: t.AnyArray) -> TypeIs[t.ObjArray]: + """Narrow the type of a given array to an array of :obj:`numpy.object_`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.object_`, False otherwise. 
+ + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Enum = enum.Enum("Enum", ["A", "B"]) + >>> array = numpy.array([Enum.A], dtype=numpy.object_) + >>> _is_obj_array(array) + True + + >>> array = numpy.array([1.0]) + >>> _is_obj_array(array) + False + + """ + return numpy.issubdtype(array.dtype, t.ObjDType) + + def _is_str_array(array: t.AnyArray) -> TypeIs[t.StrArray]: """Narrow the type of a given array to an array of :obj:`numpy.str_`. @@ -65,4 +92,4 @@ def _is_str_array(array: t.AnyArray) -> TypeIs[t.StrArray]: return numpy.issubdtype(array.dtype, str) -__all__ = ["_is_int_array", "_is_str_array"] +__all__ = ["_is_int_array", "_is_obj_array", "_is_str_array"] diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py new file mode 100644 index 000000000..d8385621b --- /dev/null +++ b/openfisca_core/indexed_enums/_utils.py @@ -0,0 +1,216 @@ +import numpy + +from . import types as t +from ._errors import EnumMemberNotFoundError + + +def _enum_to_index(enum_class: type[t.Enum], value: t.ObjArray) -> t.IndexArray: + """Transform an array of enum members into an index array. + + Args: + enum_class: The enum class to encode the enum members array. + value: The enum members array to encode. + + Returns: + The index array. + + Raises: + EnumMemberNotFoundError: If one value is not in the enum class. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Road(enum.Enum): + ... STREET = ( + ... "A public road that connects two points, but also has " + ... "buildings on both sides of it; these typically run " + ... "perpendicular to avenues." + ... ) + ... AVENUE = ( + ... "A public way that also has buildings and/or trees on both " + ... "sides; these run perpendicular to streets and are " + ... "traditionally wider." + ... ) + + >>> class Rogue(enum.Enum): + ... BOULEVARD = "More like a shady impasse, to be honest." 
+ + # >>> _enum_to_index(Road, numpy.array(Road.AVENUE)) + # array([1], dtype=uint8) + # + # >>> _enum_to_index(Road, numpy.array([Road.AVENUE])) + # array([1], dtype=uint8) + # + # >>> value = numpy.array([Road.STREET, Road.AVENUE, Road.STREET]) + # >>> _enum_to_index(Road, value) + # array([0, 1, 0], dtype=uint8) + + >>> value = numpy.array([Road.AVENUE, Road.AVENUE, Rogue.BOULEVARD]) + >>> _enum_to_index(Road, value) + Traceback (most recent call last): + EnumMemberNotFoundError: Member BOULEVARD not found in enum 'Road'... + + """ + # Create a mask to determine which values are in the enum class. + mask = numpy.isin(value, enum_class.enums) + + # Get the values that are not in the enum class. + ko = value[~mask] + + # If there are values that are not in the enum class, raise an error. + if ko.size > 0: + raise EnumMemberNotFoundError(enum_class, ko[0].name) + + # In case we're dealing with a scalar, we need to convert it to an array. + ok = value[mask] + + # Get the indices that would sort the enums. + sorted_index = numpy.argsort(enum_class.enums) + + # Get the enums as if they were sorted. + sorted_enums = enum_class.enums[sorted_index] + + # Get the index positions of the enums in the sorted enums. + index_where = numpy.searchsorted(sorted_enums, ok) + + # Get the actual index of the enums in the enum class. + index = sorted_index[index_where] + + # Finally, return the index array. + return numpy.array(index, dtype=t.EnumDType) + + +def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray: + """Transform an integer array into an index array. + + Args: + enum_class: The enum class to encode the integer array. + value: The integer array to encode. + + Returns: + The index array. + + Raises: + EnumMemberNotFoundError: If one value is not in the enum class. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Road(enum.Enum): + ... STREET = ( + ... 
"A public road that connects two points, but also has " + ... "buildings on both sides of it; these typically run " + ... "perpendicular to avenues." + ... ) + ... AVENUE = ( + ... "A public way that also has buildings and/or trees on both " + ... "sides; these run perpendicular to streets and are " + ... "traditionally wider." + ... ) + + >>> _int_to_index(Road, numpy.array(1)) + array([1], dtype=uint8) + + >>> _int_to_index(Road, numpy.array([1])) + array([1], dtype=uint8) + + >>> _int_to_index(Road, numpy.array([0, 1, 0])) + array([0, 1, 0], dtype=uint8) + + >>> _int_to_index(Road, numpy.array([1, 1, 2])) + Traceback (most recent call last): + EnumMemberNotFoundError: Member with index 2 not found in enum 'Road... + + """ + # Create a mask to determine which values are in the enum class. + mask = numpy.isin(value, enum_class.indices) + + # Get the values that are not in the enum class. + ko = value[~mask] + + # If there are values that are not in the enum class, raise an error. + if ko.size > 0: + raise EnumMemberNotFoundError(enum_class, f"with index {ko[0]}") + + # Finally, return the index array. + return numpy.array(value[mask], dtype=t.EnumDType) + + +def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: + """Transform a string array into an index array. + + Args: + enum_class: The enum class to encode the string array. + value: The string array to encode. + + Returns: + The index array. + + Raises: + EnumMemberNotFoundError: If one value is not in the enum class. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Road(enum.Enum): + ... STREET = ( + ... "A public road that connects two points, but also has " + ... "buildings on both sides of it; these typically run " + ... "perpendicular to avenues." + ... ) + ... AVENUE = ( + ... "A public way that also has buildings and/or trees on both " + ... "sides; these run perpendicular to streets and are " + ... "traditionally wider." 
+ ... ) + + >>> _str_to_index(Road, numpy.array("AVENUE")) + array([1], dtype=uint8) + + >>> _str_to_index(Road, numpy.array(["AVENUE"])) + array([1], dtype=uint8) + + >>> _str_to_index(Road, numpy.array(["STREET", "AVENUE", "STREET"])) + array([0, 1, 0], dtype=uint8) + + >>> _str_to_index(Road, numpy.array(["AVENUE", "AVENUE", "BOULEVARD"])) + Traceback (most recent call last): + EnumMemberNotFoundError: Member BOULEVARD not found in enum 'Road'... + + """ + # Create a mask to determine which values are in the enum class. + mask = numpy.isin(value, enum_class.names) + + # Get the values that are not in the enum class. + ko = value[~mask] + + # If there are values that are not in the enum class, raise an error. + if ko.size > 0: + raise EnumMemberNotFoundError(enum_class, ko[0]) + + # In case we're dealing with a scalar, we need to convert it to an array. + ok = value[mask] + + # Get the indices that would sort the names. + sorted_index = numpy.argsort(enum_class.names) + + # Get the names as if they were sorted. + sorted_names = enum_class.names[sorted_index] + + # Get the index positions of the names in the sorted names. + index_where = numpy.searchsorted(sorted_names, ok) + + # Get the actual index of the names in the enum class. + index = sorted_index[index_where] + + # Finally, return the index array. + return numpy.array(index, dtype=t.EnumDType) + + +__all__ = ["_enum_to_index", "_int_to_index", "_str_to_index"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index e163095dd..7265e90bc 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -4,7 +4,8 @@ from . 
import types as t from ._enum_type import EnumType -from ._type_guards import _is_int_array, _is_str_array +from ._guards import _is_int_array, _is_obj_array, _is_str_array +from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray @@ -109,24 +110,45 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" + def __hash__(self) -> int: + return hash(self.__class__.__name__) ^ hash(self.index) + def __eq__(self, other: object) -> bool: if not isinstance(other, Enum): return NotImplemented - return self.index == other.index + return hash(self) ^ hash(other) == 0 def __ne__(self, other: object) -> bool: if not isinstance(other, Enum): return NotImplemented - return self.index != other.index + return hash(self) ^ hash(other) != 0 - def __hash__(self) -> int: - return hash(self.index) + def __lt__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index < other.index + + def __le__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index <= other.index + + def __gt__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index > other.index + + def __ge__(self, other: object) -> bool: + if not isinstance(other, Enum): + return NotImplemented + return self.index >= other.index @classmethod def encode( cls, array: ( t.EnumArray + | t.IndexArray | t.IntArray | t.StrArray | t.ObjArray @@ -144,7 +166,7 @@ def encode( EnumArray: An :class:`.EnumArray` with the encoded input values. Raises: - TypeError: If ``array`` is a scalar :class:`~numpy.ndarray`. + NotImplementedError: If ``array`` is a scalar :class:`~numpy.ndarray`. TypeError: If ``array`` is of a diffent :class:`.Enum` type. Examples: @@ -195,32 +217,33 @@ def encode( :meth:`.EnumArray.decode` for decoding. 
""" - if isinstance(array, EnumArray): - return array - + # Array-like values need to be converted to a numpy array. if not isinstance(array, numpy.ndarray): return cls.encode(numpy.array(array)) + # Empty arrays are returned as is. if array.size == 0: return EnumArray(numpy.array([]), cls) + # Scalar arrays are not supported. if array.ndim == 0: msg = ( "Scalar arrays are not supported: expecting a vector array, " f"instead. Please try again with `numpy.array([{array}])`." ) - raise TypeError(msg) + raise NotImplementedError(msg) + + # Enum arrays. + if isinstance(array, t.EnumArray): + return array - # Integer array + # Index arrays. if _is_int_array(array): - indices = numpy.array(array[array < cls.indices.size]) - return EnumArray(indices.astype(t.EnumDType), cls) + return EnumArray(_int_to_index(cls, array), cls) - # String array + # String arrays. if _is_str_array(array): # type: ignore[unreachable] - names = array[numpy.isin(array, cls.names)] - indices = numpy.array([cls[name].index for name in names]) - return EnumArray(indices.astype(t.EnumDType), cls) + return EnumArray(_str_to_index(cls, array), cls) # Ensure we are comparing the comparable. The problem this fixes: # On entering this method "cls" will generally come from @@ -233,10 +256,8 @@ def encode( # So, instead of relying on the "cls" passed in, we use only its # name to check that the values in the array, if non-empty, are of # the right type. 
- if cls.__name__ is array[0].__class__.__name__: - enums = array[numpy.isin(array, cls.enums)] - indices = numpy.array([enum.index for enum in enums]) - return EnumArray(indices.astype(t.EnumDType), cls) + if _is_obj_array(array) and cls.__name__ is array[0].__class__.__name__: + return EnumArray(_enum_to_index(cls, array), cls) msg = ( f"Failed to encode \"{array}\" of type '{array[0].__class__.__name__}', " diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 5d4957490..1f3e95a6f 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -21,14 +21,14 @@ class Colour(enum.Enum): def test_enum_encode_with_array_of_enum(): """Does encode when called with an array of enums.""" - array = numpy.array([Animal.DOG, Animal.DOG, Animal.CAT, Colour.AMARANTH]) + array = numpy.array([Animal.DOG, Animal.DOG, Animal.CAT]) enum_array = Animal.encode(array) assert_array_equal(enum_array, numpy.array([1, 1, 0])) def test_enum_encode_with_enum_sequence(): """Does encode when called with an enum sequence.""" - sequence = list(Animal) + list(Colour) + sequence = list(Animal) enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -36,7 +36,7 @@ def test_enum_encode_with_enum_sequence(): def test_enum_encode_with_enum_scalar_array(): """Does not encode when called with an enum scalar array.""" array = numpy.array(Animal.DOG) - with pytest.raises(TypeError): + with pytest.raises(NotImplementedError): Animal.encode(array) @@ -52,14 +52,14 @@ def test_enum_encode_with_enum_with_bad_value(): def test_enum_encode_with_array_of_int(): """Does encode when called with an array of int.""" - array = numpy.array([1, 1, 0, 2]) + array = numpy.array([1, 1, 0]) enum_array = Animal.encode(array) assert_array_equal(enum_array, numpy.array([1, 1, 0])) def test_enum_encode_with_int_sequence(): """Does encode when called with an int sequence.""" - sequence = (1, 2) 
+ sequence = (0, 1) enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -67,15 +67,15 @@ def test_enum_encode_with_int_sequence(): def test_enum_encode_with_int_scalar_array(): """Does not encode when called with an int scalar array.""" array = numpy.array(1) - with pytest.raises(TypeError): + with pytest.raises(NotImplementedError): Animal.encode(array) def test_enum_encode_with_int_with_bad_value(): """Does not encode when called with a value not in an Enum.""" array = numpy.array([2]) - enum_array = Animal.encode(array) - assert len(enum_array) == 0 + with pytest.raises(IndexError): + Animal.encode(array) # Arrays of strings @@ -83,14 +83,14 @@ def test_enum_encode_with_int_with_bad_value(): def test_enum_encode_with_array_of_string(): """Does encode when called with an array of string.""" - array = numpy.array(["DOG", "DOG", "CAT", "AMARANTH"]) + array = numpy.array(["DOG", "DOG", "CAT"]) enum_array = Animal.encode(array) assert_array_equal(enum_array, numpy.array([1, 1, 0])) def test_enum_encode_with_str_sequence(): """Does encode when called with a str sequence.""" - sequence = ("DOG", "JAIBA") + sequence = ("DOG", "CAT") enum_array = Animal.encode(sequence) assert Animal.DOG in enum_array @@ -98,15 +98,15 @@ def test_enum_encode_with_str_sequence(): def test_enum_encode_with_str_scalar_array(): """Does not encode when called with a str scalar array.""" array = numpy.array("DOG") - with pytest.raises(TypeError): + with pytest.raises(NotImplementedError): Animal.encode(array) def test_enum_encode_with_str_with_bad_value(): """Does not encode when called with a value not in an Enum.""" array = numpy.array(["JAIBA"]) - enum_array = Animal.encode(array) - assert len(enum_array) == 0 + with pytest.raises(IndexError): + Animal.encode(array) # Unsupported encodings @@ -124,12 +124,12 @@ def test_enum_encode_with_any_scalar_array(): """Does not encode when called with unsupported types.""" value = 1.5 array = numpy.array(value) - with 
pytest.raises(TypeError): + with pytest.raises(NotImplementedError): Animal.encode(array) def test_enum_encode_with_any_sequence(): """Does not encode when called with unsupported types.""" sequence = memoryview(b"DOG") - enum_array = Animal.encode(sequence) - assert len(enum_array) == 0 + with pytest.raises(IndexError): + Animal.encode(sequence) diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 38bd57c6a..702138e39 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -117,6 +117,12 @@ class EnumType(enum.EnumMeta): @property @abc.abstractmethod def indices(cls) -> Array[DTypeEnum]: ... + @property + @abc.abstractmethod + def names(cls) -> Array[DTypeStr]: ... + @property + @abc.abstractmethod + def enums(cls) -> Array[DTypeGeneric]: ... class Enum(enum.Enum, metaclass=EnumType): diff --git a/openfisca_tasks/test_code.mk b/openfisca_tasks/test_code.mk index f2ab7247a..6a27f1b9c 100644 --- a/openfisca_tasks/test_code.mk +++ b/openfisca_tasks/test_code.mk @@ -35,7 +35,7 @@ test-code: test-core test-country test-extension ## Run openfisca-core tests. 
test-core: $(shell git ls-files "*test_*.py") @$(call print_help,$@:) - @python -m pytest --capture=no --xdoctest --xdoctest-verbose=0 \ + @python -m pytest --capture=no \ openfisca_core/commons \ openfisca_core/data_storage \ openfisca_core/entities \ diff --git a/setup.py b/setup.py index c404c804c..80b9363bd 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,8 @@ "PyYAML >=6.0, <7.0", "StrEnum >=0.4.8, <0.5.0", # 3.11.x backport "dpath >=2.1.4, <3.0", - "numexpr >=2.10.0, <2.10.1", - "numpy >=1.24.2, <1.26.4", + "numexpr >=2.9.0, <2.10", + "numpy >=1.24.3, <2.0", "pendulum >=3.0.0, <4.0.0", "psutil >=5.9.4, <6.0", "pytest >=8.3.3, <9.0", @@ -64,7 +64,6 @@ "pyright >=1.1.382, <2.0", "ruff >=0.6.9, <1.0", "ruff-lsp >=0.0.57, <1.0", - "xdoctest >=1.2.0, <2.0", *api_requirements, ] From bbecb0e3ce82c581f9bb23b2f65bf387865ef2da Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Thu, 10 Oct 2024 17:04:49 +0200 Subject: [PATCH 22/29] feat(enums): add custom error (#1233) --- openfisca_core/indexed_enums/_errors.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 openfisca_core/indexed_enums/_errors.py diff --git a/openfisca_core/indexed_enums/_errors.py b/openfisca_core/indexed_enums/_errors.py new file mode 100644 index 000000000..d16024cf2 --- /dev/null +++ b/openfisca_core/indexed_enums/_errors.py @@ -0,0 +1,18 @@ +from . import types as t + + +class EnumMemberNotFoundError(IndexError): + """Raised when a member is not found in an enum.""" + + def __init__(self, enum_class: type[t.Enum], value: str) -> None: + msg = ( + f"Member {value} not found in enum '{enum_class.__name__}'. " + f"Possible values are: {', '.join(enum_class.names[:-1])}, and " + f"{enum_class.names[-1]!s}; or their corresponding indices: " + f"{', '.join(enum_class.indices[:-1].astype(t.StrDType))}, and " + f"{enum_class.indices[-1]!s}." 
+ ) + super().__init__(msg) + + +__all__ = ["EnumMemberNotFoundError"] From 67f14dc4deab3f61bb933e8b76e31e8152859bbb Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Thu, 10 Oct 2024 18:52:31 +0200 Subject: [PATCH 23/29] refactor(enums): cache index arrays (#1233) --- openfisca_core/indexed_enums/_enum_type.py | 36 ++++++++++++++++++---- openfisca_core/indexed_enums/_utils.py | 20 +++--------- openfisca_core/indexed_enums/py.typed | 0 openfisca_core/types.py | 4 +++ setup.py | 2 +- 5 files changed, 39 insertions(+), 23 deletions(-) create mode 100644 openfisca_core/indexed_enums/py.typed diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 9cce3b47f..6356984ab 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -73,6 +73,18 @@ class EnumType(t.EnumType): #: The items of the indexed enum class. items: t.RecArray + #: The names as if they were sorted. + _sorted_names_: t.StrArray + + #: The enums as if they were sorted. + _sorted_enums_: t.ObjArray + + #: The indices that would sort the names. + _sorted_names_index_: t.IndexArray + + #: The indices that would sort the enums. + _sorted_enums_index_: t.IndexArray + @property def indices(cls) -> t.IndexArray: """Return the indices of the indexed enum class.""" @@ -93,24 +105,36 @@ def enums(cls) -> t.ObjArray: def __new__( metacls, - cls: str, + name: str, bases: tuple[type, ...], classdict: t.EnumDict, **kwds: object, ) -> t.EnumType: """Create a new indexed enum class.""" # Create the enum class. - enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) + cls = super().__new__(metacls, name, bases, classdict, **kwds) # If the enum class has no members, return it as is. - if not enum_class.__members__: - return enum_class + if not cls.__members__: + return cls # Add the items attribute to the enum class. 
- enum_class.items = _item_array(enum_class) + cls.items = _item_array(cls) + + # Add the indices that would sort the names. + cls._sorted_names_index_ = numpy.argsort(cls.names).astype(t.EnumDType) + + # Add the indices that would sort the enums. + cls._sorted_enums_index_ = numpy.argsort(cls.enums).astype(t.EnumDType) + + # Add the names as if they were sorted. + cls._sorted_names_ = cls.names[cls._sorted_names_index_] + + # Add the enums as if they were sorted. + cls._sorted_enums_ = cls.enums[cls._sorted_enums_index_] # Return the modified enum class. - return enum_class + return cls def __dir__(cls) -> list[str]: return sorted({"items", "indices", "names", "enums", *super().__dir__()}) diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index d8385621b..c95104b76 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -66,17 +66,11 @@ def _enum_to_index(enum_class: type[t.Enum], value: t.ObjArray) -> t.IndexArray: # In case we're dealing with a scalar, we need to convert it to an array. ok = value[mask] - # Get the indices that would sort the enums. - sorted_index = numpy.argsort(enum_class.enums) - - # Get the enums as if they were sorted. - sorted_enums = enum_class.enums[sorted_index] - # Get the index positions of the enums in the sorted enums. - index_where = numpy.searchsorted(sorted_enums, ok) + index_where = numpy.searchsorted(enum_class._sorted_enums_, ok) # Get the actual index of the enums in the enum class. - index = sorted_index[index_where] + index = enum_class._sorted_enums_index_[index_where] # Finally, return the index array. return numpy.array(index, dtype=t.EnumDType) @@ -197,17 +191,11 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: # In case we're dealing with a scalar, we need to convert it to an array. ok = value[mask] - # Get the indices that would sort the names. 
- sorted_index = numpy.argsort(enum_class.names) - - # Get the names as if they were sorted. - sorted_names = enum_class.names[sorted_index] - # Get the index positions of the names in the sorted names. - index_where = numpy.searchsorted(sorted_names, ok) + index_where = numpy.searchsorted(enum_class._sorted_names_, ok) # Get the actual index of the names in the enum class. - index = sorted_index[index_where] + index = enum_class._sorted_names_index_[index_where] # Finally, return the index array. return numpy.array(index, dtype=t.EnumDType) diff --git a/openfisca_core/indexed_enums/py.typed b/openfisca_core/indexed_enums/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 702138e39..b1cb65970 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -113,6 +113,10 @@ def plural(self, /) -> None | RolePlural: ... class EnumType(enum.EnumMeta): items: RecArray + _sorted_names_: Array[DTypeStr] + _sorted_enums_: Array[DTypeObject] + _sorted_names_index_: Array[DTypeEnum] + _sorted_enums_index_: Array[DTypeEnum] @property @abc.abstractmethod diff --git a/setup.py b/setup.py index 80b9363bd..350654ba3 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "PyYAML >=6.0, <7.0", "StrEnum >=0.4.8, <0.5.0", # 3.11.x backport "dpath >=2.1.4, <3.0", - "numexpr >=2.9.0, <2.10", + "numexpr >=2.10.1, <3.0", "numpy >=1.24.3, <2.0", "pendulum >=3.0.0, <4.0.0", "psutil >=5.9.4, <6.0", From 573eb2fa78a65a324bc2d908a482ca0eb63e690e Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 11 Oct 2024 19:09:27 +0200 Subject: [PATCH 24/29] refactor(enums): make __eq__ faster (#1233) --- openfisca_core/indexed_enums/__init__.py | 3 +- openfisca_core/indexed_enums/_enum_type.py | 24 ------ openfisca_core/indexed_enums/_errors.py | 14 +++- openfisca_core/indexed_enums/_utils.py | 83 +++++-------------- openfisca_core/indexed_enums/enum.py | 68 +++++++++------ .../indexed_enums/tests/test_enum.py | 6 +- 
openfisca_core/indexed_enums/types.py | 4 +- openfisca_core/types.py | 4 - 8 files changed, 81 insertions(+), 125 deletions(-) diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 6268b8348..494601fc8 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -2,7 +2,7 @@ from . import types from ._enum_type import EnumType -from ._errors import EnumMemberNotFoundError +from ._errors import EnumEncodingError, EnumMemberNotFoundError from .config import ENUM_ARRAY_DTYPE from .enum import Enum from .enum_array import EnumArray @@ -11,6 +11,7 @@ "ENUM_ARRAY_DTYPE", "Enum", "EnumArray", + "EnumEncodingError", "EnumMemberNotFoundError", "EnumType", "types", diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 6356984ab..a86969ccf 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -73,18 +73,6 @@ class EnumType(t.EnumType): #: The items of the indexed enum class. items: t.RecArray - #: The names as if they were sorted. - _sorted_names_: t.StrArray - - #: The enums as if they were sorted. - _sorted_enums_: t.ObjArray - - #: The indices that would sort the names. - _sorted_names_index_: t.IndexArray - - #: The indices that would sort the enums. - _sorted_enums_index_: t.IndexArray - @property def indices(cls) -> t.IndexArray: """Return the indices of the indexed enum class.""" @@ -121,18 +109,6 @@ def __new__( # Add the items attribute to the enum class. cls.items = _item_array(cls) - # Add the indices that would sort the names. - cls._sorted_names_index_ = numpy.argsort(cls.names).astype(t.EnumDType) - - # Add the indices that would sort the enums. - cls._sorted_enums_index_ = numpy.argsort(cls.enums).astype(t.EnumDType) - - # Add the names as if they were sorted. - cls._sorted_names_ = cls.names[cls._sorted_names_index_] - - # Add the enums as if they were sorted. 
- cls._sorted_enums_ = cls.enums[cls._sorted_enums_index_] - # Return the modified enum class. return cls diff --git a/openfisca_core/indexed_enums/_errors.py b/openfisca_core/indexed_enums/_errors.py index d16024cf2..7ec21eca4 100644 --- a/openfisca_core/indexed_enums/_errors.py +++ b/openfisca_core/indexed_enums/_errors.py @@ -1,6 +1,18 @@ from . import types as t +class EnumEncodingError(TypeError): + """Raised when an enum is encoded with an unsupported type.""" + + def __init__(self, enum_class: type[t.Enum], value: t.VarArray) -> None: + msg = ( + f"Failed to encode \"{value}\" of type '{value[0].__class__.__name__}', " + "as it is not supported. Please, try again with an array of " + f"'{int.__name__}', '{str.__name__}', or '{enum_class.__name__}'." + ) + super().__init__(msg) + + class EnumMemberNotFoundError(IndexError): """Raised when a member is not found in an enum.""" @@ -15,4 +27,4 @@ def __init__(self, enum_class: type[t.Enum], value: str) -> None: super().__init__(msg) -__all__ = ["EnumMemberNotFoundError"] +__all__ = ["EnumEncodingError", "EnumMemberNotFoundError"] diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index c95104b76..0a29ff961 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -14,9 +14,6 @@ def _enum_to_index(enum_class: type[t.Enum], value: t.ObjArray) -> t.IndexArray: Returns: The index array. - Raises: - EnumMemberNotFoundError: If one value is not in the enum class. - Examples: >>> import numpy @@ -37,43 +34,24 @@ def _enum_to_index(enum_class: type[t.Enum], value: t.ObjArray) -> t.IndexArray: >>> class Rogue(enum.Enum): ... BOULEVARD = "More like a shady impasse, to be honest." 
- # >>> _enum_to_index(Road, numpy.array(Road.AVENUE)) - # array([1], dtype=uint8) - # - # >>> _enum_to_index(Road, numpy.array([Road.AVENUE])) - # array([1], dtype=uint8) - # - # >>> value = numpy.array([Road.STREET, Road.AVENUE, Road.STREET]) - # >>> _enum_to_index(Road, value) - # array([0, 1, 0], dtype=uint8) - - >>> value = numpy.array([Road.AVENUE, Road.AVENUE, Rogue.BOULEVARD]) - >>> _enum_to_index(Road, value) + >>> _enum_to_index(Road, numpy.array(Road.AVENUE)) Traceback (most recent call last): - EnumMemberNotFoundError: Member BOULEVARD not found in enum 'Road'... - - """ - # Create a mask to determine which values are in the enum class. - mask = numpy.isin(value, enum_class.enums) - - # Get the values that are not in the enum class. - ko = value[~mask] - - # If there are values that are not in the enum class, raise an error. - if ko.size > 0: - raise EnumMemberNotFoundError(enum_class, ko[0].name) + TypeError: iteration over a 0-d array - # In case we're dealing with a scalar, we need to convert it to an array. - ok = value[mask] + >>> _enum_to_index(Road, numpy.array([Road.AVENUE])) + array([1], dtype=uint8) - # Get the index positions of the enums in the sorted enums. - index_where = numpy.searchsorted(enum_class._sorted_enums_, ok) + >>> value = numpy.array([Road.STREET, Road.AVENUE, Road.STREET]) + >>> _enum_to_index(Road, value) + array([0, 1, 0], dtype=uint8) - # Get the actual index of the enums in the enum class. - index = enum_class._sorted_enums_index_[index_where] + >>> value = numpy.array([Road.AVENUE, Road.AVENUE, Rogue.BOULEVARD]) + >>> _enum_to_index(Road, value) + array([1, 1, 0], dtype=uint8) - # Finally, return the index array. 
- return numpy.array(index, dtype=t.EnumDType) + """ + index = [member.index for member in value] + return _int_to_index(enum_class, numpy.array(index)) def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray: @@ -121,7 +99,7 @@ def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray """ # Create a mask to determine which values are in the enum class. - mask = numpy.isin(value, enum_class.indices) + mask = value < enum_class.items.size # Get the values that are not in the enum class. ko = value[~mask] @@ -144,9 +122,6 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: Returns: The index array. - Raises: - EnumMemberNotFoundError: If one value is not in the enum class. - Examples: >>> import numpy @@ -165,7 +140,8 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: ... ) >>> _str_to_index(Road, numpy.array("AVENUE")) - array([1], dtype=uint8) + Traceback (most recent call last): + TypeError: iteration over a 0-d array >>> _str_to_index(Road, numpy.array(["AVENUE"])) array([1], dtype=uint8) @@ -174,31 +150,12 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: array([0, 1, 0], dtype=uint8) >>> _str_to_index(Road, numpy.array(["AVENUE", "AVENUE", "BOULEVARD"])) - Traceback (most recent call last): - EnumMemberNotFoundError: Member BOULEVARD not found in enum 'Road'... + array([1, 1, 0], dtype=uint8) """ - # Create a mask to determine which values are in the enum class. - mask = numpy.isin(value, enum_class.names) - - # Get the values that are not in the enum class. - ko = value[~mask] - - # If there are values that are not in the enum class, raise an error. - if ko.size > 0: - raise EnumMemberNotFoundError(enum_class, ko[0]) - - # In case we're dealing with a scalar, we need to convert it to an array. - ok = value[mask] - - # Get the index positions of the names in the sorted names. 
- index_where = numpy.searchsorted(enum_class._sorted_names_, ok) - - # Get the actual index of the names in the enum class. - index = enum_class._sorted_names_index_[index_where] - - # Finally, return the index array. - return numpy.array(index, dtype=t.EnumDType) + names = enum_class.names + index = [enum_class[name].index if name in names else 0 for name in value] + return _int_to_index(enum_class, numpy.array(index)) __all__ = ["_enum_to_index", "_int_to_index", "_str_to_index"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 7265e90bc..446d6cb74 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -4,6 +4,7 @@ from . import types as t from ._enum_type import EnumType +from ._errors import EnumEncodingError from ._guards import _is_int_array, _is_obj_array, _is_str_array from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray @@ -111,37 +112,55 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" def __hash__(self) -> int: - return hash(self.__class__.__name__) ^ hash(self.index) + return object.__hash__(self) def __eq__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return hash(self) ^ hash(other) == 0 + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index == other.index + return NotImplemented def __ne__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return hash(self) ^ hash(other) != 0 + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index != other.index + return NotImplemented def __lt__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index < other.index + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + 
): + return self.index < other.index + return NotImplemented def __le__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index <= other.index + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index <= other.index + return NotImplemented def __gt__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index > other.index + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index > other.index + return NotImplemented def __ge__(self, other: object) -> bool: - if not isinstance(other, Enum): - return NotImplemented - return self.index >= other.index + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index >= other.index + return NotImplemented @classmethod def encode( @@ -167,7 +186,7 @@ def encode( Raises: NotImplementedError: If ``array`` is a scalar :class:`~numpy.ndarray`. - TypeError: If ``array`` is of a diffent :class:`.Enum` type. + EnumEncodingError: If ``array`` is of a diffent :class:`.Enum` type. Examples: >>> import numpy @@ -211,7 +230,7 @@ def encode( >>> array = numpy.array([b"TENANT"]) >>> enum_array = Housing.encode(array) Traceback (most recent call last): - TypeError: Failed to encode "[b'TENANT']" of type 'bytes_', as i... + EnumEncodingError: Failed to encode "[b'TENANT']" of type 'bytes... .. seealso:: :meth:`.EnumArray.decode` for decoding. @@ -259,12 +278,7 @@ def encode( if _is_obj_array(array) and cls.__name__ is array[0].__class__.__name__: return EnumArray(_enum_to_index(cls, array), cls) - msg = ( - f"Failed to encode \"{array}\" of type '{array[0].__class__.__name__}', " - "as it is not supported. Please, try again with an array of " - f"'{int.__name__}', '{str.__name__}', or '{cls.__name__}'." 
- ) - raise TypeError(msg) + raise EnumEncodingError(cls, array) __all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 1f3e95a6f..8ffae5dd8 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -103,10 +103,10 @@ def test_enum_encode_with_str_scalar_array(): def test_enum_encode_with_str_with_bad_value(): - """Does not encode when called with a value not in an Enum.""" + """Encode encode when called with a value not in an Enum.""" array = numpy.array(["JAIBA"]) - with pytest.raises(IndexError): - Animal.encode(array) + enum_array = Animal.encode(array) + assert Animal.CAT in enum_array # Unsupported encodings diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index b545d0bb6..784ae1e92 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -15,7 +15,7 @@ import numpy from numpy import ( bool_ as BoolDType, - generic as AnyDType, + generic as VarDType, int32 as IntDType, object_ as ObjDType, str_ as StrDType, @@ -44,7 +44,7 @@ ObjArray: TypeAlias = Array[ObjDType] #: Type for generic arrays. -AnyArray: TypeAlias = Array[AnyDType] +VarArray: TypeAlias = Array[VarDType] __all__ = [ "ArrayLike", diff --git a/openfisca_core/types.py b/openfisca_core/types.py index b1cb65970..702138e39 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -113,10 +113,6 @@ def plural(self, /) -> None | RolePlural: ... 
class EnumType(enum.EnumMeta): items: RecArray - _sorted_names_: Array[DTypeStr] - _sorted_enums_: Array[DTypeObject] - _sorted_names_index_: Array[DTypeEnum] - _sorted_enums_index_: Array[DTypeEnum] @property @abc.abstractmethod From 8494e6c3ad7292f1615280d23e6c81403a611efd Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Fri, 11 Oct 2024 22:52:26 +0200 Subject: [PATCH 25/29] refactor(enums): make dtype check faster (#1233) --- openfisca_core/indexed_enums/__init__.py | 2 - openfisca_core/indexed_enums/_enum_type.py | 119 --------------------- openfisca_core/indexed_enums/_errors.py | 9 +- openfisca_core/indexed_enums/_guards.py | 31 ++++-- openfisca_core/indexed_enums/_utils.py | 4 +- openfisca_core/indexed_enums/enum.py | 3 +- openfisca_core/indexed_enums/enum_array.py | 29 ++--- openfisca_core/indexed_enums/types.py | 22 +--- openfisca_core/types.py | 21 +--- 9 files changed, 47 insertions(+), 193 deletions(-) delete mode 100644 openfisca_core/indexed_enums/_enum_type.py diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 494601fc8..4c44f881f 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,7 +1,6 @@ """Enumerations for variables with a limited set of possible values.""" from . import types -from ._enum_type import EnumType from ._errors import EnumEncodingError, EnumMemberNotFoundError from .config import ENUM_ARRAY_DTYPE from .enum import Enum @@ -13,6 +12,5 @@ "EnumArray", "EnumEncodingError", "EnumMemberNotFoundError", - "EnumType", "types", ] diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py deleted file mode 100644 index a86969ccf..000000000 --- a/openfisca_core/indexed_enums/_enum_type.py +++ /dev/null @@ -1,119 +0,0 @@ -from __future__ import annotations - -from typing import final - -import numpy - -from . 
import types as t - - -def _item_list(enum_class: t.EnumType) -> t.ItemList: - """Return the non-vectorised list of enum items.""" - return [ # type: ignore[var-annotated] - (index, name, value) - for index, (name, value) in enumerate(enum_class.__members__.items()) - ] - - -def _item_dtype(enum_class: t.EnumType) -> t.RecDType: - """Return the dtype of the indexed enum's items.""" - size = max(map(len, enum_class.__members__.keys())) - return numpy.dtype( - ( - numpy.generic, - { - "index": (t.EnumDType, 0), - "name": (f"U{size}", 2), - "enum": (enum_class, 2 + size * 4), - }, - ) - ) - - -def _item_array(enum_class: t.EnumType) -> t.RecArray: - """Return the indexed enum's items.""" - items = _item_list(enum_class) - dtype = _item_dtype(enum_class) - array = numpy.array(items, dtype=dtype) - return array.view(numpy.recarray) - - -@final -class EnumType(t.EnumType): - """Meta class for creating an indexed :class:`.Enum`. - - Examples: - >>> from openfisca_core import indexed_enums as enum - - >>> class Enum(enum.Enum, metaclass=enum.EnumType): - ... pass - - >>> Enum.items - Traceback (most recent call last): - AttributeError: ... - - >>> class Housing(Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - - >>> Housing.items - rec.array([(0, 'OWNER', Housing.OWNER), (1, 'TENANT', Housing.TENAN...) - - >>> Housing.indices - array([0, 1], dtype=uint8) - - >>> Housing.names - array(['OWNER', 'TENANT'], dtype='>> Housing.enums - array([Housing.OWNER, Housing.TENANT], dtype=object) - - """ - - #: The items of the indexed enum class. 
- items: t.RecArray - - @property - def indices(cls) -> t.IndexArray: - """Return the indices of the indexed enum class.""" - indices: t.IndexArray = cls.items.index - return indices - - @property - def names(cls) -> t.StrArray: - """Return the names of the indexed enum class.""" - names: t.StrArray = cls.items.name - return names - - @property - def enums(cls) -> t.ObjArray: - """Return the members of the indexed enum class.""" - enums: t.ObjArray = cls.items.enum - return enums - - def __new__( - metacls, - name: str, - bases: tuple[type, ...], - classdict: t.EnumDict, - **kwds: object, - ) -> t.EnumType: - """Create a new indexed enum class.""" - # Create the enum class. - cls = super().__new__(metacls, name, bases, classdict, **kwds) - - # If the enum class has no members, return it as is. - if not cls.__members__: - return cls - - # Add the items attribute to the enum class. - cls.items = _item_array(cls) - - # Return the modified enum class. - return cls - - def __dir__(cls) -> list[str]: - return sorted({"items", "indices", "names", "enums", *super().__dir__()}) - - -__all__ = ["EnumType"] diff --git a/openfisca_core/indexed_enums/_errors.py b/openfisca_core/indexed_enums/_errors.py index 7ec21eca4..7debd8cda 100644 --- a/openfisca_core/indexed_enums/_errors.py +++ b/openfisca_core/indexed_enums/_errors.py @@ -17,12 +17,13 @@ class EnumMemberNotFoundError(IndexError): """Raised when a member is not found in an enum.""" def __init__(self, enum_class: type[t.Enum], value: str) -> None: + index = [str(enum.index) for enum in enum_class] + names = [enum.name for enum in enum_class] msg = ( f"Member {value} not found in enum '{enum_class.__name__}'. " - f"Possible values are: {', '.join(enum_class.names[:-1])}, and " - f"{enum_class.names[-1]!s}; or their corresponding indices: " - f"{', '.join(enum_class.indices[:-1].astype(t.StrDType))}, and " - f"{enum_class.indices[-1]!s}." 
+ f"Possible values are: {', '.join(names[:-1])}, and {names[-1]!s}; " + f"or their corresponding indices: {', '.join(index[:-1])}, and " + f"{index[-1]}." ) super().__init__(msg) diff --git a/openfisca_core/indexed_enums/_guards.py b/openfisca_core/indexed_enums/_guards.py index 9220a72bb..b1f38322d 100644 --- a/openfisca_core/indexed_enums/_guards.py +++ b/openfisca_core/indexed_enums/_guards.py @@ -1,13 +1,32 @@ from __future__ import annotations +from typing import Final from typing_extensions import TypeIs import numpy from . import types as t +#: Types for int arrays. +ints: Final = { + numpy.uint8, + numpy.uint16, + numpy.uint32, + numpy.uint64, + numpy.int8, + numpy.int16, + numpy.int32, + numpy.int64, +} -def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray]: +#: Types for object arrays. +objs: Final = {object, numpy.object_} + +#: Types for str arrays. +strs: Final = {str, numpy.str_} + + +def _is_int_array(array: t.VarArray) -> TypeIs[t.IndexArray]: """Narrow the type of a given array to an array of :obj:`numpy.integer`. Args: @@ -32,10 +51,10 @@ def _is_int_array(array: t.AnyArray) -> TypeIs[t.IndexArray]: False """ - return numpy.issubdtype(array.dtype, numpy.integer) + return array.dtype.type in ints -def _is_obj_array(array: t.AnyArray) -> TypeIs[t.ObjArray]: +def _is_obj_array(array: t.VarArray) -> TypeIs[t.ObjArray]: """Narrow the type of a given array to an array of :obj:`numpy.object_`. Args: @@ -59,10 +78,10 @@ def _is_obj_array(array: t.AnyArray) -> TypeIs[t.ObjArray]: False """ - return numpy.issubdtype(array.dtype, t.ObjDType) + return array.dtype.type in objs -def _is_str_array(array: t.AnyArray) -> TypeIs[t.StrArray]: +def _is_str_array(array: t.VarArray) -> TypeIs[t.StrArray]: """Narrow the type of a given array to an array of :obj:`numpy.str_`. 
Args: @@ -89,7 +108,7 @@ def _is_str_array(array: t.AnyArray) -> TypeIs[t.StrArray]: True """ - return numpy.issubdtype(array.dtype, str) + return array.dtype.type in strs __all__ = ["_is_int_array", "_is_obj_array", "_is_str_array"] diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index 0a29ff961..798c36e4e 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -99,7 +99,7 @@ def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray """ # Create a mask to determine which values are in the enum class. - mask = value < enum_class.items.size + mask = value < len(enum_class._member_names_) # Get the values that are not in the enum class. ko = value[~mask] @@ -153,7 +153,7 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: array([1, 1, 0], dtype=uint8) """ - names = enum_class.names + names = enum_class._member_names_ index = [enum_class[name].index if name in names else 0 for name in value] return _int_to_index(enum_class, numpy.array(index)) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 446d6cb74..79e34354b 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -3,14 +3,13 @@ import numpy from . import types as t -from ._enum_type import EnumType from ._errors import EnumEncodingError from ._guards import _is_int_array, _is_obj_array, _is_str_array from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray -class Enum(t.Enum, metaclass=EnumType): +class Enum(t.Enum): """Enum based on `enum34 `_. 
Its items have an :class:`int` index, useful and performant when running diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index fe25d6586..41653bf70 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -78,11 +78,7 @@ def __new__( possible_values: None | type[t.Enum] = None, ) -> Self: """See comment above.""" - if not isinstance(input_array, numpy.ndarray): - return cls.__new__(cls, numpy.asarray(input_array), possible_values) - if input_array.ndim == 0: - return cls.__new__(cls, input_array.reshape(1), possible_values) - obj = input_array.astype(t.EnumDType).view(cls) + obj = numpy.asarray(input_array).astype(t.EnumDType).view(cls) obj.possible_values = possible_values return obj @@ -160,9 +156,8 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] isinstance(other, type(t.Enum)) and other.__name__ is self.possible_values.__name__ ): - result = ( - self.view(numpy.ndarray) == other.indices[other.indices <= max(self)] - ) + index = numpy.array([enum.index for enum in self.possible_values]) + result = self.view(numpy.ndarray) == index[index <= max(self)] return result if ( isinstance(other, t.Enum) @@ -269,17 +264,16 @@ def decode(self) -> t.ObjArray: array([Housing.TENANT], dtype=object) """ - result: t.ObjArray if self.possible_values is None: msg = ( f"The possible values of the {self.__class__.__name__} are " f"not defined." ) raise TypeError(msg) - arr = self.astype(t.EnumDType) - arr = arr.reshape(1) if arr.ndim == 0 else arr - result = self.possible_values.items[arr.astype(t.EnumDType)].enum - return result + return numpy.select( + [self == item.index for item in self.possible_values], + list(self.possible_values), # pyright: ignore[reportArgumentType] + ) def decode_to_str(self) -> t.StrArray: """Decode itself to an array of strings. 
@@ -305,17 +299,16 @@ def decode_to_str(self) -> t.StrArray: array(['TENANT'], dtype=' str: return f"{self.__class__.__name__}({self.decode()!s})" diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index 784ae1e92..0c3b79448 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,18 +1,7 @@ from typing_extensions import TypeAlias -from openfisca_core.types import ( - Array, - ArrayLike, - DTypeLike, - Enum, - EnumArray, - EnumType, - RecArray, -) - -from enum import _EnumDict as EnumDict # noqa: PLC2701 +from openfisca_core.types import Array, ArrayLike, DTypeLike, Enum, EnumArray -import numpy from numpy import ( bool_ as BoolDType, generic as VarDType, @@ -22,12 +11,6 @@ uint8 as EnumDType, ) -#: Type for the non-vectorised list of enum items. -ItemList: TypeAlias = list[tuple[int, str, EnumType]] - -#: Type for record arrays data type. -RecDType: TypeAlias = numpy.dtype[numpy.void] - #: Type for enum indices arrays. IndexArray: TypeAlias = Array[EnumDType] @@ -51,7 +34,4 @@ "DTypeLike", "Enum", "EnumArray", - "EnumDict", - "EnumType", - "RecArray", ] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index 702138e39..bebc2c852 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -2,7 +2,7 @@ from collections.abc import Iterable, Sequence, Sized from numpy.typing import DTypeLike, NDArray -from typing import Any, NewType, TypeVar, Union +from typing import NewType, TypeVar, Union from typing_extensions import Protocol, Self, TypeAlias import abc @@ -28,9 +28,6 @@ #: Type representing an array-like object. ArrayLike: TypeAlias = Sequence[_L] -#: Type for record arrays. -RecArray: TypeAlias = numpy.recarray[object, Any] # type: ignore[misc] - #: Type for bool arrays. DTypeBool: TypeAlias = numpy.bool_ @@ -111,21 +108,7 @@ def plural(self, /) -> None | RolePlural: ... 
# Indexed enums -class EnumType(enum.EnumMeta): - items: RecArray - - @property - @abc.abstractmethod - def indices(cls) -> Array[DTypeEnum]: ... - @property - @abc.abstractmethod - def names(cls) -> Array[DTypeStr]: ... - @property - @abc.abstractmethod - def enums(cls) -> Array[DTypeGeneric]: ... - - -class Enum(enum.Enum, metaclass=EnumType): +class Enum(enum.Enum, metaclass=enum.EnumMeta): index: int _member_names_: list[str] From 73b296a13c886ac48f690f6fb783a5c435884431 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 12 Oct 2024 13:11:21 +0200 Subject: [PATCH 26/29] chore(enums): remove leftovers (#1233) --- openfisca_core/indexed_enums/enum.py | 32 ---------------------- openfisca_core/indexed_enums/enum_array.py | 14 ++++------ 2 files changed, 6 insertions(+), 40 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 79e34354b..a08d90659 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -129,38 +129,6 @@ def __ne__(self, other: object) -> bool: return self.index != other.index return NotImplemented - def __lt__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index < other.index - return NotImplemented - - def __le__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index <= other.index - return NotImplemented - - def __gt__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index > other.index - return NotImplemented - - def __ge__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index >= other.index - return NotImplemented - @classmethod def encode( cls, diff --git 
a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 41653bf70..ce89310d0 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -55,7 +55,7 @@ class EnumArray(t.EnumArray): >>> enum_array = enum.EnumArray(list(Housing), Housing) Traceback (most recent call last): - TypeError: int() argument must be a string, a bytes-like object or a... + AttributeError: 'list' object has no attribute 'view' >>> class OccupancyStatus(variables.Variable): ... value_type = enum.Enum @@ -74,21 +74,19 @@ class EnumArray(t.EnumArray): def __new__( cls, - input_array: object, - possible_values: None | type[t.Enum] = None, + input_array: t.IndexArray, + possible_values: type[t.Enum], ) -> Self: """See comment above.""" - obj = numpy.asarray(input_array).astype(t.EnumDType).view(cls) + obj = input_array.view(cls) obj.possible_values = possible_values return obj - def __array_finalize__(self, obj: None | t.EnumArray | t.ObjArray) -> None: + def __array_finalize__(self, obj: None | t.EnumArray | t.VarArray) -> None: """See comment above.""" if obj is None: return - if isinstance(obj, EnumArray): - self.possible_values = obj.possible_values - return + self.possible_values = getattr(obj, "possible_values", None) def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] """Compare equality with the item's :attr:`~.Enum.index`. 
From 02c0576522f33e05ce0d73a256e640724950a524 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sun, 13 Oct 2024 17:45:24 +0200 Subject: [PATCH 27/29] refactor(enums): consolidate encode(#1233) --- openfisca_core/indexed_enums/_errors.py | 10 +- openfisca_core/indexed_enums/_guards.py | 119 +++++++++++++-- openfisca_core/indexed_enums/_utils.py | 92 +++++++----- openfisca_core/indexed_enums/enum.py | 136 ++++++++++-------- openfisca_core/indexed_enums/enum_array.py | 3 + .../indexed_enums/tests/test_enum.py | 4 +- 6 files changed, 253 insertions(+), 111 deletions(-) diff --git a/openfisca_core/indexed_enums/_errors.py b/openfisca_core/indexed_enums/_errors.py index 7debd8cda..e9b543fc7 100644 --- a/openfisca_core/indexed_enums/_errors.py +++ b/openfisca_core/indexed_enums/_errors.py @@ -1,10 +1,14 @@ +from __future__ import annotations + from . import types as t class EnumEncodingError(TypeError): """Raised when an enum is encoded with an unsupported type.""" - def __init__(self, enum_class: type[t.Enum], value: t.VarArray) -> None: + def __init__( + self, enum_class: type[t.Enum], value: t.VarArray | t.ArrayLike[object] + ) -> None: msg = ( f"Failed to encode \"{value}\" of type '{value[0].__class__.__name__}', " "as it is not supported. Please, try again with an array of " @@ -16,11 +20,11 @@ def __init__(self, enum_class: type[t.Enum], value: t.VarArray) -> None: class EnumMemberNotFoundError(IndexError): """Raised when a member is not found in an enum.""" - def __init__(self, enum_class: type[t.Enum], value: str) -> None: + def __init__(self, enum_class: type[t.Enum]) -> None: index = [str(enum.index) for enum in enum_class] names = [enum.name for enum in enum_class] msg = ( - f"Member {value} not found in enum '{enum_class.__name__}'. " + f"Some members were not found in enum '{enum_class.__name__}'. " f"Possible values are: {', '.join(names[:-1])}, and {names[-1]!s}; " f"or their corresponding indices: {', '.join(index[:-1])}, and " f"{index[-1]}." 
diff --git a/openfisca_core/indexed_enums/_guards.py b/openfisca_core/indexed_enums/_guards.py index b1f38322d..659fad6f4 100644 --- a/openfisca_core/indexed_enums/_guards.py +++ b/openfisca_core/indexed_enums/_guards.py @@ -20,10 +20,65 @@ } #: Types for object arrays. -objs: Final = {object, numpy.object_} +objs: Final = {numpy.object_} #: Types for str arrays. -strs: Final = {str, numpy.str_} +strs: Final = {numpy.str_} + + +def _is_enum_array(array: t.VarArray) -> TypeIs[t.ObjArray]: + """Narrow the type of a given array to an array of :obj:`numpy.object_`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.object_`, False otherwise. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Enum = enum.Enum("Enum", ["A", "B"]) + >>> array = numpy.array([Enum.A], dtype=numpy.object_) + >>> _is_enum_array(array) + True + + >>> array = numpy.array([1.0]) + >>> _is_enum_array(array) + False + + """ + return array.dtype.type in objs + + +def _is_enum_array_like(array: t.ArrayLike[object]) -> TypeIs[t.ArrayLike[t.Enum]]: + """Narrow the type of a given array-like to an sequence of :class:`.Enum`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array-like of :class:`.Enum`, False otherwise. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... TENANT = "tenant" + + >>> array = [Housing.OWNER] + >>> _is_enum_array_like(array) + True + + >>> array = ["owner"] + >>> _is_enum_array_like(array) + False + + """ + return all(isinstance(item, t.Enum) for item in array) def _is_int_array(array: t.VarArray) -> TypeIs[t.IndexArray]: @@ -54,31 +109,30 @@ def _is_int_array(array: t.VarArray) -> TypeIs[t.IndexArray]: return array.dtype.type in ints -def _is_obj_array(array: t.VarArray) -> TypeIs[t.ObjArray]: - """Narrow the type of a given array to an array of :obj:`numpy.object_`. 
+def _is_int_array_like(array: t.ArrayLike[object]) -> TypeIs[t.ArrayLike[int]]: + """Narrow the type of a given array-like to a sequence of :obj:`int`. Args: array: Array to check. Returns: - bool: True if ``array`` is an array of :obj:`numpy.object_`, False otherwise. + bool: True if ``array`` is an array-like of :obj:`int`, False otherwise. Examples: - >>> import numpy - - >>> from openfisca_core import indexed_enums as enum + >>> array = [1] + >>> _is_int_array_like(array) + True - >>> Enum = enum.Enum("Enum", ["A", "B"]) - >>> array = numpy.array([Enum.A], dtype=numpy.object_) - >>> _is_obj_array(array) + >>> array = (1, 2) + >>> _is_int_array_like(array) True - >>> array = numpy.array([1.0]) - >>> _is_obj_array(array) + >>> array = [1.0] + >>> _is_int_array_like(array) False """ - return array.dtype.type in objs + return all(isinstance(item, int) for item in array) def _is_str_array(array: t.VarArray) -> TypeIs[t.StrArray]: @@ -111,4 +165,39 @@ def _is_str_array(array: t.VarArray) -> TypeIs[t.StrArray]: return array.dtype.type in strs -__all__ = ["_is_int_array", "_is_obj_array", "_is_str_array"] +def _is_str_array_like(array: t.ArrayLike[object]) -> TypeIs[t.ArrayLike[str]]: + """Narrow the type of a given array-like to an sequence of :obj:`str`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array-like of :obj:`str`, False otherwise. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... 
TENANT = "tenant" + + >>> array = [Housing.OWNER] + >>> _is_str_array_like(array) + False + + >>> array = ["owner"] + >>> _is_str_array_like(array) + True + + """ + return all(isinstance(item, str) for item in array) + + +__all__ = [ + "_is_enum_array", + "_is_enum_array_like", + "_is_int_array", + "_is_int_array_like", + "_is_str_array", + "_is_str_array_like", +] diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index 798c36e4e..aa676b92f 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -1,14 +1,14 @@ +from __future__ import annotations + import numpy from . import types as t -from ._errors import EnumMemberNotFoundError -def _enum_to_index(enum_class: type[t.Enum], value: t.ObjArray) -> t.IndexArray: +def _enum_to_index(value: t.ObjArray | t.ArrayLike[t.Enum]) -> t.IndexArray: """Transform an array of enum members into an index array. Args: - enum_class: The enum class to encode the enum members array. value: The enum members array to encode. Returns: @@ -34,27 +34,35 @@ def _enum_to_index(enum_class: type[t.Enum], value: t.ObjArray) -> t.IndexArray: >>> class Rogue(enum.Enum): ... BOULEVARD = "More like a shady impasse, to be honest." 
- >>> _enum_to_index(Road, numpy.array(Road.AVENUE)) + >>> _enum_to_index(Road.AVENUE) + Traceback (most recent call last): + TypeError: 'Road' object is not iterable + + >>> _enum_to_index([Road.AVENUE]) + array([1], dtype=uint8) + + >>> _enum_to_index(numpy.array(Road.AVENUE)) Traceback (most recent call last): TypeError: iteration over a 0-d array - >>> _enum_to_index(Road, numpy.array([Road.AVENUE])) + >>> _enum_to_index(numpy.array([Road.AVENUE])) array([1], dtype=uint8) >>> value = numpy.array([Road.STREET, Road.AVENUE, Road.STREET]) - >>> _enum_to_index(Road, value) + >>> _enum_to_index(value) array([0, 1, 0], dtype=uint8) >>> value = numpy.array([Road.AVENUE, Road.AVENUE, Rogue.BOULEVARD]) - >>> _enum_to_index(Road, value) + >>> _enum_to_index(value) array([1, 1, 0], dtype=uint8) """ - index = [member.index for member in value] - return _int_to_index(enum_class, numpy.array(index)) + return numpy.array([enum.index for enum in value], t.EnumDType) -def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray: +def _int_to_index( + enum_class: type[t.Enum], value: t.IndexArray | t.ArrayLike[int] +) -> t.IndexArray: """Transform an integer array into an index array. Args: @@ -64,10 +72,9 @@ def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray Returns: The index array. - Raises: - EnumMemberNotFoundError: If one value is not in the enum class. - Examples: + >>> from array import array + >>> import numpy >>> from openfisca_core import indexed_enums as enum @@ -84,9 +91,23 @@ def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray ... "traditionally wider." ... 
) - >>> _int_to_index(Road, numpy.array(1)) + >>> _int_to_index(Road, 1) + Traceback (most recent call last): + TypeError: 'int' object is not iterable + + >>> _int_to_index(Road, [1]) + array([1], dtype=uint8) + + >>> _int_to_index(Road, array("B", [1])) + array([1], dtype=uint8) + + >>> _int_to_index(Road, memoryview(array("B", [1]))) array([1], dtype=uint8) + >>> _int_to_index(Road, numpy.array(1)) + Traceback (most recent call last): + TypeError: iteration over a 0-d array + >>> _int_to_index(Road, numpy.array([1])) array([1], dtype=uint8) @@ -94,25 +115,17 @@ def _int_to_index(enum_class: type[t.Enum], value: t.IndexArray) -> t.IndexArray array([0, 1, 0], dtype=uint8) >>> _int_to_index(Road, numpy.array([1, 1, 2])) - Traceback (most recent call last): - EnumMemberNotFoundError: Member with index 2 not found in enum 'Road... + array([1, 1], dtype=uint8) """ - # Create a mask to determine which values are in the enum class. - mask = value < len(enum_class._member_names_) - - # Get the values that are not in the enum class. - ko = value[~mask] - - # If there are values that are not in the enum class, raise an error. - if ko.size > 0: - raise EnumMemberNotFoundError(enum_class, f"with index {ko[0]}") + return numpy.array( + [index for index in value if index < len(enum_class.__members__)], t.EnumDType + ) - # Finally, return the index array. - return numpy.array(value[mask], dtype=t.EnumDType) - -def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: +def _str_to_index( + enum_class: type[t.Enum], value: t.StrArray | t.ArrayLike[str] +) -> t.IndexArray: """Transform a string array into an index array. Args: @@ -123,6 +136,8 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: The index array. Examples: + >>> from array import array + >>> import numpy >>> from openfisca_core import indexed_enums as enum @@ -139,6 +154,12 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: ... 
"traditionally wider." ... ) + >>> _str_to_index(Road, "AVENUE") + array([], dtype=uint8) + + >>> _str_to_index(Road, ["AVENUE"]) + array([1], dtype=uint8) + >>> _str_to_index(Road, numpy.array("AVENUE")) Traceback (most recent call last): TypeError: iteration over a 0-d array @@ -150,12 +171,17 @@ def _str_to_index(enum_class: type[t.Enum], value: t.StrArray) -> t.IndexArray: array([0, 1, 0], dtype=uint8) >>> _str_to_index(Road, numpy.array(["AVENUE", "AVENUE", "BOULEVARD"])) - array([1, 1, 0], dtype=uint8) + array([1, 1], dtype=uint8) """ - names = enum_class._member_names_ - index = [enum_class[name].index if name in names else 0 for name in value] - return _int_to_index(enum_class, numpy.array(index)) + return numpy.array( + [ + enum_class.__members__[name].index + for name in value + if name in enum_class._member_names_ + ], + t.EnumDType, + ) __all__ = ["_enum_to_index", "_int_to_index", "_str_to_index"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a08d90659..9ce66bbdc 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,10 +1,19 @@ from __future__ import annotations +from collections.abc import Sequence + import numpy from . 
import types as t -from ._errors import EnumEncodingError -from ._guards import _is_int_array, _is_obj_array, _is_str_array +from ._errors import EnumEncodingError, EnumMemberNotFoundError +from ._guards import ( + _is_enum_array, + _is_enum_array_like, + _is_int_array, + _is_int_array_like, + _is_str_array, + _is_str_array_like, +) from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray @@ -111,7 +120,7 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" def __hash__(self) -> int: - return object.__hash__(self) + return object.__hash__(self.__class__.__name__ + self.name) def __eq__(self, other: object) -> bool: if ( @@ -130,19 +139,7 @@ def __ne__(self, other: object) -> bool: return NotImplemented @classmethod - def encode( - cls, - array: ( - t.EnumArray - | t.IndexArray - | t.IntArray - | t.StrArray - | t.ObjArray - | t.ArrayLike[int] - | t.ArrayLike[str] - | t.ArrayLike[t.Enum] - ), - ) -> t.EnumArray: + def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: """Encode an encodable array into an :class:`.EnumArray`. Args: @@ -152,8 +149,9 @@ def encode( EnumArray: An :class:`.EnumArray` with the encoded input values. Raises: + EnumEncodingError: If ``array`` is of diffent :class:`.Enum` type. + EnumMemberNotFoundError: If members are not found in :class:`.Enum`. NotImplementedError: If ``array`` is a scalar :class:`~numpy.ndarray`. - EnumEncodingError: If ``array`` is of a diffent :class:`.Enum` type. Examples: >>> import numpy @@ -203,49 +201,71 @@ def encode( :meth:`.EnumArray.decode` for decoding. """ - # Array-like values need to be converted to a numpy array. - if not isinstance(array, numpy.ndarray): - return cls.encode(numpy.array(array)) - - # Empty arrays are returned as is. - if array.size == 0: - return EnumArray(numpy.array([]), cls) - - # Scalar arrays are not supported. 
- if array.ndim == 0: - msg = ( - "Scalar arrays are not supported: expecting a vector array, " - f"instead. Please try again with `numpy.array([{array}])`." - ) - raise NotImplementedError(msg) - - # Enum arrays. - if isinstance(array, t.EnumArray): + # Array of indices + indices: t.IndexArray + + if isinstance(array, EnumArray): return array - # Index arrays. - if _is_int_array(array): - return EnumArray(_int_to_index(cls, array), cls) - - # String arrays. - if _is_str_array(array): # type: ignore[unreachable] - return EnumArray(_str_to_index(cls, array), cls) - - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. - if _is_obj_array(array) and cls.__name__ is array[0].__class__.__name__: - return EnumArray(_enum_to_index(cls, array), cls) - - raise EnumEncodingError(cls, array) + # Array-like + if isinstance(array, Sequence): + if len(array) == 0: + indices = numpy.array([], t.EnumDType) + + elif _is_int_array_like(array): + indices = _int_to_index(cls, array) + + elif _is_str_array_like(array): + indices = _str_to_index(cls, array) + + elif _is_enum_array_like(array): + indices = _enum_to_index(array) + + else: + raise EnumEncodingError(cls, array) + + else: + # Scalar arrays are not supported. + if array.ndim == 0: + msg = ( + "Scalar arrays are not supported: expecting a vector array, " + f"instead. Please try again with `numpy.array([{array}])`." + ) + raise NotImplementedError(msg) + + # Empty arrays are returned as is. 
+ if array.size == 0: + indices = numpy.array([], t.EnumDType) + + # Index arrays. + elif _is_int_array(array): + indices = _int_to_index(cls, array) + + # String arrays. + elif _is_str_array(array): # type: ignore[unreachable] + indices = _str_to_index(cls, array) + + # Ensure we are comparing the comparable. The problem this fixes: + # On entering this method "cls" will generally come from + # variable.possible_values, while the array values may come from + # directly importing a module containing an Enum class. However, + # variables (and hence their possible_values) are loaded by a call + # to load_module, which gives them a different identity from the + # ones imported in the usual way. + # + # So, instead of relying on the "cls" passed in, we use only its + # name to check that the values in the array, if non-empty, are of + # the right type. + elif _is_enum_array(array) and cls.__name__ is array[0].__class__.__name__: + indices = _enum_to_index(array) + + else: + raise EnumEncodingError(cls, array) + + if indices.size != len(array): + raise EnumMemberNotFoundError(cls) + + return EnumArray(indices, cls) __all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index ce89310d0..bb1468e70 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -137,6 +137,9 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] >>> enum_array is None False + >>> enum_array == enum.EnumArray(numpy.array([1]), Housing) + array([ True]) + Note: This breaks the `Liskov substitution principle`_. 
diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 8ffae5dd8..1b3dd0d88 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -105,8 +105,8 @@ def test_enum_encode_with_str_scalar_array(): def test_enum_encode_with_str_with_bad_value(): """Encode encode when called with a value not in an Enum.""" array = numpy.array(["JAIBA"]) - enum_array = Animal.encode(array) - assert Animal.CAT in enum_array + with pytest.raises(IndexError): + Animal.encode(array) # Unsupported encodings From ef85e1f3fd4f6c926627bcff23315ca8d53f805e Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sun, 13 Oct 2024 19:13:56 +0200 Subject: [PATCH 28/29] refactor(enums): improve performance enum array (#1233) --- openfisca_core/indexed_enums/__init__.py | 2 + openfisca_core/indexed_enums/_enum_type.py | 70 ++++++++++++++ openfisca_core/indexed_enums/_guards.py | 12 ++- openfisca_core/indexed_enums/enum.py | 92 ++++++------------- openfisca_core/indexed_enums/enum_array.py | 26 +++--- .../indexed_enums/tests/test_enum.py | 8 +- openfisca_core/indexed_enums/types.py | 6 +- openfisca_core/types.py | 10 +- 8 files changed, 142 insertions(+), 84 deletions(-) create mode 100644 openfisca_core/indexed_enums/_enum_type.py diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 4c44f881f..494601fc8 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,6 +1,7 @@ """Enumerations for variables with a limited set of possible values.""" from . 
import types +from ._enum_type import EnumType from ._errors import EnumEncodingError, EnumMemberNotFoundError from .config import ENUM_ARRAY_DTYPE from .enum import Enum @@ -12,5 +13,6 @@ "EnumArray", "EnumEncodingError", "EnumMemberNotFoundError", + "EnumType", "types", ] diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py new file mode 100644 index 000000000..8083a6d49 --- /dev/null +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import final + +import numpy + +from . import types as t + + +@final +class EnumType(t.EnumType): + """Meta class for creating an indexed :class:`.Enum`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Enum(enum.Enum, metaclass=enum.EnumType): + ... pass + + >>> Enum.items + Traceback (most recent call last): + AttributeError: ... + + >>> class Housing(Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.indices + array([0, 1], dtype=uint8) + + >>> Housing.names + array(['OWNER', 'TENANT'], dtype='>> Housing.enums + array([Housing.OWNER, Housing.TENANT], dtype=object) + + """ + + def __new__( + metacls, + name: str, + bases: tuple[type, ...], + classdict: t.EnumDict, + **kwds: object, + ) -> t.EnumType: + """Create a new indexed enum class.""" + # Create the enum class. + cls = super().__new__(metacls, name, bases, classdict, **kwds) + + # If the enum class has no members, return it as is. + if not cls.__members__: + return cls + + # Add the indices attribute to the enum class. + cls.indices = numpy.arange(len(cls), dtype=t.EnumDType) + + # Add the names attribute to the enum class. + cls.names = numpy.array(cls._member_names_, dtype=t.StrDType) + + # Add the enums attribute to the enum class. + cls.enums = numpy.array(cls, dtype=t.ObjDType) + + # Return the modified enum class. 
+ return cls + + def __dir__(cls) -> list[str]: + return sorted({"indices", "names", "enums", *super().__dir__()}) + + +__all__ = ["EnumType"] diff --git a/openfisca_core/indexed_enums/_guards.py b/openfisca_core/indexed_enums/_guards.py index 659fad6f4..6c47471b3 100644 --- a/openfisca_core/indexed_enums/_guards.py +++ b/openfisca_core/indexed_enums/_guards.py @@ -53,7 +53,9 @@ def _is_enum_array(array: t.VarArray) -> TypeIs[t.ObjArray]: return array.dtype.type in objs -def _is_enum_array_like(array: t.ArrayLike[object]) -> TypeIs[t.ArrayLike[t.Enum]]: +def _is_enum_array_like( + array: t.VarArray | t.ArrayLike[object], +) -> TypeIs[t.ArrayLike[t.Enum]]: """Narrow the type of a given array-like to an sequence of :class:`.Enum`. Args: @@ -109,7 +111,9 @@ def _is_int_array(array: t.VarArray) -> TypeIs[t.IndexArray]: return array.dtype.type in ints -def _is_int_array_like(array: t.ArrayLike[object]) -> TypeIs[t.ArrayLike[int]]: +def _is_int_array_like( + array: t.VarArray | t.ArrayLike[object], +) -> TypeIs[t.ArrayLike[int]]: """Narrow the type of a given array-like to a sequence of :obj:`int`. Args: @@ -165,7 +169,9 @@ def _is_str_array(array: t.VarArray) -> TypeIs[t.StrArray]: return array.dtype.type in strs -def _is_str_array_like(array: t.ArrayLike[object]) -> TypeIs[t.ArrayLike[str]]: +def _is_str_array_like( + array: t.VarArray | t.ArrayLike[object], +) -> TypeIs[t.ArrayLike[str]]: """Narrow the type of a given array-like to an sequence of :obj:`str`. Args: diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index 9ce66bbdc..d116a56ba 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -5,6 +5,7 @@ import numpy from . 
import types as t +from ._enum_type import EnumType from ._errors import EnumEncodingError, EnumMemberNotFoundError from ._guards import ( _is_enum_array, @@ -18,7 +19,7 @@ from .enum_array import EnumArray -class Enum(t.Enum): +class Enum(t.Enum, metaclass=EnumType): """Enum based on `enum34 `_. Its items have an :class:`int` index, useful and performant when running @@ -148,11 +149,6 @@ def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: Returns: EnumArray: An :class:`.EnumArray` with the encoded input values. - Raises: - EnumEncodingError: If ``array`` is of diffent :class:`.Enum` type. - EnumMemberNotFoundError: If members are not found in :class:`.Enum`. - NotImplementedError: If ``array`` is a scalar :class:`~numpy.ndarray`. - Examples: >>> import numpy @@ -201,70 +197,40 @@ def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: :meth:`.EnumArray.decode` for decoding. """ - # Array of indices - indices: t.IndexArray - if isinstance(array, EnumArray): return array - - # Array-like + if len(array) == 0: + return EnumArray(numpy.asarray(array, t.EnumDType), cls) if isinstance(array, Sequence): - if len(array) == 0: - indices = numpy.array([], t.EnumDType) - - elif _is_int_array_like(array): - indices = _int_to_index(cls, array) - - elif _is_str_array_like(array): - indices = _str_to_index(cls, array) - - elif _is_enum_array_like(array): - indices = _enum_to_index(array) - - else: - raise EnumEncodingError(cls, array) + return cls._encode_array_like(array) + return cls._encode_array(array) + @classmethod + def _encode_array(cls, value: t.VarArray) -> t.EnumArray: + if _is_int_array(value): + indices = _int_to_index(cls, value) + elif _is_str_array(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: + indices = _enum_to_index(value) else: - # Scalar arrays are not supported. 
- if array.ndim == 0: - msg = ( - "Scalar arrays are not supported: expecting a vector array, " - f"instead. Please try again with `numpy.array([{array}])`." - ) - raise NotImplementedError(msg) - - # Empty arrays are returned as is. - if array.size == 0: - indices = numpy.array([], t.EnumDType) - - # Index arrays. - elif _is_int_array(array): - indices = _int_to_index(cls, array) - - # String arrays. - elif _is_str_array(array): # type: ignore[unreachable] - indices = _str_to_index(cls, array) - - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. 
- elif _is_enum_array(array) and cls.__name__ is array[0].__class__.__name__: - indices = _enum_to_index(array) - - else: - raise EnumEncodingError(cls, array) - - if indices.size != len(array): + raise EnumEncodingError(cls, value) + if indices.size != len(value): raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + @classmethod + def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: + if _is_int_array_like(value): + indices = _int_to_index(cls, value) + elif _is_str_array_like(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array_like(value): + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) return EnumArray(indices, cls) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index bb1468e70..98f9b4c6a 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -70,7 +70,7 @@ class EnumArray(t.EnumArray): """ #: Enum type of the array items. - possible_values: None | type[t.Enum] = None + possible_values: None | type[t.Enum] def __new__( cls, @@ -157,8 +157,12 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] isinstance(other, type(t.Enum)) and other.__name__ is self.possible_values.__name__ ): - index = numpy.array([enum.index for enum in self.possible_values]) - result = self.view(numpy.ndarray) == index[index <= max(self)] + result = ( + self.view(numpy.ndarray) + == self.possible_values.indices[ + self.possible_values.indices <= max(self) + ] + ) return result if ( isinstance(other, t.Enum) @@ -265,16 +269,16 @@ def decode(self) -> t.ObjArray: array([Housing.TENANT], dtype=object) """ + result: t.ObjArray if self.possible_values is None: msg = ( f"The possible values of the {self.__class__.__name__} are " f"not defined." 
) raise TypeError(msg) - return numpy.select( - [self == item.index for item in self.possible_values], - list(self.possible_values), # pyright: ignore[reportArgumentType] - ) + array = self.reshape(1).astype(t.EnumDType) if self.ndim == 0 else self + result = self.possible_values.enums[array] + return result def decode_to_str(self) -> t.StrArray: """Decode itself to an array of strings. @@ -300,16 +304,16 @@ def decode_to_str(self) -> t.StrArray: array(['TENANT'], dtype=' str: return f"{self.__class__.__name__}({self.decode()!s})" diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 1b3dd0d88..2e49c1742 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -36,7 +36,7 @@ def test_enum_encode_with_enum_sequence(): def test_enum_encode_with_enum_scalar_array(): """Does not encode when called with an enum scalar array.""" array = numpy.array(Animal.DOG) - with pytest.raises(NotImplementedError): + with pytest.raises(TypeError): Animal.encode(array) @@ -67,7 +67,7 @@ def test_enum_encode_with_int_sequence(): def test_enum_encode_with_int_scalar_array(): """Does not encode when called with an int scalar array.""" array = numpy.array(1) - with pytest.raises(NotImplementedError): + with pytest.raises(TypeError): Animal.encode(array) @@ -98,7 +98,7 @@ def test_enum_encode_with_str_sequence(): def test_enum_encode_with_str_scalar_array(): """Does not encode when called with a str scalar array.""" array = numpy.array("DOG") - with pytest.raises(NotImplementedError): + with pytest.raises(TypeError): Animal.encode(array) @@ -124,7 +124,7 @@ def test_enum_encode_with_any_scalar_array(): """Does not encode when called with unsupported types.""" value = 1.5 array = numpy.array(value) - with pytest.raises(NotImplementedError): + with pytest.raises(TypeError): Animal.encode(array) diff --git a/openfisca_core/indexed_enums/types.py 
b/openfisca_core/indexed_enums/types.py index 0c3b79448..e0a71b322 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,6 +1,8 @@ from typing_extensions import TypeAlias -from openfisca_core.types import Array, ArrayLike, DTypeLike, Enum, EnumArray +from openfisca_core.types import Array, ArrayLike, DTypeLike, Enum, EnumArray, EnumType + +from enum import _EnumDict as EnumDict # noqa: PLC2701 from numpy import ( bool_ as BoolDType, @@ -34,4 +36,6 @@ "DTypeLike", "Enum", "EnumArray", + "EnumDict", + "EnumType", ] diff --git a/openfisca_core/types.py b/openfisca_core/types.py index bebc2c852..b79504c72 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -108,7 +108,13 @@ def plural(self, /) -> None | RolePlural: ... # Indexed enums -class Enum(enum.Enum, metaclass=enum.EnumMeta): +class EnumType(enum.EnumMeta): + indices: Array[DTypeEnum] + names: Array[DTypeStr] + enums: Array[DTypeObject] + + +class Enum(enum.Enum, metaclass=EnumType): index: int _member_names_: list[str] @@ -118,7 +124,7 @@ class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): @abc.abstractmethod def __new__( - cls, input_array: Array[DTypeEnum], possible_values: None | type[Enum] = ... + cls, input_array: Array[DTypeEnum], possible_values: type[Enum] ) -> Self: ... From 9b7d9c9744c7cf07204540c6b264a644cfa0d60d Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Mon, 14 Oct 2024 12:11:26 +0200 Subject: [PATCH 29/29] chore!: version bump (fixes #1271 #1267 #1233) BREAKING_CHANGE: This changeset has not breaking changes to the `indexed_enums` public API. However, as a conservative measure concerning data preparation for large population simulations, it has been marked as a major release. 
Fixes #1271 Fixes #1267 Fixes #1233 --- .conda/openfisca-country-template/recipe.yaml | 10 +++---- .../openfisca-extension-template/recipe.yaml | 10 +++---- CHANGELOG.md | 27 +++++++------------ setup.py | 4 +-- 4 files changed, 17 insertions(+), 34 deletions(-) diff --git a/.conda/openfisca-country-template/recipe.yaml b/.conda/openfisca-country-template/recipe.yaml index 7b75cf22c..3c9198dee 100644 --- a/.conda/openfisca-country-template/recipe.yaml +++ b/.conda/openfisca-country-template/recipe.yaml @@ -13,8 +13,9 @@ source: sha256: b2f2ac9945d9ccad467aed0925bd82f7f4d5ce4e96b212324cd071b8bee46914 build: + number: 1 noarch: python - script: pip install . -v + script: pip install . -v --no-deps requirements: host: @@ -25,12 +26,7 @@ requirements: run: - numpy - python - - openfisca-core >=42,<43 - -tests: -- python: - imports: - - openfisca_country_template + - openfisca-core >=42,<44 about: summary: OpenFisca Rules as Code model for Country-Template. diff --git a/.conda/openfisca-extension-template/recipe.yaml b/.conda/openfisca-extension-template/recipe.yaml index 03e53d5dd..94075b227 100644 --- a/.conda/openfisca-extension-template/recipe.yaml +++ b/.conda/openfisca-extension-template/recipe.yaml @@ -13,8 +13,9 @@ source: sha256: e16ee9cbefdd5e9ddc1c2c0e12bcd74307c8cb1be55353b3b2788d64a90a5df9 build: + number: 1 noarch: python - script: pip install . -v + script: pip install . 
-v --no-deps requirements: host: @@ -25,12 +26,7 @@ requirements: run: - numpy - python - - openfisca-country-template >=7,<8 - -tests: -- python: - imports: - - openfisca_extension_template + - openfisca-country-template >=7.1.5,<8 about: summary: An OpenFisca extension that adds some variables to an already-existing diff --git a/CHANGELOG.md b/CHANGELOG.md index b7a2683e1..549588caf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,28 +1,25 @@ # Changelog -### 42.1.1 [#1224](https://github.com/openfisca/openfisca-core/pull/1224) +# 43.0.0 [#1224](https://github.com/openfisca/openfisca-core/pull/1224) #### Technical changes +- Add documentation to the `indexed_enums` module - Fix type definitions in the enums module - -## 42.1.0 [#1273](https://github.com/openfisca/openfisca-core/pull/1273) +- Fix doctests +- Fix bug in `Enum.encode` when passing a scalar +- Fix bug in `Enum.encode` when encoding values not present in the enum #### New features - Introduce `indexed_enums.EnumType` - Allows for actually fancy indexing `indexed_enums.Enum` -#### Technical changes +#### Note -- Fix doctests - - Now `pytest openfisca_core/indexed_enums` runs without errors -- Fix bug in `Enum.encode` when passing a scalar - - Still raises `TypeError` but with an explanation of why it fails -- Fix bug in `Enum.encode` when encoding values not present in the enum - - When encoding values not present in an enum, `Enum.encode` always encoded - the first item of the enum - - Now, it correctly encodes only the values requested that exist in the enum +This changeset has no breaking changes to the `indexed_enums` public API. +However, as a conservative measure concerning data preparation for large +population simulations, it has been marked as a major release.
##### Before @@ -53,12 +50,6 @@ TestEnum.encode([0,1,2,5]) # EnumArray([ ]) ``` -### 42.0.8 [#1272](https://github.com/openfisca/openfisca-core/pull/1272) - -#### Documentation - -- Add documentation to the `indexed_enums` module - ### 42.0.7 [#1264](https://github.com/openfisca/openfisca-core/pull/1264) #### Technical changes diff --git a/setup.py b/setup.py index 350654ba3..d20cd6bb8 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "StrEnum >=0.4.8, <0.5.0", # 3.11.x backport "dpath >=2.1.4, <3.0", "numexpr >=2.10.1, <3.0", - "numpy >=1.24.3, <2.0", + "numpy >=1.24.2, <2.0", "pendulum >=3.0.0, <4.0.0", "psutil >=5.9.4, <6.0", "pytest >=8.3.3, <9.0", @@ -69,7 +69,7 @@ setup( name="OpenFisca-Core", - version="42.1.1", + version="43.0.0", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[