Skip to content

Commit

Permalink
refactor: add typing to & do maintenance of periods (#1223)
Browse files Browse the repository at this point in the history
  • Loading branch information
bonjourmauko authored Sep 30, 2024
2 parents 7d451a1 + e68657e commit adcc161
Show file tree
Hide file tree
Showing 22 changed files with 935 additions and 464 deletions.
33 changes: 33 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,38 @@
# Changelog

# 42.0.0 [#1223](https://github.com/openfisca/openfisca-core/pull/1223)

#### Breaking changes

- Changes to `eternity` instants and periods
  - Eternity instants are now `<Instant(-1, -1, -1)>` instead of
    `<Instant(inf, inf, inf)>`
  - Eternity periods are now `<Period(('eternity', <Instant(-1, -1, -1)>, -1))>`
    instead of `<Period(('eternity', <Instant(inf, inf, inf)>, inf))>`
  - The reason is to avoid mixing data types: `inf` is a float, whereas periods
    and instants are made of integers. Mixed data types make memory
    optimisations impossible.
  - Migration should be straightforward. If you have a test that checks for
    `inf`, you should update it to check for `-1` or use the `is_eternal` method.
- `periods.instant` no longer returns `None`
  - Now, it raises `periods.InstantError`

#### New features

- Introduce `Instant.eternity()`
  - This behaviour was previously duplicated in several places
  - Now it is encapsulated in a single method
- Introduce `Instant.is_eternal` and `Period.is_eternal`
  - These methods check whether the instant or period is eternal (`bool`).
- `periods.instant` now also parses ISO calendar strings (weeks)
  - For instance, `2022-W01` is now a valid input

#### Technical changes

- Update `pendulum`
- Reduce code complexity
- Remove run-time type-checks
- Add typing to the periods module

### 41.5.7 [#1225](https://github.com/openfisca/openfisca-core/pull/1225)

#### Technical changes
Expand Down
5 changes: 4 additions & 1 deletion openfisca_core/commons/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@ def empty_clone(original: object) -> object:
"""

def __init__(_: object) -> None: ...

Dummy = type(
"Dummy",
(original.__class__,),
{"__init__": lambda _: None},
{"__init__": __init__},
)

new = Dummy()
Expand Down Expand Up @@ -69,6 +71,7 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str:
"[<class 'list'>, {}, <function stringify_array...]"
"""

if array is None:
return "None"

Expand Down
Empty file.
55 changes: 44 additions & 11 deletions openfisca_core/periods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,59 @@
#
# See: https://www.python.org/dev/peps/pep-0008/#imports

from .config import ( # noqa: F401
DAY,
ETERNITY,
from . import types
from ._errors import InstantError, ParserError, PeriodError
from .config import (
INSTANT_PATTERN,
MONTH,
WEEK,
WEEKDAY,
YEAR,
date_by_instant_cache,
str_by_instant_cache,
year_or_month_or_day_re,
)
from .date_unit import DateUnit # noqa: F401
from .helpers import ( # noqa: F401
from .date_unit import DateUnit
from .helpers import (
instant,
instant_date,
key_period_size,
period,
unit_weight,
unit_weights,
)
from .instant_ import Instant # noqa: F401
from .period_ import Period # noqa: F401
from .instant_ import Instant
from .period_ import Period

# Package-level aliases for the members of ``DateUnit``.
WEEKDAY = DateUnit.WEEKDAY
WEEK = DateUnit.WEEK
DAY = DateUnit.DAY
MONTH = DateUnit.MONTH
YEAR = DateUnit.YEAR
ETERNITY = DateUnit.ETERNITY
# Package-level aliases for the ``isoformat`` / ``isocalendar`` attributes of
# ``DateUnit`` (presumably the groups of units used by each ISO notation --
# confirm against ``date_unit.py``).
ISOFORMAT = DateUnit.isoformat
ISOCALENDAR = DateUnit.isocalendar

# Names exported by ``from openfisca_core.periods import *``.
__all__ = [
"DAY",
"DateUnit",
"ETERNITY",
"INSTANT_PATTERN",
"ISOCALENDAR",
"ISOFORMAT",
"Instant",
"InstantError",
"MONTH",
"ParserError",
"Period",
"PeriodError",
"WEEK",
"WEEKDAY",
"YEAR",
"date_by_instant_cache",
"instant",
"instant_date",
"key_period_size",
"period",
"str_by_instant_cache",
"types",
"unit_weight",
"unit_weights",
"year_or_month_or_day_re",
]
28 changes: 28 additions & 0 deletions openfisca_core/periods/_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pendulum.parsing.exceptions import ParserError


class InstantError(ValueError):
    """Raised when an invalid instant-like is provided.

    The error message quotes the offending value and reminds the caller of
    the two accepted notations: ISO format (``YYYY-MM-DD``) and ISO calendar
    (``YYYY-Www-D``).

    Args:
        value: The input that could not be interpreted as an instant.

    """

    def __init__(self, value: str) -> None:
        msg = (
            f"'{value}' is not a valid instant string. Instants are described "
            "using either the 'YYYY-MM-DD' format, for instance '2015-06-15', "
            "or the 'YYYY-Www-D' format, for instance '2015-W24-1'."
        )
        super().__init__(msg)


class PeriodError(ValueError):
    """Raised when an invalid period-like is provided.

    The error message lists a few examples of valid periods, quotes the
    offending value, and links to the OpenFisca documentation on period
    formats.

    Args:
        value: The input that could not be interpreted as a period.

    """

    def __init__(self, value: str) -> None:
        msg = (
            "Expected a period (eg. '2017', 'month:2017-01', 'week:2017-W01-1:3', "
            f"...); got: '{value}'. Learn more about legal period formats in "
            "OpenFisca: <https://openfisca.org/doc/coding-the-legislation/35_periods.html#periods-in-simulations>."
        )
        super().__init__(msg)


__all__ = ["InstantError", "ParserError", "PeriodError"]
116 changes: 70 additions & 46 deletions openfisca_core/periods/_parsers.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,92 @@
from typing import Optional
"""To parse periods and instants from strings."""

import re
from __future__ import annotations

import datetime

import pendulum
from pendulum.datetime import Date
from pendulum.parsing import ParserError

from . import types as t
from ._errors import InstantError, ParserError, PeriodError
from .date_unit import DateUnit
from .instant_ import Instant
from .period_ import Period

invalid_week = re.compile(r".*(W[1-9]|W[1-9]-[0-9]|W[0-5][0-9]-0)$")

def parse_instant(value: str) -> t.Instant:
"""Parse a string into an instant.
Args:
value (str): The string to parse.
Returns:
An InstantStr.
Raises:
InstantError: When the string is not a valid ISO Calendar/Format.
ParserError: When the string couldn't be parsed.
Examples:
>>> parse_instant("2022")
Instant((2022, 1, 1))
>>> parse_instant("2022-02")
Instant((2022, 2, 1))
>>> parse_instant("2022-W02-7")
Instant((2022, 1, 16))
def _parse_period(value: str) -> Optional[Period]:
>>> parse_instant("2022-W013")
Traceback (most recent call last):
openfisca_core.periods._errors.InstantError: '2022-W013' is not a va...
>>> parse_instant("2022-02-29")
Traceback (most recent call last):
pendulum.parsing.exceptions.ParserError: Unable to parse string [202...
"""

if not isinstance(value, t.InstantStr):
raise InstantError(str(value))

date = pendulum.parse(value, exact=True)

if not isinstance(date, datetime.date):
msg = f"Unable to parse string [{value}]"
raise ParserError(msg)

return Instant((date.year, date.month, date.day))


def parse_period(value: str) -> t.Period:
"""Parses ISO format/calendar periods.
Such as "2012" or "2015-03".
Examples:
>>> _parse_period("2022")
>>> parse_period("2022")
Period((<DateUnit.YEAR: 'year'>, Instant((2022, 1, 1)), 1))
>>> _parse_period("2022-02")
>>> parse_period("2022-02")
Period((<DateUnit.MONTH: 'month'>, Instant((2022, 2, 1)), 1))
>>> _parse_period("2022-W02-7")
>>> parse_period("2022-W02-7")
Period((<DateUnit.WEEKDAY: 'weekday'>, Instant((2022, 1, 16)), 1))
"""
# If it's a complex period, next!
if len(value.split(":")) != 1:
return None

# Check for a non-empty string.
if not (value and isinstance(value, str)):
raise AttributeError
try:
instant = parse_instant(value)

# If it's negative, next!
if value[0] == "-":
raise ValueError
except InstantError as error:
raise PeriodError(value) from error

# If it's an invalid week, next!
if invalid_week.match(value):
raise ParserError

unit = _parse_unit(value)
date = pendulum.parse(value, exact=True)

if not isinstance(date, Date):
raise ValueError

instant = Instant((date.year, date.month, date.day))
unit = parse_unit(value)

return Period((unit, instant, 1))


def _parse_unit(value: str) -> DateUnit:
def parse_unit(value: str) -> t.DateUnit:
"""Determine the date unit of a date string.
Args:
Expand All @@ -66,32 +96,26 @@ def _parse_unit(value: str) -> DateUnit:
A DateUnit.
Raises:
ValueError when no DateUnit can be determined.
InstantError: when no DateUnit can be determined.
Examples:
>>> _parse_unit("2022")
>>> parse_unit("2022")
<DateUnit.YEAR: 'year'>
>>> _parse_unit("2022-W03-01")
>>> parse_unit("2022-W03-1")
<DateUnit.WEEKDAY: 'weekday'>
"""
length = len(value.split("-"))
isweek = value.find("W") != -1

if length == 1:
return DateUnit.YEAR
if not isinstance(value, t.InstantStr):
raise InstantError(str(value))

if length == 2:
if isweek:
return DateUnit.WEEK
length = len(value.split("-"))

return DateUnit.MONTH
if isinstance(value, t.ISOCalendarStr):
return DateUnit.isocalendar[-length]

if length == 3:
if isweek:
return DateUnit.WEEKDAY
return DateUnit.isoformat[-length]

return DateUnit.DAY

raise ValueError
__all__ = ["parse_instant", "parse_period", "parse_unit"]
16 changes: 7 additions & 9 deletions openfisca_core/periods/config.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
import re

from .date_unit import DateUnit
import pendulum

WEEKDAY = DateUnit.WEEKDAY
WEEK = DateUnit.WEEK
DAY = DateUnit.DAY
MONTH = DateUnit.MONTH
YEAR = DateUnit.YEAR
ETERNITY = DateUnit.ETERNITY
from . import types as t

# Matches "2015", "2015-01", "2015-01-01"
# Does not match "2015-13", "2015-12-32"
INSTANT_PATTERN = re.compile(
r"^\d{4}(-(0[1-9]|1[012]))?(-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01]))?$",
)

date_by_instant_cache: dict = {}
str_by_instant_cache: dict = {}
date_by_instant_cache: dict[t.Instant, pendulum.Date] = {}
str_by_instant_cache: dict[t.Instant, t.InstantStr] = {}
year_or_month_or_day_re = re.compile(
r"(18|19|20)\d{2}(-(0?[1-9]|1[0-2])(-([0-2]?\d|3[0-1]))?)?$",
)


__all__ = ["INSTANT_PATTERN", "date_by_instant_cache", "str_by_instant_cache"]
Loading

0 comments on commit adcc161

Please sign in to comment.