Skip to content

Commit

Permalink
feat(periods): parse week instants (#1232)
Browse files Browse the repository at this point in the history
fixes #1232
  • Loading branch information
bonjourmauko committed Sep 26, 2024
1 parent d5ef321 commit 022e2c4
Show file tree
Hide file tree
Showing 10 changed files with 465 additions and 263 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
- Introduce `Instant.eternity()`
- This behaviour was duplicated across
- Now it is encapsulated in a single method
- Now `periods.instant` also parses ISO calendar strings (weeks)
- For instance, `2022-W01` is now a valid input

#### Technical changes

Expand Down
3 changes: 2 additions & 1 deletion openfisca_core/commons/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def empty_clone(original: object) -> object:
"""

def __init__(_): ...
def __init__(_: object) -> None: ...

Dummy = type(
"Dummy",
Expand Down Expand Up @@ -71,6 +71,7 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str:
"[<class 'list'>, {}, <function stringify_array...]"
"""

if array is None:
return "None"

Expand Down
3 changes: 2 additions & 1 deletion openfisca_core/periods/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# See: https://www.python.org/dev/peps/pep-0008/#imports

from . import types
from ._errors import InstantError, ParserError
from ._errors import InstantError, ParserError, PeriodError
from .config import (
INSTANT_PATTERN,
date_by_instant_cache,
Expand Down Expand Up @@ -62,6 +62,7 @@
"MONTH",
"ParserError",
"Period",
"PeriodError",
"WEEK",
"WEEKDAY",
"YEAR",
Expand Down
19 changes: 16 additions & 3 deletions openfisca_core/periods/_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,23 @@ class InstantError(ValueError):

def __init__(self, value: str) -> None:
msg = (
f"'{value}' is not a valid instant. Instants are described "
"using the 'YYYY-MM-DD' format, for instance '2015-06-15'."
f"'{value}' is not a valid instant string. Instants are described "
"using either the 'YYYY-MM-DD' format, for instance '2015-06-15', "
"or the 'YYYY-Www-D' format, for instance '2015-W24-1'."
)
super().__init__(msg)


__all__ = ["InstantError", "ParserError"]
class PeriodError(ValueError):
    """Error signalling that a value cannot be interpreted as a period.

    The message quotes the offending value verbatim, lists a few examples of
    accepted period strings, and links to the OpenFisca documentation on
    legal period formats.

    Args:
        value: String representation of the rejected input.

    """

    def __init__(self, value: str) -> None:
        expected = (
            "Expected a period (eg. '2017', 'month:2017-01', "
            "'week:2017-W01-1:3', ...)"
        )
        learn_more = (
            "Learn more about legal period formats in OpenFisca: "
            "<https://openfisca.org/doc/coding-the-legislation/35_periods.html#periods-in-simulations>."
        )
        super().__init__(f"{expected}; got: '{value}'. {learn_more}")


__all__ = ["InstantError", "ParserError", "PeriodError"]
111 changes: 62 additions & 49 deletions openfisca_core/periods/_parsers.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,64 @@
"""To parse periods and instants from strings."""

from __future__ import annotations

import re
import datetime

import pendulum

from . import types as t
from ._errors import ParserError
from ._errors import InstantError, ParserError, PeriodError
from .date_unit import DateUnit
from .instant_ import Instant
from .period_ import Period

invalid_week = re.compile(r".*(W[1-9]|W[1-9]-[0-9]|W[0-5][0-9]-0)$")

def parse_instant(value: str) -> t.Instant:
    """Build an instant from an ISO format or ISO calendar string.

    Args:
        value: The string to parse, for instance ``"2022"``, ``"2022-02"``
            or ``"2022-W02-7"``.

    Returns:
        The Instant (year, month, day) corresponding to the parsed date.

    Raises:
        InstantError: When ``value`` is not an instance of ``InstantStr``
            (e.g. a malformed week string such as ``"2022-W013"``).
        ParserError: When the string couldn't be parsed into a date.

    Examples:
        >>> parse_instant("2022")
        Instant((2022, 1, 1))

        >>> parse_instant("2022-02")
        Instant((2022, 2, 1))

        >>> parse_instant("2022-W02-7")
        Instant((2022, 1, 16))

        >>> parse_instant("2022-W013")
        Traceback (most recent call last):
        openfisca_core.periods._errors.InstantError: '2022-W013' is not a va...

        >>> parse_instant("2022-02-29")
        Traceback (most recent call last):
        pendulum.parsing.exceptions.ParserError: Unable to parse string [202...

    """

    # Reject anything that is not recognised as an instant string up front.
    if not isinstance(value, t.InstantStr):
        raise InstantError(str(value))

    # ``exact=True`` lets pendulum return the type matching the input, so the
    # result has to be checked before reading the date fields from it.
    parsed = pendulum.parse(value, exact=True)

    if isinstance(parsed, datetime.date):
        return Instant((parsed.year, parsed.month, parsed.day))

    msg = f"Unable to parse string [{value}]"
    raise ParserError(msg)

def parse_period(value: object) -> None | t.Period:

def parse_period(value: str) -> t.Period:
"""Parses ISO format/calendar periods.
Such as "2012" or "2015-03".
Expand All @@ -30,34 +75,13 @@ def parse_period(value: object) -> None | t.Period:
"""

# If it is not a string, next!
if not isinstance(value, str):
msg = f"Expected a {str.__name__}, got {type(value).__name__}."
raise NotImplementedError(msg)

# If it's a complex period, next!
if len(value.split(":")) != 1:
return None

# Check for a non-empty string.
if len(value) == 0:
raise AttributeError

# If it's negative, next!
if value[0] == "-":
raise ValueError
try:
instant = parse_instant(value)

# If it's an invalid week, next!
if invalid_week.match(value):
raise ParserError
except InstantError as error:
raise PeriodError(value) from error

unit = parse_unit(value)
date = pendulum.parse(value, exact=True)

if not isinstance(date, pendulum.Date):
raise TypeError

instant = Instant((date.year, date.month, date.day))

return Period((unit, instant, 1))

Expand All @@ -72,37 +96,26 @@ def parse_unit(value: str) -> t.DateUnit:
A DateUnit.
Raises:
ValueError when no DateUnit can be determined.
InstantError: when no DateUnit can be determined.
Examples:
>>> parse_unit("2022")
<DateUnit.YEAR: 'year'>
>>> parse_unit("2022-W03-01")
>>> parse_unit("2022-W03-1")
<DateUnit.WEEKDAY: 'weekday'>
"""

format_y, format_ym, format_ymd = 1, 2, 3
length = len(value.split("-"))
isweek = value.find("W") != -1

if length == format_y:
return DateUnit.YEAR

if length == format_ym:
if isweek:
return DateUnit.WEEK
if not isinstance(value, t.InstantStr):
raise InstantError(str(value))

return DateUnit.MONTH

if length == format_ymd:
if isweek:
return DateUnit.WEEKDAY
length = len(value.split("-"))

return DateUnit.DAY
if isinstance(value, t.ISOCalendarStr):
return DateUnit.isocalendar[-length]

raise ValueError
return DateUnit.isoformat[-length]


__all__ = ["parse_period", "parse_unit"]
__all__ = ["parse_instant", "parse_period", "parse_unit"]
Loading

0 comments on commit 022e2c4

Please sign in to comment.