Skip to content

Commit

Permalink
feat(api): add day_of_week.iso_index method for date and timestamp …
Browse files Browse the repository at this point in the history
…types
  • Loading branch information
kaijennissen authored and cpcloud committed Jul 16, 2024
1 parent f50cbfc commit faad9d3
Show file tree
Hide file tree
Showing 19 changed files with 125 additions and 17 deletions.
1 change: 1 addition & 0 deletions ibis/backends/dask/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def inner(df):
),
ops.TimestampFromUNIX: lambda arg, unit: dd.to_datetime(arg, unit=unit.short),
ops.DayOfWeekIndex: lambda arg: dd.to_datetime(arg).dt.dayofweek,
ops.IsoDayOfWeekIndex: lambda arg: dd.to_datetime(arg).dt.dayofweek + 1,
ops.DayOfWeekName: lambda arg: dd.to_datetime(arg).dt.day_name(),
}

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/pandas/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ def wrapper(*args, **kwargs):
),
ops.Capitalize: lambda arg: arg.str.capitalize(),
ops.Date: lambda arg: arg.dt.floor("d"),
ops.IsoDayOfWeekIndex: lambda arg: pd.to_datetime(arg).dt.dayofweek + 1,
ops.DayOfWeekIndex: lambda arg: pd.to_datetime(arg).dt.dayofweek,
ops.DayOfWeekName: lambda arg: pd.to_datetime(arg).dt.day_name(),
ops.EndsWith: lambda arg, end: arg.str.endswith(end),
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,6 +1064,7 @@ def extract_epoch_seconds(op, **kw):
ops.Ceil: lambda arg: arg.ceil().cast(pl.Int64),
ops.Cos: operator.methodcaller("cos"),
ops.Cot: lambda arg: 1.0 / arg.tan(),
ops.IsoDayOfWeekIndex: (lambda arg: arg.dt.weekday().cast(pl.Int16)),
ops.DayOfWeekIndex: (
lambda arg: arg.dt.weekday().cast(pl.Int16) - _day_of_week_offset
),
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,9 @@ def visit_TimeTruncate(self, op, *, arg, unit):
def visit_DayOfWeekIndex(self, op, *, arg):
# Rotate the backend's dayofweek() number by 6 (mod 7) to produce the
# Monday-based index (0=Monday ... 6=Sunday).
# NOTE(review): presumes dayofweek() counts from Sunday == 0 -- confirm.
return (self.f.dayofweek(arg) + 6) % 7

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Same rotation as visit_DayOfWeekIndex, then + 1 to move the 0-6 range
# onto ISO numbering (1=Monday ... 7=Sunday).
return ((self.f.dayofweek(arg) + 6) % 7) + 1

def visit_DayOfWeekName(self, op, *, arg):
# day of week number is 0-indexed
# Sunday == 0
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ def visit_StringJoin(self, op, *, arg, sep):
def visit_DayOfWeekIndex(self, op, *, arg):
# Add 5 to the extracted DAYOFWEEK and take it modulo 7 to get the
# Monday-based index (0=Monday ... 6=Sunday).
# NOTE(review): presumes EXTRACT(DAYOFWEEK ...) yields 1=Sunday ...
# 7=Saturday -- confirm against the BigQuery documentation.
return self.f.mod(self.f.extract(self.v.dayofweek, arg) + 5, 7)

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Monday-based index computed as in visit_DayOfWeekIndex, plus 1, giving
# ISO numbering (1=Monday ... 7=Sunday).
return self.f.mod(self.f.extract(self.v.dayofweek, arg) + 5, 7) + 1

Check warning on line 222 in ibis/backends/sql/compilers/bigquery.py

View check run for this annotation

Codecov / codecov/patch

ibis/backends/sql/compilers/bigquery.py#L222

Added line #L222 was not covered by tests

def visit_DayOfWeekName(self, op, *, arg):
# Cast the value to a string using the 'DAY' format element, then apply
# INITCAP to the result.
# NOTE(review): presumably 'DAY' produces the day name in a fixed case and
# INITCAP normalizes it to e.g. "Monday" -- confirm.
return self.f.initcap(sge.Cast(this=arg, to="STRING FORMAT 'DAY'"))

Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,9 @@ def visit_DayOfWeekIndex(self, op, *, arg):
weekdays = len(calendar.day_name)
return (((self.f.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# toDayOfWeek(arg) is returned unchanged.
# NOTE(review): relies on toDayOfWeek() already using ISO numbering
# (1=Monday ... 7=Sunday); visit_DayOfWeekIndex subtracts 1 from the same
# call, which is consistent with that -- confirm.
return self.f.toDayOfWeek(arg)

def visit_DayOfWeekName(self, op, *, arg):
# ClickHouse 20 doesn't support dateName
#
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,9 @@ def visit_ExtractDayOfYear(self, op, *, arg):
def visit_DayOfWeekIndex(self, op, *, arg):
# Shift date_part("dow") by 6 modulo 7 to get the Monday-based index
# (0=Monday ... 6=Sunday).
# NOTE(review): presumes "dow" counts from Sunday == 0 -- confirm.
return (self.f.date_part("dow", arg) + 6) % 7

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Monday-based index from date_part("dow"), plus 1, giving ISO numbering
# (1=Monday ... 7=Sunday).
return ((self.f.date_part("dow", arg) + 6) % 7) + 1

def visit_DayOfWeekName(self, op, *, arg):
return sg.exp.Case(
this=sge.paren(self.f.date_part("dow", arg) + 6, copy=False) % 7,
Expand Down
7 changes: 6 additions & 1 deletion ibis/backends/sql/compilers/exasol.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class ExasolCompiler(SQLGlotCompiler):
ops.DateAdd,
ops.DateSub,
ops.DateFromYMD,
ops.DayOfWeekIndex,
ops.ElementWiseVectorizedUDF,
ops.IntervalFromInteger,
ops.IsInf,
Expand Down Expand Up @@ -200,6 +199,12 @@ def visit_ExtractDayOfYear(self, op, *, arg):
def visit_ExtractWeekOfYear(self, op, *, arg):
# Format the value with the "IW" pattern and cast the resulting text to
# the operation's dtype.
# NOTE(review): "IW" is presumably the ISO week number -- confirm.
return self.cast(self.f.to_char(arg, "IW"), op.dtype)

def visit_DayOfWeekIndex(self, op, *, arg):
# Format with "ID", cast the text to the operation's dtype and subtract 1
# to get the Monday-based index (0=Monday ... 6=Sunday).
# NOTE(review): presumes "ID" is the ISO day number (1=Monday ...
# 7=Sunday) -- confirm against the Exasol documentation.
return self.cast(self.f.to_char(arg, "ID"), op.dtype) - 1

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Format with "ID" and cast the text to the operation's dtype; no offset
# is applied.
# NOTE(review): presumes "ID" already yields 1=Monday ... 7=Sunday --
# confirm.
return self.cast(self.f.to_char(arg, "ID"), op.dtype)

def visit_ExtractIsoYear(self, op, *, arg):
# Format the value with the "IYYY" pattern and cast the resulting text to
# the operation's dtype.
# NOTE(review): "IYYY" is presumably the ISO week-numbering year --
# confirm.
return self.cast(self.f.to_char(arg, "IYYY"), op.dtype)

Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/flink.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ def visit_ExtractMicrosecond(self, op, *, arg):
def visit_DayOfWeekIndex(self, op, *, arg):
# Add 5 to dayofweek() and take it modulo 7 to get the Monday-based
# index (0=Monday ... 6=Sunday).
# NOTE(review): presumes dayofweek() yields 1=Sunday ... 7=Saturday --
# confirm.
return (self.f.dayofweek(arg) + 5) % 7

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Monday-based index computed as in visit_DayOfWeekIndex, plus 1, giving
# ISO numbering (1=Monday ... 7=Sunday).
return ((self.f.dayofweek(arg) + 5) % 7) + 1

def visit_DayOfWeekName(self, op, *, arg):
index = self.cast(self.f.dayofweek(self.cast(arg, dt.date)), op.dtype)
lookup_table = self.f.str_to_map(
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/impala.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,9 @@ def visit_RandomScalar(self, op, **_):
def visit_DayOfWeekIndex(self, op, *, arg):
# Subtract 2 from dayofweek() and reduce with pmod(..., 7) to get the
# Monday-based index (0=Monday ... 6=Sunday).
# NOTE(review): presumes dayofweek() yields 1=Sunday ... 7=Saturday and
# that pmod() returns a non-negative remainder (so Sunday's -1 becomes 6)
# -- confirm.
return self.f.pmod(self.f.dayofweek(arg) - 2, 7)

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Monday-based index computed as in visit_DayOfWeekIndex, plus 1, giving
# ISO numbering (1=Monday ... 7=Sunday).
return self.f.pmod(self.f.dayofweek(arg) - 2, 7) + 1

def visit_ExtractMillisecond(self, op, *, arg):
# Keep only the remainder modulo 1000 of the extracted millisecond field.
# NOTE(review): presumably the backend's value can include whole seconds,
# which the modulo strips -- confirm.
return self.f.extract(self.v.millisecond, arg) % 1_000

Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ def visit_CountDistinct(self, op, *, arg, where):
def visit_DayOfWeekIndex(self, op, *, arg):
# DATEPART(weekday, arg) minus 1, intended as the Monday-based index
# (0=Monday ... 6=Sunday).
# NOTE(review): DATEPART(weekday, ...) depends on the session's DATEFIRST
# setting; this is only Monday-based if the connection treats Monday as
# day 1 -- confirm the backend configures that.
return self.f.datepart(self.v.weekday, arg) - 1

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# DATEPART(weekday, arg) is returned unchanged.
# NOTE(review): DATEPART(weekday, ...) depends on the session's DATEFIRST
# setting; the result is ISO numbering (1=Monday ... 7=Sunday) only if the
# connection treats Monday as day 1 -- confirm the backend configures
# that.
return self.f.datepart(self.v.weekday, arg)

def visit_DayOfWeekName(self, op, *, arg):
days = calendar.day_name
return sge.Case(
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ def visit_GroupConcat(self, op, *, arg, sep, where):
def visit_DayOfWeekIndex(self, op, *, arg):
# Add 5 to dayofweek() and take it modulo 7 to get the Monday-based
# index (0=Monday ... 6=Sunday).
# NOTE(review): presumes dayofweek() yields 1=Sunday ... 7=Saturday --
# confirm.
return (self.f.dayofweek(arg) + 5) % 7

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Monday-based index computed as in visit_DayOfWeekIndex, plus 1, giving
# ISO numbering (1=Monday ... 7=Sunday).
return ((self.f.dayofweek(arg) + 5) % 7) + 1

def visit_Literal(self, op, *, value, dtype):
# avoid casting NULL: the set of types allowed by MySQL and
# MariaDB when casting is a strict subset of allowed types in other
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,9 @@ def visit_TimestampBucket(self, op, *, arg, interval, offset):
def visit_DayOfWeekIndex(self, op, *, arg):
# Shift extract("dow") by 6, cast the sum to int16, then take it modulo 7
# to get the Monday-based index (0=Monday ... 6=Sunday). Note the cast is
# applied before the modulo.
# NOTE(review): presumes "dow" counts from Sunday == 0 -- confirm.
return self.cast(self.f.extract("dow", arg) + 6, dt.int16) % 7

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Extract the "isodow" field and cast it to int16; no arithmetic is
# applied.
# NOTE(review): presumes "isodow" yields 1=Monday ... 7=Sunday -- confirm.
return self.cast(self.f.extract("isodow", arg), dt.int16)

def visit_DayOfWeekName(self, op, *, arg):
# Format with the "Day" pattern and trim whitespace characters from the
# result.
# NOTE(review): presumably to_char pads day names to a fixed width, which
# is why the trim is needed -- confirm.
return self.f.trim(self.f.to_char(arg, "Day"), string.whitespace)

Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def visit_IntervalFromInteger(self, op, *, arg, unit):
def visit_DayOfWeekIndex(self, op, *, arg):
# Add 5 to dayofweek() and take it modulo 7 to get the Monday-based
# index (0=Monday ... 6=Sunday).
# NOTE(review): presumes dayofweek() yields 1=Sunday ... 7=Saturday --
# confirm.
return (self.f.dayofweek(arg) + 5) % 7

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Monday-based index computed as in visit_DayOfWeekIndex, plus 1, giving
# ISO numbering (1=Monday ... 7=Sunday).
return ((self.f.dayofweek(arg) + 5) % 7) + 1

def visit_DayOfWeekName(self, op, *, arg):
return sge.Case(
this=(self.f.dayofweek(arg) + 5) % 7,
Expand Down
10 changes: 10 additions & 0 deletions ibis/backends/sql/compilers/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,16 @@ def visit_DayOfWeekIndex(self, op, *, arg):
self.f.mod(self.cast(self.f.strftime("%w", arg) + 6, dt.int64), 7), dt.int64
)

def visit_IsoDayOfWeekIndex(self, op, *, arg):
# Build the Monday-based index the same way visit_DayOfWeekIndex does
# (strftime("%w") shifted by 6, modulo 7, cast to int64) and add 1 to get
# ISO numbering (1=Monday ... 7=Sunday).
# NOTE(review): a direct strftime("%u", arg) variant was left disabled
# here; presumably "%u" is not supported by every SQLite version in use
# -- confirm before switching to it.
return (
self.cast(
self.f.mod(self.cast(self.f.strftime("%w", arg) + 6, dt.int64), 7),
dt.int64,
)
+ 1
)

def visit_DayOfWeekName(self, op, *, arg):
return sge.Case(
this=self.f.strftime("%w", arg),
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ def visit_DayOfWeekIndex(self, op, *, arg):
sge.paren(self.f.day_of_week(arg) + 6, copy=False) % 7, op.dtype
)

def visit_IsoDayOfWeekIndex(self, op, *, arg):
    # day_of_week(arg) is used as-is and only cast to the operation's dtype.
    # NOTE(review): relies on day_of_week() already returning ISO numbering
    # (1=Monday ... 7=Sunday) -- confirm against the Trino documentation.
    # A bare function call needs no explicit parentheses inside the cast;
    # they are only required around the `+ 6 ... % 7` arithmetic used by
    # visit_DayOfWeekIndex.
    return self.cast(self.f.day_of_week(arg), op.dtype)

def visit_DayOfWeekName(self, op, *, arg):
# Format the value with the "%W" specifier.
# NOTE(review): "%W" is presumably the full weekday name in this
# backend's date_format() -- confirm.
return self.f.date_format(arg, "%W")

Expand Down
37 changes: 22 additions & 15 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def test_iso_year_does_not_match_date_year(con):
id="day_of_week_index",
marks=[
pytest.mark.notimpl(
["druid", "oracle", "exasol"], raises=com.OperationNotDefinedError
["druid", "oracle"], raises=com.OperationNotDefinedError
),
],
),
Expand Down Expand Up @@ -1561,29 +1561,34 @@ def test_string_to_date(alltypes, fmt):


@pytest.mark.parametrize(
("date", "expected_index", "expected_day"),
("date", "expected_index", "expected_iso_index", "expected_day"),
[
param("2017-01-01", 6, "Sunday", id="sunday"),
param("2017-01-02", 0, "Monday", id="monday"),
param("2017-01-03", 1, "Tuesday", id="tuesday"),
param("2017-01-04", 2, "Wednesday", id="wednesday"),
param("2017-01-05", 3, "Thursday", id="thursday"),
param("2017-01-06", 4, "Friday", id="friday"),
param("2017-01-07", 5, "Saturday", id="saturday"),
param("2017-01-01", 6, 7, "Sunday", id="sunday"),
param("2017-01-02", 0, 1, "Monday", id="monday"),
param("2017-01-03", 1, 2, "Tuesday", id="tuesday"),
param("2017-01-04", 2, 3, "Wednesday", id="wednesday"),
param("2017-01-05", 3, 4, "Thursday", id="thursday"),
param("2017-01-06", 4, 5, "Friday", id="friday"),
param("2017-01-07", 5, 6, "Saturday", id="saturday"),
],
)
@pytest.mark.notimpl(["druid", "oracle"], raises=com.OperationNotDefinedError)
@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
# @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
@pytest.mark.broken(
["risingwave"],
raises=AssertionError,
reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14670",
)
def test_day_of_week_scalar(con, date, expected_index, expected_day):
def test_day_of_week_scalar(
con, date, expected_index, expected_iso_index, expected_day
):
"""Check the day-of-week index, ISO index and full name of a literal date."""
expr = ibis.literal(date).cast(dt.date)
result_index = con.execute(expr.day_of_week.index().name("tmp"))
assert result_index == expected_index

result_iso_index = con.execute(expr.day_of_week.iso_index().name("tmp"))
assert result_iso_index == expected_iso_index

result_day = con.execute(expr.day_of_week.full_name().name("tmp"))
# Day names are compared case-insensitively.
assert result_day.lower() == expected_day.lower()

Expand All @@ -1594,7 +1599,7 @@ def test_day_of_week_scalar(con, date, expected_index, expected_day):
raises=AttributeError,
reason="StringColumn' object has no attribute 'day_of_week'",
)
@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
# @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
@pytest.mark.broken(
["risingwave"],
raises=AssertionError,
Expand All @@ -1608,6 +1613,11 @@ def test_day_of_week_column(backend, alltypes, df):

backend.assert_series_equal(result_index, expected_index, check_names=False)

result_iso_index = expr.iso_index().name("tmp").execute()
expected_iso_index = df.timestamp_col.dt.isocalendar().day.astype("int16")

backend.assert_series_equal(result_iso_index, expected_iso_index, check_names=False)

result_day = expr.full_name().name("tmp").execute()
expected_day = df.timestamp_col.dt.day_name()

Expand All @@ -1621,9 +1631,6 @@ def test_day_of_week_column(backend, alltypes, df):
lambda t: t.timestamp_col.day_of_week.index().count(),
lambda s: s.dt.dayofweek.count(),
id="day_of_week_index",
marks=[
pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
],
),
param(
lambda t: t.timestamp_col.day_of_week.full_name().length().sum(),
Expand Down
14 changes: 14 additions & 0 deletions ibis/expr/operations/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,13 @@ class DayOfWeekIndex(Unary):
dtype = dt.int16


@public
class IsoDayOfWeekIndex(Unary):
"""Extract the ISO index of the day of the week from a date or timestamp.

Days are numbered 1 (Monday) through 7 (Sunday).
"""

arg: Value[dt.Date | dt.Timestamp]

dtype = dt.int16


@public
class DayOfWeekName(Unary):
"""Extract the name of the day of the week from a date or timestamp."""
Expand All @@ -199,6 +206,13 @@ class DayOfWeekName(Unary):
dtype = dt.string


@public
class IsoDayOfWeekName(Unary):
"""Operation node producing a string from a date or timestamp argument."""

# NOTE(review): nothing visible in this change constructs or compiles this
# op (no expression method, no backend rule) -- confirm it is needed and
# how it is meant to differ from DayOfWeekName.
arg: Value[dt.Date | dt.Timestamp]

dtype = dt.string


@public
class Time(Unary):
"""Extract the time from a timestamp."""
Expand Down
38 changes: 37 additions & 1 deletion ibis/expr/types/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,34 @@ def day_of_week(self) -> DayOfWeek:
Returns
-------
DayOfWeek
An namespace expression containing methods to use to extract
A namespace expression with methods for extracting day-of-week
information.
Examples
--------
>>> import ibis
>>> import datetime as dt
>>> from ibis import _
>>> ibis.options.interactive = True
>>> t = ibis.memtable({"date": [dt.datetime(2024, 4, x) for x in range(14, 21)]})
>>> t.mutate(
... day_of_week=_.date.day_of_week.index(),
... day_of_week_name=_.date.day_of_week.full_name(),
... iso_day_of_week=_.date.day_of_week.iso_index(),
... )
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ date ┃ day_of_week ┃ day_of_week_name ┃ iso_day_of_week ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ timestamp │ int16 │ string │ int16 │
├─────────────────────┼─────────────┼──────────────────┼─────────────────┤
│ 2024-04-14 00:00:00 │ 6 │ Sunday │ 7 │
│ 2024-04-15 00:00:00 │ 0 │ Monday │ 1 │
│ 2024-04-16 00:00:00 │ 1 │ Tuesday │ 2 │
│ 2024-04-17 00:00:00 │ 2 │ Wednesday │ 3 │
│ 2024-04-18 00:00:00 │ 3 │ Thursday │ 4 │
│ 2024-04-19 00:00:00 │ 4 │ Friday │ 5 │
│ 2024-04-20 00:00:00 │ 5 │ Saturday │ 6 │
└─────────────────────┴─────────────┴──────────────────┴─────────────────┘
"""
return DayOfWeek(self)

Expand Down Expand Up @@ -994,3 +1020,13 @@ def full_name(self):
The name of the day of the week
"""
return ops.DayOfWeekName(self._expr).to_expr()

def iso_index(self):
"""Get the ISO 8601 index of the day of the week (1=Monday, 7=Sunday).

Returns
-------
IntegerValue
The ISO 8601 index of the day of the week (1=Monday, 7=Sunday).
"""
return ops.IsoDayOfWeekIndex(self._expr).to_expr()

0 comments on commit faad9d3

Please sign in to comment.