Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎉 allow non pd.DatetimeIndex for processing #127

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions tsflex/processing/series_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,7 @@ def process(
for s in to_series_list(data):
# Assert the assumptions we make!
if len(s):
assert isinstance(s.index, pd.DatetimeIndex)
# TODO: also check monotonic increasing?
assert s.index.is_monotonic_increasing

if s.name in self.get_required_series():
series_dict[str(s.name)] = s.copy() if copy else s
Expand Down
8 changes: 4 additions & 4 deletions tsflex/processing/series_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,9 +321,9 @@ def _handle_seriesprocessor_func_output(
# Nothing has to be done! A pd.DataFrame can be added to a series_dict using
# series_dict.update(df)
# Note: converting this to a dictionary (to_dict()) is **very** inefficient!
# Assert that the DataFrame has a time-index
# Assert that the DataFrame has a time-index or a range-index
if len(func_output):
assert isinstance(func_output.index, pd.DatetimeIndex)
assert isinstance(func_output.index, (pd.DatetimeIndex, pd.RangeIndex))
# Assert that the DataFrame columns are named
assert all(
func_output.columns.values != [i for i in range(func_output.shape[1])]
Expand All @@ -334,9 +334,9 @@ def _handle_seriesprocessor_func_output(
# Convert series to series_dict and return
# => if func_output.name is in the required_dict, then the original series will
# be replaced by this new series.
# Assert that the series has a time-index
# Assert that the series has a time-index or a range-index
if len(func_output):
assert isinstance(func_output.index, pd.DatetimeIndex)
assert isinstance(func_output.index, (pd.DatetimeIndex, pd.RangeIndex))
# Assert (func_output.name is not None) | (len(required_dict) == 1)
if func_output.name is None:
# If a series without a name is returned that is constructed from just 1
Expand Down
10 changes: 9 additions & 1 deletion tsflex/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,17 @@ def series_dict_to_df(series_dict: Dict[str, pd.Series]) -> pd.DataFrame:
return pd.DataFrame(series_dict)
# 1. Check if the time-indexes of the series are equal, to create the df efficiently
try:

def _get_index_freq(index): # type: ignore[no-untyped-def]
if isinstance(index, pd.DatetimeIndex):
return index.freq
elif isinstance(index, pd.RangeIndex):
return index.step
return None

index_info = set(
[
(s.index[0], s.index[-1], len(s), s.index.freq)
(s.index[0], s.index[-1], len(s), _get_index_freq(s.index))
for s in series_dict.values()
]
)
Expand Down
Loading