Skip to content

Commit

Permalink
hotfix/manual-frequency-override (#patch) (#139)
Browse files Browse the repository at this point in the history
* feature(TimeSeries): when the constructor's 'freq' parameter is passed, it now overrides the automatic frequency detection in case of missing dates


#patch

Co-authored-by: pennfranc <flaessig#student.ethz.ch>
  • Loading branch information
pennfranc authored Jul 13, 2020
1 parent 9255260 commit a29e28e
Showing 1 changed file with 30 additions and 20 deletions.
50 changes: 30 additions & 20 deletions darts/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@ def __init__(self,
df
The actual time series, as a pandas DataFrame with a proper time index.
freq
Optionally, a string representing the frequency of the Pandas DataFrame. When creating a TimeSeries
instance with a length smaller than 3, this argument must be passed.
Optionally, a Pandas offset alias representing the frequency of the DataFrame.
(https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases)
When creating a TimeSeries instance with a length smaller than 3, this argument must be passed.
Furthermore, this argument can be used to override the automatic frequency detection if the
index is incomplete.
fill_missing_dates
Optionally, a boolean value indicating whether to fill missing dates with NaN values
in case the frequency of `df` cannot be inferred.
Expand Down Expand Up @@ -63,7 +66,7 @@ def __init__(self,
else:
if not df.index.inferred_freq:
if fill_missing_dates:
self._df = self._fill_missing_dates(self._df)
self._df = self._fill_missing_dates(self._df, freq)
else:
raise_if_not(False, 'Could not infer frequency. Are some dates missing? '
'Try specifying `fill_missing_dates=True`.', logger)
Expand Down Expand Up @@ -934,39 +937,46 @@ def _combine_from_pd_ops(self, other: 'TimeSeries',
return TimeSeries(series, self.freq())

@staticmethod
def _fill_missing_dates(series: pd.DataFrame) -> pd.DataFrame:
def _fill_missing_dates(series: pd.DataFrame, freq: Optional[str] = None) -> pd.DataFrame:
"""
Tries to fill missing dates in series with NaN.
Method is successful only when explicit frequency can be determined from all consecutive triple timestamps.
If no value for the `freq` argument is provided, the method is successful only when exactly one distinct
frequency is inferred across all windows of three consecutive timestamps (windows for which no frequency
can be inferred are ignored).
If a value for `freq` is given, this value will be used to determine the new frequency.
Parameters
----------
series
The actual time series, as a pandas DataFrame with a proper time index.
freq
Optionally, the desired frequency of the TimeSeries instance.
Returns
-------
pandas.DataFrame
A new Pandas DataFrame without missing dates.
"""
date_axis = series.index
samples_size = 3
observed_frequencies = [
date_axis[x:x + samples_size].inferred_freq
for x
in range(len(date_axis) - samples_size + 1)]

observed_frequencies = set(filter(None.__ne__, observed_frequencies))
if not freq:
date_axis = series.index
samples_size = 3
observed_frequencies = [
date_axis[x:x + samples_size].inferred_freq
for x
in range(len(date_axis) - samples_size + 1)]

observed_frequencies = set(filter(None.__ne__, observed_frequencies))

raise_if_not(
len(observed_frequencies) == 1,
"Could not infer explicit frequency. Observed frequencies: "
+ ('none' if len(observed_frequencies) == 0 else str(observed_frequencies))
+ ". Is Series too short (n=2)?",
logger)

raise_if_not(
len(observed_frequencies) == 1,
"Could not infer explicit frequency. Observed frequencies: "
+ ('none' if len(observed_frequencies) == 0 else str(observed_frequencies))
+ ". Is Series too short (n=2)?",
logger)
freq = observed_frequencies.pop()

inferred_frequency = observed_frequencies.pop()
return series.resample(inferred_frequency).asfreq(fill_value=None)
return series.resample(freq).asfreq(fill_value=None)

"""
Definition of some useful statistical methods.
Expand Down

0 comments on commit a29e28e

Please sign in to comment.