|
class BookingsArrivalsRelated(Series):
    """Related series holding per-item booking features and demand-trend regressors.

    The instance keeps its data in ``self.df``, restricted to the columns
    declared in ``columns``. ``keepler_regressors`` adds the ``trend_*``
    columns computed from a target series' demand.
    """

    # Timestamp pattern; presumably consumed by the Series base class -- confirm.
    timestamp_format = "yyyy-MM-dd"

    # Column declarations handed to the base constructor. The commented-out
    # entries are currently disabled.
    columns = {
        "timestamp": ColInfo(type="timestamp"),
        "item_id": ColInfo(type="string"),
        # "customers": ColInfo(method=None),
        # "suppliers": ColInfo(method=None),
        # "bookings": ColInfo(method=None),
        "cancelled": ColInfo(method=None),
        "trend_7d": ColInfo(method=None),
        "trend_30d": ColInfo(method=None),
        "trend_90d": ColInfo(method=None),
    }

    def __init__(self, data_df, start, end, freq, rolling_avg=0):
        """Initialise the base series and keep only the declared columns.

        Args:
            data_df: DataFrame with the raw data; columns not declared in
                ``columns`` are ignored, declared columns that are missing
                from ``data_df`` are skipped.
            start, end, freq, rolling_avg: forwarded unchanged to the
                ``Series`` base constructor.
        """
        super().__init__("bookings", self.columns, start, end, freq,
                         rolling_avg=rolling_avg, use_cache=False)
        # Follow the declaration order of ``columns`` so that the resulting
        # column order is deterministic (a set intersection is not).
        cols = [name for name in self.columns if name in data_df.columns]
        self.df = data_df[cols]

    def keepler_regressors(self, target: Series):
        """Add the trend regressors proposed by Keepler to ``self.df``.

        For every item in ``target.df`` the rolling mean of ``demand`` is
        computed over windows of 7, 30, 90 and 365 rows (``min_periods=1``).
        Each ``trend_{N}d`` column is the 365-row mean minus the N-row mean.
        The trend columns are then merged into ``self.df`` on
        ``item_id``/``timestamp``.

        Args:
            target: series whose ``df`` provides the ``item_id``,
                ``timestamp`` and ``demand`` columns.

        Returns:
            None. ``self.df`` is replaced by the merge result; with the default
            inner join, rows without a matching (item_id, timestamp) pair in
            ``target.df`` are dropped.
        """
        windows = [7, 30, 90, 365]
        trend_windows = [7, 30, 90]

        bdata = target.df[['item_id', 'timestamp', 'demand']].copy()

        # Create the rolling-mean columns up front so that an empty target
        # yields empty trend columns instead of a KeyError further down.
        for w in windows:
            bdata[f"{w}d"] = float("nan")

        # NOTE(review): the windows are integer row counts, so they equal
        # calendar days only if every item has exactly one row per day, and
        # the rows are not sorted here -- this assumes target.df is already
        # in chronological order per item. Confirm against the caller.
        for it in bdata.item_id.unique():
            # The mask must be built from bdata itself: it is used to index
            # bdata, and self.df is not guaranteed to share its index.
            item_rows = bdata['item_id'] == it
            # The per-item slice does not depend on the window size.
            item_demand = bdata.loc[item_rows, [self.timestamp_col, 'demand']]
            for w in windows:
                roll = item_demand.rolling(window=w, min_periods=1,
                                           on=self.timestamp_col)
                bdata.loc[item_rows, f"{w}d"] = roll['demand'].mean()

        # Trend of each shorter average relative to the 365-row average.
        for d in trend_windows:
            bdata[f'trend_{d}d'] = bdata['365d'] - bdata[f'{d}d']

        # Only the keys and the trend columns take part in the merge.
        bdata = bdata.drop(columns=[f"{w}d" for w in windows] + ["demand"])

        # NOTE(review): if self.df already contains trend_* columns, pandas
        # will suffix the duplicates (_x/_y) -- confirm this cannot happen.
        self.df = self.df.merge(bdata, on=["item_id", "timestamp"])