import numpy as np
from pytorch_forecasting.data.examples import get_stallion_data

# load data as pandas dataframe
data = get_stallion_data()
# Make sure each row can be identified with a time step and a time series.
# add time index that is incremented by one for each time step
data["time_idx"] = data["date"].dt.year * 12 + data["date"].dt.month
data["time_idx"] -= data["time_idx"].min()
# Add additional features
# categories have to be strings
data["month"] = data["date"].dt.month.astype(str).astype("category")
data["log_volume"] =np.log(data.volume+1e-8)
data["avg_volume_by_sku"] = (data.groupby(["time_idx", "sku"], observed=True).volume.transform("mean"))
data["avg_volume_by_agency"] = (data.groupby(["time_idx", "agency"], observed=True).volume.transform("mean"))
# Encode special days as unique identifier
# first reverse one-hot encoding
special_days = [
"easter_day", "good_friday", "new_year", "christmas",
"labor_day", "independence_day", "revolution_day_memorial",
"regional_games", "fifa_u_17_world_cup", "football_gold_cup",
"beer_capital", "music_fest"
]
data[special_days] = (
data[special_days]
.apply(lambda x: x.map({0: "-", 1: x.name}))
.astype("category")
)
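To see what this reverse one-hot step does, here is a minimal, self-contained sketch on a tiny toy frame built from two of the special-day columns (the row values are illustrative, not taken from the dataset):

import pandas as pd

toy = pd.DataFrame({"easter_day": [0, 1, 0], "music_fest": [1, 0, 0]})
# each 0/1 indicator column is mapped to either "-" or its own column name,
# turning the one-hot flags into categorical labels
toy = toy.apply(lambda x: x.map({0: "-", 1: x.name})).astype("category")
# easter_day becomes ["-", "easter_day", "-"], music_fest becomes ["music_fest", "-", "-"]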
# Sample data preview
data.sample(10, random_state=521)
# use the last six months as a validation set, and compare to the forecast results
max_prediction_length = 6  # forecast 6 months
max_encoder_length = 24  # use 24 months of history
training_cutoff = data["time_idx"].max() - max_prediction_length

# Normalize data: scale each time series separately and indicate that values are always positive
from pytorch_forecasting.data import TimeSeriesDataSet, GroupNormalizer
# Create training set
training = TimeSeriesDataSet(
data[lambda x: x.time_idx <= training_cutoff],
time_idx="time_idx",
target="volume",
group_ids=["agency", "sku"],
min_encoder_length=0,  # allow predictions without history
max_encoder_length=max_encoder_length,
min_prediction_length=1,
max_prediction_length=max_prediction_length,
static_categoricals=["agency", "sku"],
static_reals=[
"avg_population_2017",
"avg_yearly_household_income_2017"
],
time_varying_known_categoricals=["special_days", "month"],
# group of categorical variables can be treated as
# one variable --> the list of special days
variable_groups={"special_days": special_days},
time_varying_known_reals=[
"time_idx",
"price_regular",
"discount_in_percent"
],
time_varying_unknown_categoricals=[],
time_varying_unknown_reals=[
"volume",
"log_volume",
"industry_volume",
"soda_volume",
"avg_max_temp",
"avg_volume_by_agency",
"avg_volume_by_sku",
],
target_normalizer=GroupNormalizer(
groups=["agency", "sku"], coerce_positive=1.0
),  # use softplus with beta=1.0 and normalize by group
add_relative_time_idx=True,  # add as feature
add_target_scales=True,  # add as feature
add_encoder_length=True,  # add as feature
)
# create validation set (predict=True) which means to predict the
# last max_prediction_length points in time for each series
validation = TimeSeriesDataSet.from_dataset(
training, data, predict=True, stop_randomization=True
)
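The dataloaders used further down (train_dataloader, val_dataloader) are not created anywhere in the snippet above. A minimal sketch of that missing step, assuming the standard TimeSeriesDataSet.to_dataloader call and an illustrative batch size of 128:

# convert the datasets to dataloaders for training and validation
# (batch_size and num_workers are illustrative choices, not prescribed by the snippet above)
batch_size = 128
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)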
import torch
from pytorch_forecasting import Baseline

# calculate baseline mean absolute error, i.e. predict next value as the last available value from the history
actuals = torch.cat([y for x, y in iter(val_dataloader)])
baseline_predictions = Baseline().predict(val_dataloader)
(actuals - baseline_predictions).abs().mean().item()
import pytorch_lightning as pl

pl.seed_everything(42)

trainer = pl.Trainer(
gpus=0,
# clipping gradients is a hyperparameter and important to prevent divergence
# of the gradient for recurrent neural networks
gradient_clip_val=0.1,
)
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss

tft = TemporalFusionTransformer.from_dataset(
training,
learning_rate=0.03,  # not meaningful for finding the learning rate but otherwise very important
hidden_size=16,  # most important hyperparameter apart from learning rate
# number of attention heads. Set to up to 4 for large datasets
attention_head_size=1,
dropout=0.1,  # between 0.1 and 0.3 are good values
hidden_continuous_size=8,  # set to <= hidden_size
output_size=7,  # 7 quantiles by default
loss=QuantileLoss(),
# reduce learning rate if no improvement in validation loss after x epochs
reduce_on_plateau_patience=4,
)
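The comment on learning_rate suggests this trainer and model are set up for the learning rate finder, but that call is not shown. A minimal sketch, assuming the lr_find API of the older PyTorch Lightning release this code targets (newer releases expose it as trainer.tuner.lr_find) and illustrative search bounds:

# run the learning rate range test to pick a sensible learning rate
res = trainer.lr_find(tft, train_dataloader=train_dataloader, val_dataloaders=val_dataloader, max_lr=10.0, min_lr=1e-6)
print(f"suggested learning rate: {res.suggestion()}")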
from pytorch_lightning.callbacks import EarlyStopping

# Halt training when the loss metric does not improve on the validation set
early_stop_callback = EarlyStopping(
monitor="val_loss",
min_delta=1e-4,
patience=10,
verbose=False,
mode="min"
)
from pytorch_lightning.callbacks import LearningRateLogger
from pytorch_lightning.loggers import TensorBoardLogger

# configure logging
lr_logger = LearningRateLogger()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # log results to tensorboard

# create trainer using PyTorch Lightning
trainer = pl.Trainer(
max_epochs=30,
gpus=[0],  # 0 to train on CPU whereas [0] for GPU
gradient_clip_val=0.1,
early_stop_callback=early_stop_callback,
limit_train_batches=30,  # limit each epoch to 30 training batches, so validation runs every 30 batches
# fast_dev_run=True,  # comment in to quickly check for bugs
callbacks=[lr_logger],
logger=logger,
)
# initialise model
tft = TemporalFusionTransformer.from_dataset(
training,
learning_rate=0.03,
hidden_size=16,  # has the biggest influence on network size
attention_head_size=1,
dropout=0.1,
hidden_continuous_size=8,
output_size=7,  # QuantileLoss has 7 quantiles by default
loss=QuantileLoss(),
log_interval=10,  # log example every 10 batches
reduce_on_plateau_patience=4,  # reduce learning rate automatically
)
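The training call itself is missing between initialising the model and evaluating it. A minimal sketch, assuming the dataloaders created earlier and the fit keyword names of the older PyTorch Lightning release used here (newer releases use train_dataloaders):

# fit the network; checkpoints and Tensorboard logs are written by the trainer
trainer.fit(tft, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)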
Training data in Tensorboard (predictions on the training and validation set)
Evaluate the trained model
from pytorch_forecasting.metrics import MAE

# load the best model according to the validation loss (given that
# we use early stopping, this is not necessarily the last epoch)
best_model_path = trainer.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
# calculate mean absolute error on validation set
actuals = torch.cat([y for x, y in iter(val_dataloader)])
predictions = best_tft.predict(val_dataloader)
(actuals - predictions).abs().mean()
tensor(249.1484)
MAE(predictions, actuals)
MAE()
# raw predictions are a dictionary from which all kinds of information, including quantiles, can be extracted
raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
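The dictionary of figures shown below looks like the output of the model's built-in interpretation plots, but the producing call is not included in the snippet. A minimal sketch of that step, using the interpret_output and plot_interpretation methods of the model:

# aggregate attention and variable importances over the validation set
interpretation = best_tft.interpret_output(raw_predictions, reduction="sum")
# returns one matplotlib figure per component (attention, static, encoder and decoder variables)
best_tft.plot_interpretation(interpretation)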
{'attention': <Figure size 432x288 with 1 Axes>,
'static_variables': <Figure size 504x270 with 1 Axes>,
'encoder_variables': <Figure size 504x378 with 1 Axes>,
'decoder_variables': <Figure size 504x252 with 1 Axes>}
As observed, price-related variables are among the top two predictors for both the encoder and the decoder, while the past observed volume stands out as the most important encoder variable. Time-related variables appear to be rather unimportant, which suggests that recent observations matter more to the forecast than older ones.
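To back this reading with numbers rather than plots, the summed importances can also be read directly from the interpretation dictionary; a small sketch, assuming the interpret_output result from above and the model's encoder_variables and decoder_variables name lists:

# pair each variable name with its summed importance and print the top entries
encoder_importance = dict(zip(best_tft.encoder_variables, interpretation["encoder_variables"].tolist()))
decoder_importance = dict(zip(best_tft.decoder_variables, interpretation["decoder_variables"].tolist()))
print(sorted(encoder_importance.items(), key=lambda kv: kv[1], reverse=True)[:5])
print(sorted(decoder_importance.items(), key=lambda kv: kv[1], reverse=True)[:5])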