diff --git a/reproductions/finetuning/cal_ql_finetune.py b/reproductions/finetuning/cal_ql_finetune.py index 635caff4..e7131235 100644 --- a/reproductions/finetuning/cal_ql_finetune.py +++ b/reproductions/finetuning/cal_ql_finetune.py @@ -1,4 +1,5 @@ import argparse +import math import d3rlpy @@ -43,7 +44,7 @@ def main() -> None: critic_learning_rate=3e-4, temp_learning_rate=1e-4, alpha_learning_rate=3e-4, - initial_alpha=2.72, + initial_alpha=math.e, batch_size=256, conservative_weight=5.0, critic_encoder_factory=encoder,