diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 6309e6b9a..81172090b 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -298,6 +298,7 @@ def forward( def _prepare_generation_config( self, generation_config: Optional[GenerationConfig], **kwargs: Dict ) -> Tuple[GenerationConfig, Dict]: + kwargs["use_cache"] = self.use_cache generation_config, model_kwargs = super()._prepare_generation_config(generation_config, **kwargs) generation_method = generation_config.get_generation_mode().value if self.compiled and generation_config.cache_implementation != "ipex_paged" and self._supports_static_cache: