diff --git a/STT/whisper_stt_handler.py b/STT/whisper_stt_handler.py index b1596a9b701cbed35640e69989bca15fd6639e39..1470bfbdbf0a307d4128a556190d3d95e84fd7e7 100644 --- a/STT/whisper_stt_handler.py +++ b/STT/whisper_stt_handler.py @@ -66,6 +66,7 @@ class WhisperSTTHandler(BaseHandler): if self.compile_mode not in (None, "default"): # generating more tokens than previously will trigger CUDA graphs capture # one should warmup with a number of generated tokens above max tokens targeted for subsequent generation + # hence, having min_new_tokens < max_new_tokens in the future doesn't make sense warmup_gen_kwargs = { "min_new_tokens": self.gen_kwargs["max_new_tokens"], # Yes, assign max_new_tokens to min_new_tokens "max_new_tokens": self.gen_kwargs["max_new_tokens"],