Skip to content
Snippets Groups Projects
Unverified Commit feb12358 authored by Andrés Marafioti, committed by GitHub
Browse files

Merge pull request #58 from huggingface/fix_stt_compile_mode

Assigning min new tokens to a compiled whisper graph on a thread brea…
parents fc9f9602 3b685e26
No related branches found
No related tags found
No related merge requests found
......@@ -66,8 +66,9 @@ class WhisperSTTHandler(BaseHandler):
if self.compile_mode not in (None, "default"):
# generating more tokens than previously will trigger CUDA graphs capture
# one should warmup with a number of generated tokens above max tokens targeted for subsequent generation
# hence, having min_new_tokens < max_new_tokens in the future doesn't make sense
warmup_gen_kwargs = {
"min_new_tokens": self.gen_kwargs["min_new_tokens"],
"min_new_tokens": self.gen_kwargs["max_new_tokens"], # Yes, assign max_new_tokens to min_new_tokens
"max_new_tokens": self.gen_kwargs["max_new_tokens"],
**self.gen_kwargs,
}
......
......@@ -33,12 +33,6 @@ class WhisperSTTHandlerArguments:
"help": "The maximum number of new tokens to generate. Default is 128."
},
)
stt_gen_min_new_tokens: int = field(
default=0,
metadata={
"help": "The minimum number of new tokens to generate. Default is 0."
},
)
stt_gen_num_beams: int = field(
default=1,
metadata={
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment