Skip to content
Snippets Groups Projects
Commit aeec7d72 authored by 0xsynapse
Browse files

made small fixes in arguments_classes and TTS folder

parent 725184a8
No related branches found
No related tags found
No related merge requests found
...@@ -64,8 +64,8 @@ class ChatTTSHandler(BaseHandler): ...@@ -64,8 +64,8 @@ class ChatTTSHandler(BaseHandler):
audio_chunk = librosa.resample(gen[0], orig_sr=24000, target_sr=16000) audio_chunk = librosa.resample(gen[0], orig_sr=24000, target_sr=16000)
audio_chunk = (audio_chunk * 32768).astype(np.int16)[0] audio_chunk = (audio_chunk * 32768).astype(np.int16)[0]
while len(audio_chunk) > self.chunk_size: while len(audio_chunk) > self.chunk_size:
yield audio_chunk[: self.chunk_size] # 返回前 chunk_size 字节的数据 yield audio_chunk[: self.chunk_size] # Return the first chunk_size samples of the audio data
audio_chunk = audio_chunk[self.chunk_size :] # 移除已返回的数据 audio_chunk = audio_chunk[self.chunk_size :] # Remove the samples that have already been returned
yield np.pad(audio_chunk, (0, self.chunk_size - len(audio_chunk))) yield np.pad(audio_chunk, (0, self.chunk_size - len(audio_chunk)))
else: else:
wavs = wavs_gen wavs = wavs_gen
......
...@@ -6,7 +6,7 @@ class LanguageModelHandlerArguments: ...@@ -6,7 +6,7 @@ class LanguageModelHandlerArguments:
lm_model_name: str = field( lm_model_name: str = field(
default="HuggingFaceTB/SmolLM-360M-Instruct", default="HuggingFaceTB/SmolLM-360M-Instruct",
metadata={ metadata={
"help": "The pretrained language model to use. Default is 'microsoft/Phi-3-mini-4k-instruct'." "help": "The pretrained language model to use. Default is 'HuggingFaceTB/SmolLM-360M-Instruct'."
}, },
) )
lm_device: str = field( lm_device: str = field(
......
...@@ -6,7 +6,7 @@ class MLXLanguageModelHandlerArguments: ...@@ -6,7 +6,7 @@ class MLXLanguageModelHandlerArguments:
mlx_lm_model_name: str = field( mlx_lm_model_name: str = field(
default="mlx-community/SmolLM-360M-Instruct", default="mlx-community/SmolLM-360M-Instruct",
metadata={ metadata={
"help": "The pretrained language model to use. Default is 'microsoft/Phi-3-mini-4k-instruct'." "help": "The pretrained language model to use. Default is 'mlx-community/SmolLM-360M-Instruct'."
}, },
) )
mlx_lm_device: str = field( mlx_lm_device: str = field(
......
...@@ -7,7 +7,7 @@ class OpenApiLanguageModelHandlerArguments: ...@@ -7,7 +7,7 @@ class OpenApiLanguageModelHandlerArguments:
# default="HuggingFaceTB/SmolLM-360M-Instruct", # default="HuggingFaceTB/SmolLM-360M-Instruct",
default="deepseek-chat", default="deepseek-chat",
metadata={ metadata={
"help": "The pretrained language model to use. Default is 'microsoft/Phi-3-mini-4k-instruct'." "help": "The pretrained language model to use. Default is 'deepseek-chat'."
}, },
) )
open_api_user_role: str = field( open_api_user_role: str = field(
......
...@@ -30,13 +30,13 @@ class ParlerTTSHandlerArguments: ...@@ -30,13 +30,13 @@ class ParlerTTSHandlerArguments:
tts_gen_min_new_tokens: int = field( tts_gen_min_new_tokens: int = field(
default=64, default=64,
metadata={ metadata={
"help": "Maximum number of new tokens to generate in a single completion. Default is 10, which corresponds to ~0.1 secs" "help": "Maximum number of new tokens to generate in a single completion. Default is 64, which corresponds to ~0.64 secs"
}, },
) )
tts_gen_max_new_tokens: int = field( tts_gen_max_new_tokens: int = field(
default=512, default=512,
metadata={ metadata={
"help": "Maximum number of new tokens to generate in a single completion. Default is 256, which corresponds to ~6 secs" "help": "Maximum number of new tokens to generate in a single completion. Default is 512, which corresponds to ~12 secs"
}, },
) )
description: str = field( description: str = field(
...@@ -51,7 +51,7 @@ class ParlerTTSHandlerArguments: ...@@ -51,7 +51,7 @@ class ParlerTTSHandlerArguments:
play_steps_s: float = field( play_steps_s: float = field(
default=1.0, default=1.0,
metadata={ metadata={
"help": "The time interval in seconds for playing back the generated speech in steps. Default is 0.5 seconds." "help": "The time interval in seconds for playing back the generated speech in steps. Default is 1.0 seconds."
}, },
) )
max_prompt_pad_length: int = field( max_prompt_pad_length: int = field(
......
...@@ -36,7 +36,7 @@ class VADHandlerArguments: ...@@ -36,7 +36,7 @@ class VADHandlerArguments:
speech_pad_ms: int = field( speech_pad_ms: int = field(
default=500, default=500,
metadata={ metadata={
"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms." "help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 500 ms."
}, },
) )
audio_enhancement: bool = field( audio_enhancement: bool = field(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment