Skip to content
Snippets Groups Projects
Commit aeec7d72 authored by 0xsynapse
Browse files

made small fixes in arguments_classes and TTS folder

parent 725184a8
Branches master
No related tags found
No related merge requests found
......@@ -64,8 +64,8 @@ class ChatTTSHandler(BaseHandler):
audio_chunk = librosa.resample(gen[0], orig_sr=24000, target_sr=16000)
audio_chunk = (audio_chunk * 32768).astype(np.int16)[0]
while len(audio_chunk) > self.chunk_size:
yield audio_chunk[: self.chunk_size] # 返回前 chunk_size 字节的数据
audio_chunk = audio_chunk[self.chunk_size :] # 移除已返回的数据
yield audio_chunk[: self.chunk_size] # Return the first chunk_size samples of the audio data
audio_chunk = audio_chunk[self.chunk_size :] # Remove the samples that have already been returned
yield np.pad(audio_chunk, (0, self.chunk_size - len(audio_chunk)))
else:
wavs = wavs_gen
......
......@@ -6,7 +6,7 @@ class LanguageModelHandlerArguments:
lm_model_name: str = field(
default="HuggingFaceTB/SmolLM-360M-Instruct",
metadata={
"help": "The pretrained language model to use. Default is 'microsoft/Phi-3-mini-4k-instruct'."
"help": "The pretrained language model to use. Default is 'HuggingFaceTB/SmolLM-360M-Instruct'."
},
)
lm_device: str = field(
......
......@@ -6,7 +6,7 @@ class MLXLanguageModelHandlerArguments:
mlx_lm_model_name: str = field(
default="mlx-community/SmolLM-360M-Instruct",
metadata={
"help": "The pretrained language model to use. Default is 'microsoft/Phi-3-mini-4k-instruct'."
"help": "The pretrained language model to use. Default is 'mlx-community/SmolLM-360M-Instruct'."
},
)
mlx_lm_device: str = field(
......
......@@ -7,7 +7,7 @@ class OpenApiLanguageModelHandlerArguments:
# default="HuggingFaceTB/SmolLM-360M-Instruct",
default="deepseek-chat",
metadata={
"help": "The pretrained language model to use. Default is 'microsoft/Phi-3-mini-4k-instruct'."
"help": "The pretrained language model to use. Default is 'deepseek-chat'."
},
)
open_api_user_role: str = field(
......
......@@ -30,13 +30,13 @@ class ParlerTTSHandlerArguments:
tts_gen_min_new_tokens: int = field(
default=64,
metadata={
"help": "Maximum number of new tokens to generate in a single completion. Default is 10, which corresponds to ~0.1 secs"
"help": "Minimum number of new tokens to generate in a single completion. Default is 64, which corresponds to ~0.64 secs"
},
)
tts_gen_max_new_tokens: int = field(
default=512,
metadata={
"help": "Maximum number of new tokens to generate in a single completion. Default is 256, which corresponds to ~6 secs"
"help": "Maximum number of new tokens to generate in a single completion. Default is 512, which corresponds to ~12 secs"
},
)
description: str = field(
......@@ -51,7 +51,7 @@ class ParlerTTSHandlerArguments:
play_steps_s: float = field(
default=1.0,
metadata={
"help": "The time interval in seconds for playing back the generated speech in steps. Default is 0.5 seconds."
"help": "The time interval in seconds for playing back the generated speech in steps. Default is 1.0 seconds."
},
)
max_prompt_pad_length: int = field(
......
......@@ -36,7 +36,7 @@ class VADHandlerArguments:
speech_pad_ms: int = field(
default=500,
metadata={
"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms."
"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 500 ms."
},
)
audio_enhancement: bool = field(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment