diff --git a/VAD/vad_handler.py b/VAD/vad_handler.py index e70a44c2afc6d6a1de456d7c0fb0dd81d7e65e95..1dc64008f142cac08a3ca1cb7ed068f00a221097 100644 --- a/VAD/vad_handler.py +++ b/VAD/vad_handler.py @@ -29,7 +29,7 @@ class VADHandler(BaseHandler): min_speech_ms=500, max_speech_ms=float("inf"), speech_pad_ms=30, - audio_enhancement=True + audio_enhancement=False, ): self.should_listen = should_listen self.sample_rate = sample_rate @@ -65,11 +65,24 @@ class VADHandler(BaseHandler): logger.debug("Stop listening") if self.audio_enhancement: if self.sample_rate != self.df_state.sr(): - audio_float32 = torchaudio.functional.resample(torch.from_numpy(array),orig_freq=self.sample_rate, - new_freq=self.df_state.sr()) - enhanced = enhance(self.enhanced_model, self.df_state, audio_float32.unsqueeze(0)) - enhanced = torchaudio.functional.resample(enhanced, orig_freq=self.df_state.sr(),new_freq=self.sample_rate) + audio_float32 = torchaudio.functional.resample( + torch.from_numpy(array), + orig_freq=self.sample_rate, + new_freq=self.df_state.sr(), + ) + enhanced = enhance( + self.enhanced_model, + self.df_state, + audio_float32.unsqueeze(0), + ) + enhanced = torchaudio.functional.resample( + enhanced, + orig_freq=self.df_state.sr(), + new_freq=self.sample_rate, + ) else: - enhanced = enhance(self.enhanced_model, self.df_state, audio_float32) + enhanced = enhance( + self.enhanced_model, self.df_state, audio_float32 + ) array = enhanced.numpy().squeeze() yield array diff --git a/arguments_classes/vad_arguments.py b/arguments_classes/vad_arguments.py index 41f4b6d6a252a22d7acf481fc26de0bfbf989a91..8f57fb4bc8b1b34906da00bb325d7969ce5d9300 100644 --- a/arguments_classes/vad_arguments.py +++ b/arguments_classes/vad_arguments.py @@ -39,8 +39,8 @@ class VADHandlerArguments: "help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms." }, ) - audio_enhancement:bool = field( - default=True, + audio_enhancement: bool = field( + default=False, metadata={ "help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is True." },