From 5e667f94a43152fec53d561213c630d0fbc40721 Mon Sep 17 00:00:00 2001 From: Andres Marafioti <andimarafioti@gmail.com> Date: Tue, 3 Sep 2024 15:50:06 +0200 Subject: [PATCH] linting and set audio enhancement off by default --- VAD/vad_handler.py | 25 +++++++++++++++++++------ arguments_classes/vad_arguments.py | 4 ++-- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/VAD/vad_handler.py b/VAD/vad_handler.py index e70a44c..1dc6400 100644 --- a/VAD/vad_handler.py +++ b/VAD/vad_handler.py @@ -29,7 +29,7 @@ class VADHandler(BaseHandler): min_speech_ms=500, max_speech_ms=float("inf"), speech_pad_ms=30, - audio_enhancement=True + audio_enhancement=False, ): self.should_listen = should_listen self.sample_rate = sample_rate @@ -65,11 +65,24 @@ class VADHandler(BaseHandler): logger.debug("Stop listening") if self.audio_enhancement: if self.sample_rate != self.df_state.sr(): - audio_float32 = torchaudio.functional.resample(torch.from_numpy(array),orig_freq=self.sample_rate, - new_freq=self.df_state.sr()) - enhanced = enhance(self.enhanced_model, self.df_state, audio_float32.unsqueeze(0)) - enhanced = torchaudio.functional.resample(enhanced, orig_freq=self.df_state.sr(),new_freq=self.sample_rate) + audio_float32 = torchaudio.functional.resample( + torch.from_numpy(array), + orig_freq=self.sample_rate, + new_freq=self.df_state.sr(), + ) + enhanced = enhance( + self.enhanced_model, + self.df_state, + audio_float32.unsqueeze(0), + ) + enhanced = torchaudio.functional.resample( + enhanced, + orig_freq=self.df_state.sr(), + new_freq=self.sample_rate, + ) else: - enhanced = enhance(self.enhanced_model, self.df_state, audio_float32) + enhanced = enhance( + self.enhanced_model, self.df_state, audio_float32 + ) array = enhanced.numpy().squeeze() yield array diff --git a/arguments_classes/vad_arguments.py b/arguments_classes/vad_arguments.py index 41f4b6d..8f57fb4 100644 --- a/arguments_classes/vad_arguments.py +++ b/arguments_classes/vad_arguments.py @@ -39,8 +39,8 @@ class VADHandlerArguments: "help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms." }, ) - audio_enhancement:bool = field( - default=True, + audio_enhancement: bool = field( + default=False, metadata={ "help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is True." }, -- GitLab