Skip to content
Snippets Groups Projects
Commit 5e667f94 authored by Andres Marafioti
Browse files

linting and set audio enhancement off by default

parent 4b1e6274
Branches
No related tags found
No related merge requests found
......@@ -29,7 +29,7 @@ class VADHandler(BaseHandler):
min_speech_ms=500,
max_speech_ms=float("inf"),
speech_pad_ms=30,
audio_enhancement=True
audio_enhancement=False,
):
self.should_listen = should_listen
self.sample_rate = sample_rate
......@@ -65,11 +65,24 @@ class VADHandler(BaseHandler):
logger.debug("Stop listening")
if self.audio_enhancement:
if self.sample_rate != self.df_state.sr():
audio_float32 = torchaudio.functional.resample(torch.from_numpy(array),orig_freq=self.sample_rate,
new_freq=self.df_state.sr())
enhanced = enhance(self.enhanced_model, self.df_state, audio_float32.unsqueeze(0))
enhanced = torchaudio.functional.resample(enhanced, orig_freq=self.df_state.sr(),new_freq=self.sample_rate)
audio_float32 = torchaudio.functional.resample(
torch.from_numpy(array),
orig_freq=self.sample_rate,
new_freq=self.df_state.sr(),
)
enhanced = enhance(
self.enhanced_model,
self.df_state,
audio_float32.unsqueeze(0),
)
enhanced = torchaudio.functional.resample(
enhanced,
orig_freq=self.df_state.sr(),
new_freq=self.sample_rate,
)
else:
enhanced = enhance(self.enhanced_model, self.df_state, audio_float32)
enhanced = enhance(
self.enhanced_model, self.df_state, audio_float32
)
array = enhanced.numpy().squeeze()
yield array
......@@ -39,8 +39,8 @@ class VADHandlerArguments:
"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms."
},
)
audio_enhancement:bool = field(
default=True,
audio_enhancement: bool = field(
default=False,
metadata={
"help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is False."
},
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment