Skip to content
Snippets Groups Projects
Commit 5e667f94 authored by Andres Marafioti
Browse files

linting and set audio enhancement off by default

parent 4b1e6274
No related branches found
No related tags found
No related merge requests found
...@@ -29,7 +29,7 @@ class VADHandler(BaseHandler): ...@@ -29,7 +29,7 @@ class VADHandler(BaseHandler):
min_speech_ms=500, min_speech_ms=500,
max_speech_ms=float("inf"), max_speech_ms=float("inf"),
speech_pad_ms=30, speech_pad_ms=30,
audio_enhancement=True audio_enhancement=False,
): ):
self.should_listen = should_listen self.should_listen = should_listen
self.sample_rate = sample_rate self.sample_rate = sample_rate
...@@ -65,11 +65,24 @@ class VADHandler(BaseHandler): ...@@ -65,11 +65,24 @@ class VADHandler(BaseHandler):
logger.debug("Stop listening") logger.debug("Stop listening")
if self.audio_enhancement: if self.audio_enhancement:
if self.sample_rate != self.df_state.sr(): if self.sample_rate != self.df_state.sr():
audio_float32 = torchaudio.functional.resample(torch.from_numpy(array),orig_freq=self.sample_rate, audio_float32 = torchaudio.functional.resample(
new_freq=self.df_state.sr()) torch.from_numpy(array),
enhanced = enhance(self.enhanced_model, self.df_state, audio_float32.unsqueeze(0)) orig_freq=self.sample_rate,
enhanced = torchaudio.functional.resample(enhanced, orig_freq=self.df_state.sr(),new_freq=self.sample_rate) new_freq=self.df_state.sr(),
)
enhanced = enhance(
self.enhanced_model,
self.df_state,
audio_float32.unsqueeze(0),
)
enhanced = torchaudio.functional.resample(
enhanced,
orig_freq=self.df_state.sr(),
new_freq=self.sample_rate,
)
else: else:
enhanced = enhance(self.enhanced_model, self.df_state, audio_float32) enhanced = enhance(
self.enhanced_model, self.df_state, audio_float32
)
array = enhanced.numpy().squeeze() array = enhanced.numpy().squeeze()
yield array yield array
...@@ -39,8 +39,8 @@ class VADHandlerArguments: ...@@ -39,8 +39,8 @@ class VADHandlerArguments:
"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms." "help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms."
}, },
) )
audio_enhancement:bool = field( audio_enhancement: bool = field(
default=True, default=False,
metadata={ metadata={
"help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is True." "help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is True."
}, },
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment