Skip to content
Snippets Groups Projects
Commit 363a74de authored by wuhongsheng's avatar wuhongsheng
Browse files

feat:add DeepFilterNet for speech enhancement to obtain clear speech input

parent 7978683c
No related branches found
No related tags found
No related merge requests found
import torchaudio
from VAD.vad_iterator import VADIterator
from baseHandler import BaseHandler
import numpy as np
......@@ -5,7 +6,7 @@ import torch
from rich.console import Console
from utils.utils import int2float
from df.enhance import enhance, init_df
import logging
logger = logging.getLogger(__name__)
......@@ -28,6 +29,7 @@ class VADHandler(BaseHandler):
min_speech_ms=500,
max_speech_ms=float("inf"),
speech_pad_ms=30,
audio_enhancement=True
):
self.should_listen = should_listen
self.sample_rate = sample_rate
......@@ -42,6 +44,9 @@ class VADHandler(BaseHandler):
min_silence_duration_ms=min_silence_ms,
speech_pad_ms=speech_pad_ms,
)
self.audio_enhancement = audio_enhancement
if audio_enhancement:
self.enhanced_model, self.df_state, _ = init_df()
def process(self, audio_chunk):
audio_int16 = np.frombuffer(audio_chunk, dtype=np.int16)
......@@ -58,4 +63,13 @@ class VADHandler(BaseHandler):
else:
self.should_listen.clear()
logger.debug("Stop listening")
if self.audio_enhancement:
if self.sample_rate != self.df_state.sr():
audio_float32 = torchaudio.functional.resample(torch.from_numpy(array),orig_freq=self.sample_rate,
new_freq=self.df_state.sr())
enhanced = enhance(self.enhanced_model, self.df_state, audio_float32.unsqueeze(0))
enhanced = torchaudio.functional.resample(enhanced, orig_freq=self.df_state.sr(),new_freq=self.sample_rate)
else:
enhanced = enhance(self.enhanced_model, self.df_state, audio_float32)
array = enhanced.numpy().squeeze()
yield array
......@@ -39,3 +39,9 @@ class VADHandlerArguments:
"help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 250 ms."
},
)
audio_enhancement:bool = field(
default=True,
metadata={
"help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is True."
},
)
......@@ -4,4 +4,5 @@ melotts @ git+https://github.com/andimarafioti/MeloTTS.git#egg=MeloTTS # made a
torch==2.4.0
sounddevice==0.5.0
funasr
modelscope
\ No newline at end of file
modelscope
deepfilternet
......@@ -6,4 +6,5 @@ sounddevice==0.5.0
lightning-whisper-mlx>=0.0.10
mlx-lm>=0.14.0
funasr>=1.1.6
modelscope>=1.17.1
\ No newline at end of file
modelscope>=1.17.1
deepfilternet
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment