Skip to content
Snippets Groups Projects
vad_arguments.py 1.69 KiB
Newer Older
  • Learn to ignore specific revisions
  • from dataclasses import dataclass, field
    
    
    @dataclass
    class VADHandlerArguments:
        # Cutoff used by the voice activity detector; per the help text, values
        # typically lie between 0 and 1 and higher values demand more confidence.
        thresh: float = field(
            metadata={
                "help": "The threshold value for voice activity detection (VAD). Values typically range from 0 to 1, with higher values requiring higher confidence in speech detection."
            },
            default=0.3,
        )
        # Audio sample rate in Hertz; defaults to 16 kHz.
        sample_rate: int = field(
            metadata={
                "help": "The sample rate of the audio in Hertz. Default is 16000 Hz, which is a common setting for voice audio."
            },
            default=16000,
        )
        # Shortest silence interval (in milliseconds) used to segment speech.
        min_silence_ms: int = field(
            metadata={
                "help": "Minimum length of silence intervals to be used for segmenting speech. Measured in milliseconds. Default is 250 ms."
            },
            default=250,
        )
        # Shortest speech segment (in milliseconds) that counts as valid speech.
        min_speech_ms: int = field(
            metadata={
                "help": "Minimum length of speech segments to be considered valid speech. Measured in milliseconds. Default is 500 ms."
            },
            default=500,
        )
        # Longest continuous speech before a split is forced; the infinite
        # default means no forced split.
        max_speech_ms: float = field(
            metadata={
                "help": "Maximum length of continuous speech before forcing a split. Default is infinite, allowing for uninterrupted speech segments."
            },
            default=float("inf"),
        )
        # Padding (in milliseconds) added to both the beginning and the end of
        # each detected speech segment. The help text is kept in sync with the
        # actual default so the CLI/help output does not mislead users.
        speech_pad_ms: int = field(
            default=500,
            metadata={
                "help": "Amount of padding added to the beginning and end of detected speech segments. Measured in milliseconds. Default is 500 ms."
            },
        )
    
        audio_enhancement: bool = field(
            default=False,
    
                "help": "improves sound quality by applying techniques like noise reduction, equalization, and echo cancellation. Default is False."