diff --git a/homeassistant/components/assist_pipeline/audio_enhancer.py b/homeassistant/components/assist_pipeline/audio_enhancer.py
index ff2b122187a7ede9117a6b6579fbb435b64c11c7..1fabc7790e73f5baf2e76376bfa61a29d814f252 100644
--- a/homeassistant/components/assist_pipeline/audio_enhancer.py
+++ b/homeassistant/components/assist_pipeline/audio_enhancer.py
@@ -22,8 +22,8 @@ class EnhancedAudioChunk:
     timestamp_ms: int
     """Timestamp relative to start of audio stream (milliseconds)"""
 
-    is_speech: bool | None
-    """True if audio chunk likely contains speech, False if not, None if unknown"""
+    speech_probability: float | None
+    """Probability that audio chunk contains speech (0-1), None if unknown"""
 
 
 class AudioEnhancer(ABC):
@@ -70,27 +70,27 @@ class MicroVadSpeexEnhancer(AudioEnhancer):
             )
 
         self.vad: MicroVad | None = None
-        self.threshold = 0.5
 
         if self.is_vad_enabled:
             self.vad = MicroVad()
-            _LOGGER.debug("Initialized microVAD with threshold=%s", self.threshold)
+            _LOGGER.debug("Initialized microVAD")
 
     def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
         """Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples."""
-        is_speech: bool | None = None
+        speech_probability: float | None = None
 
         assert len(audio) == BYTES_PER_CHUNK
 
         if self.vad is not None:
             # Run VAD
-            speech_prob = self.vad.Process10ms(audio)
-            is_speech = speech_prob > self.threshold
+            speech_probability = self.vad.Process10ms(audio)
 
         if self.audio_processor is not None:
             # Run noise suppression and auto gain
             audio = self.audio_processor.Process10ms(audio).audio
 
         return EnhancedAudioChunk(
-            audio=audio, timestamp_ms=timestamp_ms, is_speech=is_speech
+            audio=audio,
+            timestamp_ms=timestamp_ms,
+            speech_probability=speech_probability,
         )
diff --git a/homeassistant/components/assist_pipeline/pipeline.py b/homeassistant/components/assist_pipeline/pipeline.py
index a4255e377568b17f4ed02d4c690f8700e295c414..a55e23ae05189b81a9ba2fcf3c73d6b2799d2c62 100644
--- a/homeassistant/components/assist_pipeline/pipeline.py
+++ b/homeassistant/components/assist_pipeline/pipeline.py
@@ -780,7 +780,9 @@ class PipelineRun:
                 # speaking the voice command.
                 audio_chunks_for_stt.extend(
                     EnhancedAudioChunk(
-                        audio=chunk_ts[0], timestamp_ms=chunk_ts[1], is_speech=False
+                        audio=chunk_ts[0],
+                        timestamp_ms=chunk_ts[1],
+                        speech_probability=None,
                     )
                     for chunk_ts in result.queued_audio
                 )
@@ -827,7 +829,7 @@ class PipelineRun:
 
             if wake_word_vad is not None:
                 chunk_seconds = (len(chunk.audio) // sample_width) / sample_rate
-                if not wake_word_vad.process(chunk_seconds, chunk.is_speech):
+                if not wake_word_vad.process(chunk_seconds, chunk.speech_probability):
                     raise WakeWordTimeoutError(
                         code="wake-word-timeout", message="Wake word was not detected"
                     )
@@ -955,7 +957,7 @@ class PipelineRun:
 
             if stt_vad is not None:
                 chunk_seconds = (len(chunk.audio) // sample_width) / sample_rate
-                if not stt_vad.process(chunk_seconds, chunk.is_speech):
+                if not stt_vad.process(chunk_seconds, chunk.speech_probability):
                     # Silence detected at the end of voice command
                     self.process_event(
                         PipelineEvent(
@@ -1221,7 +1223,7 @@ class PipelineRun:
                 yield EnhancedAudioChunk(
                     audio=sub_chunk,
                     timestamp_ms=timestamp_ms,
-                    is_speech=None,  # no VAD
+                    speech_probability=None,  # no VAD
                 )
                 timestamp_ms += MS_PER_CHUNK
 
diff --git a/homeassistant/components/assist_pipeline/vad.py b/homeassistant/components/assist_pipeline/vad.py
index 4782d14dee47da294e56c06db62721a03c074528..deae5b9b7b38764b5e8283bce38b9797b214d8db 100644
--- a/homeassistant/components/assist_pipeline/vad.py
+++ b/homeassistant/components/assist_pipeline/vad.py
@@ -75,7 +75,7 @@ class AudioBuffer:
 class VoiceCommandSegmenter:
     """Segments an audio stream into voice commands."""
 
-    speech_seconds: float = 0.3
+    speech_seconds: float = 0.1
     """Seconds of speech before voice command has started."""
 
     command_seconds: float = 1.0
@@ -96,6 +96,12 @@ class VoiceCommandSegmenter:
     timed_out: bool = False
     """True a timeout occurred during voice command."""
 
+    before_command_speech_threshold: float = 0.2
+    """Probability threshold for speech before voice command."""
+
+    in_command_speech_threshold: float = 0.5
+    """Probability threshold for speech during voice command."""
+
     _speech_seconds_left: float = 0.0
     """Seconds left before considering voice command as started."""
 
@@ -124,7 +130,7 @@ class VoiceCommandSegmenter:
         self._reset_seconds_left = self.reset_seconds
         self.in_command = False
 
-    def process(self, chunk_seconds: float, is_speech: bool | None) -> bool:
+    def process(self, chunk_seconds: float, speech_probability: float | None) -> bool:
         """Process samples using external VAD.
 
         Returns False when command is done.
@@ -142,7 +148,12 @@ class VoiceCommandSegmenter:
             self.timed_out = True
             return False
 
+        if speech_probability is None:
+            speech_probability = 0.0
+
         if not self.in_command:
+            # Before command
+            is_speech = speech_probability > self.before_command_speech_threshold
             if is_speech:
                 self._reset_seconds_left = self.reset_seconds
                 self._speech_seconds_left -= chunk_seconds
@@ -160,24 +171,29 @@ class VoiceCommandSegmenter:
                 if self._reset_seconds_left <= 0:
                     self._speech_seconds_left = self.speech_seconds
                     self._reset_seconds_left = self.reset_seconds
-        elif not is_speech:
-            # Silence in command
-            self._reset_seconds_left = self.reset_seconds
-            self._silence_seconds_left -= chunk_seconds
-            self._command_seconds_left -= chunk_seconds
-            if (self._silence_seconds_left <= 0) and (self._command_seconds_left <= 0):
-                # Command finished successfully
-                self.reset()
-                _LOGGER.debug("Voice command finished")
-                return False
         else:
-            # Speech in command.
-            # Reset silence counter if enough speech.
-            self._reset_seconds_left -= chunk_seconds
-            self._command_seconds_left -= chunk_seconds
-            if self._reset_seconds_left <= 0:
-                self._silence_seconds_left = self.silence_seconds
+            # In command
+            is_speech = speech_probability > self.in_command_speech_threshold
+            if not is_speech:
+                # Silence in command
                 self._reset_seconds_left = self.reset_seconds
+                self._silence_seconds_left -= chunk_seconds
+                self._command_seconds_left -= chunk_seconds
+                if (self._silence_seconds_left <= 0) and (
+                    self._command_seconds_left <= 0
+                ):
+                    # Command finished successfully
+                    self.reset()
+                    _LOGGER.debug("Voice command finished")
+                    return False
+            else:
+                # Speech in command.
+                # Reset silence counter if enough speech.
+                self._reset_seconds_left -= chunk_seconds
+                self._command_seconds_left -= chunk_seconds
+                if self._reset_seconds_left <= 0:
+                    self._silence_seconds_left = self.silence_seconds
+                    self._reset_seconds_left = self.reset_seconds
 
         return True
 
@@ -226,6 +242,9 @@ class VoiceActivityTimeout:
     reset_seconds: float = 0.5
     """Seconds of speech before resetting timeout."""
 
+    speech_threshold: float = 0.5
+    """Probability threshold (0-1) above which a chunk counts as speech."""
+
     _silence_seconds_left: float = 0.0
     """Seconds left before considering voice command as stopped."""
 
@@ -241,12 +260,15 @@ class VoiceActivityTimeout:
         self._silence_seconds_left = self.silence_seconds
         self._reset_seconds_left = self.reset_seconds
 
-    def process(self, chunk_seconds: float, is_speech: bool | None) -> bool:
+    def process(self, chunk_seconds: float, speech_probability: float | None) -> bool:
         """Process samples using external VAD.
 
         Returns False when timeout is reached.
         """
-        if is_speech:
+        if speech_probability is None:
+            speech_probability = 0.0
+
+        if speech_probability > self.speech_threshold:
             # Speech
             self._reset_seconds_left -= chunk_seconds
             if self._reset_seconds_left <= 0:
diff --git a/tests/components/assist_pipeline/test_vad.py b/tests/components/assist_pipeline/test_vad.py
index fda26d2fb94e66f74a39ce9b95a4349de25edd71..bd07601cd5d10f2459eda98de5701407e29c1169 100644
--- a/tests/components/assist_pipeline/test_vad.py
+++ b/tests/components/assist_pipeline/test_vad.py
@@ -16,7 +16,7 @@ def test_silence() -> None:
     segmenter = VoiceCommandSegmenter()
 
     # True return value indicates voice command has not finished
-    assert segmenter.process(_ONE_SECOND * 3, False)
+    assert segmenter.process(_ONE_SECOND * 3, 0.0)
     assert not segmenter.in_command
 
 
@@ -26,15 +26,15 @@ def test_speech() -> None:
     segmenter = VoiceCommandSegmenter()
 
     # silence
-    assert segmenter.process(_ONE_SECOND, False)
+    assert segmenter.process(_ONE_SECOND, 0.0)
 
     # "speech"
-    assert segmenter.process(_ONE_SECOND, True)
+    assert segmenter.process(_ONE_SECOND, 1.0)
     assert segmenter.in_command
 
     # silence
     # False return value indicates voice command is finished
-    assert not segmenter.process(_ONE_SECOND, False)
+    assert not segmenter.process(_ONE_SECOND, 0.0)
     assert not segmenter.in_command
 
 
@@ -112,19 +112,19 @@ def test_silence_seconds() -> None:
     segmenter = VoiceCommandSegmenter(silence_seconds=1.0)
 
     # silence
-    assert segmenter.process(_ONE_SECOND, False)
+    assert segmenter.process(_ONE_SECOND, 0.0)
     assert not segmenter.in_command
 
     # "speech"
-    assert segmenter.process(_ONE_SECOND, True)
+    assert segmenter.process(_ONE_SECOND, 1.0)
     assert segmenter.in_command
 
     # not enough silence to end
-    assert segmenter.process(_ONE_SECOND * 0.5, False)
+    assert segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert segmenter.in_command
 
     # exactly enough silence now
-    assert not segmenter.process(_ONE_SECOND * 0.5, False)
+    assert not segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert not segmenter.in_command
 
 
@@ -134,27 +134,27 @@ def test_silence_reset() -> None:
     segmenter = VoiceCommandSegmenter(silence_seconds=1.0, reset_seconds=0.5)
 
     # silence
-    assert segmenter.process(_ONE_SECOND, False)
+    assert segmenter.process(_ONE_SECOND, 0.0)
     assert not segmenter.in_command
 
     # "speech"
-    assert segmenter.process(_ONE_SECOND, True)
+    assert segmenter.process(_ONE_SECOND, 1.0)
     assert segmenter.in_command
 
     # not enough silence to end
-    assert segmenter.process(_ONE_SECOND * 0.5, False)
+    assert segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert segmenter.in_command
 
     # speech should reset silence detection
-    assert segmenter.process(_ONE_SECOND * 0.5, True)
+    assert segmenter.process(_ONE_SECOND * 0.5, 1.0)
     assert segmenter.in_command
 
     # not enough silence to end
-    assert segmenter.process(_ONE_SECOND * 0.5, False)
+    assert segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert segmenter.in_command
 
     # exactly enough silence now
-    assert not segmenter.process(_ONE_SECOND * 0.5, False)
+    assert not segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert not segmenter.in_command
 
 
@@ -166,23 +166,23 @@ def test_speech_reset() -> None:
     )
 
     # silence
-    assert segmenter.process(_ONE_SECOND, False)
+    assert segmenter.process(_ONE_SECOND, 0.0)
     assert not segmenter.in_command
 
     # not enough speech to start voice command
-    assert segmenter.process(_ONE_SECOND * 0.5, True)
+    assert segmenter.process(_ONE_SECOND * 0.5, 1.0)
     assert not segmenter.in_command
 
     # silence should reset speech detection
-    assert segmenter.process(_ONE_SECOND, False)
+    assert segmenter.process(_ONE_SECOND, 0.0)
     assert not segmenter.in_command
 
     # not enough speech to start voice command
-    assert segmenter.process(_ONE_SECOND * 0.5, True)
+    assert segmenter.process(_ONE_SECOND * 0.5, 1.0)
     assert not segmenter.in_command
 
     # exactly enough speech now
-    assert segmenter.process(_ONE_SECOND * 0.5, True)
+    assert segmenter.process(_ONE_SECOND * 0.5, 1.0)
     assert segmenter.in_command
 
 
@@ -193,18 +193,18 @@ def test_timeout() -> None:
 
     # not enough to time out
     assert not segmenter.timed_out
-    assert segmenter.process(_ONE_SECOND * 0.5, False)
+    assert segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert not segmenter.timed_out
 
     # enough to time out
-    assert not segmenter.process(_ONE_SECOND * 0.5, True)
+    assert not segmenter.process(_ONE_SECOND * 0.5, 1.0)
     assert segmenter.timed_out
 
     # flag resets with more audio
-    assert segmenter.process(_ONE_SECOND * 0.5, True)
+    assert segmenter.process(_ONE_SECOND * 0.5, 1.0)
     assert not segmenter.timed_out
 
-    assert not segmenter.process(_ONE_SECOND * 0.5, False)
+    assert not segmenter.process(_ONE_SECOND * 0.5, 0.0)
     assert segmenter.timed_out
 
 
@@ -215,14 +215,38 @@ def test_command_seconds() -> None:
         command_seconds=3, speech_seconds=1, silence_seconds=1, reset_seconds=1
     )
 
-    assert segmenter.process(_ONE_SECOND, True)
+    assert segmenter.process(_ONE_SECOND, 1.0)
 
     # Silence counts towards total command length
-    assert segmenter.process(_ONE_SECOND * 0.5, False)
+    assert segmenter.process(_ONE_SECOND * 0.5, 0.0)
 
     # Enough to finish command now
-    assert segmenter.process(_ONE_SECOND, True)
-    assert segmenter.process(_ONE_SECOND * 0.5, False)
+    assert segmenter.process(_ONE_SECOND, 1.0)
+    assert segmenter.process(_ONE_SECOND * 0.5, 0.0)
 
     # Silence to finish
-    assert not segmenter.process(_ONE_SECOND * 0.5, False)
+    assert not segmenter.process(_ONE_SECOND * 0.5, 0.0)
+
+
+def test_speech_thresholds() -> None:
+    """Test before/in command speech thresholds."""
+
+    segmenter = VoiceCommandSegmenter(
+        before_command_speech_threshold=0.2,
+        in_command_speech_threshold=0.5,
+        command_seconds=2,
+        speech_seconds=1,
+        silence_seconds=1,
+    )
+
+    # Not high enough probability to trigger command
+    assert segmenter.process(_ONE_SECOND, 0.1)
+    assert not segmenter.in_command
+
+    # Triggers command
+    assert segmenter.process(_ONE_SECOND, 0.3)
+    assert segmenter.in_command
+
+    # In command, 0.3 is below in_command_speech_threshold, so it is silence.
+    # One second of silence finishes the command.
+    assert not segmenter.process(_ONE_SECOND, 0.3)