From 884f28d6245134401ab4d4caac8477fbc4912295 Mon Sep 17 00:00:00 2001 From: DerinSozen <67414435+DerinSozen@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:41:30 -0500 Subject: [PATCH] silence based filtering --- whisper-service/device_config.template.json | 3 ++- whisper-service/model_bases/buffer_audio_model_base.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/whisper-service/device_config.template.json b/whisper-service/device_config.template.json index 08c9c5d..1031c95 100644 --- a/whisper-service/device_config.template.json +++ b/whisper-service/device_config.template.json @@ -15,7 +15,8 @@ "device": "cpu", "local_agree_dim": 2, "min_new_samples": 48000, - "max_segment_samples": 480000 + "max_segment_samples": 480000, + "silence_threshold": 0.01 }, "available_features": {} } diff --git a/whisper-service/model_bases/buffer_audio_model_base.py b/whisper-service/model_bases/buffer_audio_model_base.py index 3894a7d..b098508 100644 --- a/whisper-service/model_bases/buffer_audio_model_base.py +++ b/whisper-service/model_bases/buffer_audio_model_base.py @@ -26,7 +26,7 @@ class BufferAudioModelBase(TranscriptionModelBase): and process_segment() methods must be implemented. ''' __slots__ = ['max_segment_samples', 'min_new_samples', - 'num_last_processed_samples', 'num_purged_samples', 'buffer'] + 'num_last_processed_samples', 'num_purged_samples', 'buffer','silence_threshold'] SAMPLE_RATE = 16_000 def __init__(self, ws, config): @@ -41,6 +41,7 @@ def __init__(self, ws, config): super().__init__(ws, config) self.max_segment_samples = config['max_segment_samples'] self.min_new_samples = config['min_new_samples'] + self.silence_threshold = config['silence_threshold'] self.num_last_processed_samples = 0 self.num_purged_samples = 0 @@ -130,6 +131,11 @@ async def queue_audio_chunk(self, audio_chunk) -> None: audio_chunk (io.BytesIO): A buffer containing wav audio ''' audio = decode_wav(audio_chunk) + + # Filter out silent audio + if np.abs(audio).max() < self.silence_threshold: + return + extra_audio = self.buffer.append_sequence(audio) # If buffer is full, process segments until entire audio chunk can be