✨ feat: rename EOF silence duration to EOS silence (`eos_silence_s`), move it from `IoMode` to `Config`, and add an `EOS_SILENCE_S` environment override

yokotoka · yokotoka · commit c7a79a97af19 · 2025-10-29T23:17:12.000-03:00
diff --git a/src/palabra_ai/config.py b/src/palabra_ai/config.py
@@ -24,7 +24,7 @@
     CONTEXT_SIZE_DEFAULT,
     DESIRED_QUEUE_LEVEL_MS_DEFAULT,
     ENERGY_VARIANCE_FACTOR_DEFAULT,
-    EOF_SILENCE_DURATION_S,
+    EOS_SILENCE_S,
     F0_VARIANCE_FACTOR_DEFAULT,
     FORCE_END_OF_SEGMENT_DEFAULT,
     FORCE_SPLIT_MIN_CHARACTERS_DEFAULT,
@@ -81,6 +81,7 @@
 TIMEOUT = env.int("TIMEOUT", default=0)
 LOG_FILE = env.path("LOG_FILE", default=None)
 RICH_DEFAULT_CONFIG = env.bool("RICH_DEFAULT_CONFIG", default=False)
+EOS_SILENCE_S_ENV = env.float("EOS_SILENCE_S", default=EOS_SILENCE_S)
 
 # Materialized paths for fields that should always be included in serialization
 # when rich_default_config is enabled, even with exclude_unset=True
@@ -167,7 +168,6 @@ class IoMode(BaseModel):
     output_sample_rate: int
     num_channels: int
     input_chunk_duration_ms: int
-    eof_silence_duration_s: float = EOF_SILENCE_DURATION_S
 
     @cached_property
     def input_samples_per_channel(self) -> int:
@@ -538,6 +538,9 @@ class Config(BaseModel):
     rich_default_config: SkipJsonSchema[bool] = Field(
         default=RICH_DEFAULT_CONFIG, exclude=True
     )
+    eos_silence_s: SkipJsonSchema[float] = Field(
+        default=EOS_SILENCE_S_ENV, exclude=True
+    )
 
     def __init__(
         self,
diff --git a/src/palabra_ai/constant.py b/src/palabra_ai/constant.py
@@ -102,4 +102,4 @@
 WEBRTC_MODE_CHANNELS = 1
 WEBRTC_MODE_CHUNK_DURATION_MS = 320
 
-EOF_SILENCE_DURATION_S = 10.0
+EOS_SILENCE_S = 10.0
diff --git a/src/palabra_ai/internal/audio.py b/src/palabra_ai/internal/audio.py
@@ -308,7 +308,7 @@ def simple_preprocess_audio_file(
     target_rate: int,
     normalize: bool = False,
     progress_callback=None,
-    eof_silence_duration_s: float = 0.0,
+    eos_silence_s: float = 0.0,
 ) -> tuple[bytes, dict]:
     """Simple preprocessing: load with librosa/PyAV, resample only if not 16kHz."""
     debug(f"Simple preprocessing audio file {file_path}...")
@@ -359,14 +359,12 @@ def simple_preprocess_audio_file(
     audio_int16 = (audio_array * np.iinfo(np.int16).max).astype(np.int16)
 
     # Add silence padding at the end
-    if eof_silence_duration_s > 0:
-        silence_samples = int(eof_silence_duration_s * final_rate)
+    if eos_silence_s > 0:
+        silence_samples = int(eos_silence_s * final_rate)
         audio_int16 = np.concatenate(
             [audio_int16, np.zeros(silence_samples, dtype=np.int16)]
         )
-        debug(
-            f"Added {eof_silence_duration_s}s ({silence_samples} samples) of silence padding"
-        )
+        debug(f"Added {eos_silence_s}s ({silence_samples} samples) of silence padding")
 
     processed_data = audio_int16.tobytes()
 
diff --git a/src/palabra_ai/task/adapter/file.py b/src/palabra_ai/task/adapter/file.py
@@ -75,7 +75,7 @@ def progress_callback(samples):
                 target_rate=self.cfg.mode.input_sample_rate,
                 normalize=normalize,
                 progress_callback=progress_callback,
-                eof_silence_duration_s=self.cfg.mode.eof_silence_duration_s,
+                eos_silence_s=self.cfg.eos_silence_s,
             )
             # Simple mode uses config as-is
             debug(
diff --git a/tests/test_internal_audio.py b/tests/test_internal_audio.py
@@ -430,3 +430,220 @@ def test_pull_immediate_block(self):
         result = pull_until_blocked(mock_graph)
 
         assert result == []
+
+
+class TestSimplePreprocessAudioFileEOSilence:
+    """Test EOS silence padding in simple_preprocess_audio_file"""
+
+    @patch('palabra_ai.internal.audio.av.open')
+    @patch('palabra_ai.internal.audio.open_audio_file')
+    @patch('builtins.open', create=True)
+    def test_eos_silence_adds_padding(self, mock_file_open, mock_open_audio_file, mock_av_open):
+        """Test that EOS silence padding is added correctly"""
+        from palabra_ai.internal.audio import simple_preprocess_audio_file
+
+        # Mock file read
+        mock_file = MagicMock()
+        mock_file.read.return_value = b"audio_data"
+        mock_file.__enter__.return_value = mock_file
+        mock_file_open.return_value = mock_file
+
+        # Mock av container
+        mock_container = MagicMock()
+        mock_stream = MagicMock()
+        mock_stream.type = "audio"
+        mock_stream.sample_rate = 16000
+        mock_stream.duration = 16000
+        mock_stream.time_base = 1/16000
+        mock_stream.channels = 1
+        mock_container.streams = [mock_stream]
+        mock_av_open.return_value = mock_container
+
+        # Mock audio processing - return 1 second of audio (16000 samples)
+        audio_array = np.zeros(16000, dtype=np.float32)
+        mock_open_audio_file.return_value = audio_array
+
+        # Test with 5 seconds of silence padding
+        eos_silence_s = 5.0
+        result_bytes, metadata = simple_preprocess_audio_file(
+            "test.wav",
+            target_rate=16000,
+            eos_silence_s=eos_silence_s
+        )
+
+        # Convert result back to int16 array
+        result_array = np.frombuffer(result_bytes, dtype=np.int16)
+
+        # Expected: 16000 original samples + 5*16000 silence samples = 96000 total
+        expected_length = 16000 + int(eos_silence_s * 16000)
+        assert len(result_array) == expected_length
+
+        # Check that last samples are zeros (silence)
+        silence_samples = int(eos_silence_s * 16000)
+        assert np.all(result_array[-silence_samples:] == 0)
+
+    @patch('palabra_ai.internal.audio.av.open')
+    @patch('palabra_ai.internal.audio.open_audio_file')
+    @patch('builtins.open', create=True)
+    def test_eos_silence_zero_no_padding(self, mock_file_open, mock_open_audio_file, mock_av_open):
+        """Test that eos_silence_s=0 does not add padding"""
+        from palabra_ai.internal.audio import simple_preprocess_audio_file
+
+        # Mock file read
+        mock_file = MagicMock()
+        mock_file.read.return_value = b"audio_data"
+        mock_file.__enter__.return_value = mock_file
+        mock_file_open.return_value = mock_file
+
+        # Mock av container
+        mock_container = MagicMock()
+        mock_stream = MagicMock()
+        mock_stream.type = "audio"
+        mock_stream.sample_rate = 16000
+        mock_stream.duration = 16000
+        mock_stream.time_base = 1/16000
+        mock_stream.channels = 1
+        mock_container.streams = [mock_stream]
+        mock_av_open.return_value = mock_container
+
+        # Mock audio processing - return 1 second of audio
+        audio_array = np.zeros(16000, dtype=np.float32)
+        mock_open_audio_file.return_value = audio_array
+
+        # Test with 0 seconds of silence
+        result_bytes, metadata = simple_preprocess_audio_file(
+            "test.wav",
+            target_rate=16000,
+            eos_silence_s=0.0
+        )
+
+        result_array = np.frombuffer(result_bytes, dtype=np.int16)
+
+        # Should be exactly 16000 samples, no padding
+        assert len(result_array) == 16000
+
+    @patch('palabra_ai.internal.audio.av.open')
+    @patch('palabra_ai.internal.audio.open_audio_file')
+    @patch('builtins.open', create=True)
+    def test_eos_silence_negative_no_padding(self, mock_file_open, mock_open_audio_file, mock_av_open):
+        """Test that negative eos_silence_s does not add padding"""
+        from palabra_ai.internal.audio import simple_preprocess_audio_file
+
+        # Mock file read
+        mock_file = MagicMock()
+        mock_file.read.return_value = b"audio_data"
+        mock_file.__enter__.return_value = mock_file
+        mock_file_open.return_value = mock_file
+
+        # Mock av container
+        mock_container = MagicMock()
+        mock_stream = MagicMock()
+        mock_stream.type = "audio"
+        mock_stream.sample_rate = 16000
+        mock_stream.duration = 16000
+        mock_stream.time_base = 1/16000
+        mock_stream.channels = 1
+        mock_container.streams = [mock_stream]
+        mock_av_open.return_value = mock_container
+
+        # Mock audio processing
+        audio_array = np.zeros(16000, dtype=np.float32)
+        mock_open_audio_file.return_value = audio_array
+
+        # Test with negative value
+        result_bytes, metadata = simple_preprocess_audio_file(
+            "test.wav",
+            target_rate=16000,
+            eos_silence_s=-5.0
+        )
+
+        result_array = np.frombuffer(result_bytes, dtype=np.int16)
+
+        # Should be exactly 16000 samples, no padding
+        assert len(result_array) == 16000
+
+    @patch('palabra_ai.internal.audio.av.open')
+    @patch('palabra_ai.internal.audio.open_audio_file')
+    @patch('builtins.open', create=True)
+    def test_eos_silence_various_durations(self, mock_file_open, mock_open_audio_file, mock_av_open):
+        """Test EOS silence with various durations (1s, 5s, 15s)"""
+        from palabra_ai.internal.audio import simple_preprocess_audio_file
+
+        # Mock file read
+        mock_file = MagicMock()
+        mock_file.read.return_value = b"audio_data"
+        mock_file.__enter__.return_value = mock_file
+        mock_file_open.return_value = mock_file
+
+        # Mock av container
+        mock_container = MagicMock()
+        mock_stream = MagicMock()
+        mock_stream.type = "audio"
+        mock_stream.sample_rate = 16000
+        mock_stream.duration = 16000
+        mock_stream.time_base = 1/16000
+        mock_stream.channels = 1
+        mock_container.streams = [mock_stream]
+        mock_av_open.return_value = mock_container
+
+        # Mock audio processing
+        audio_array = np.zeros(16000, dtype=np.float32)
+        mock_open_audio_file.return_value = audio_array
+
+        # Test various durations
+        for duration in [1.0, 5.0, 15.0]:
+            result_bytes, metadata = simple_preprocess_audio_file(
+                "test.wav",
+                target_rate=16000,
+                eos_silence_s=duration
+            )
+
+            result_array = np.frombuffer(result_bytes, dtype=np.int16)
+            expected_length = 16000 + int(duration * 16000)
+            assert len(result_array) == expected_length
+
+    @patch('palabra_ai.internal.audio.av.open')
+    @patch('palabra_ai.internal.audio.open_audio_file')
+    @patch('builtins.open', create=True)
+    def test_eos_silence_sample_accuracy(self, mock_file_open, mock_open_audio_file, mock_av_open):
+        """Test that silence padding sample count matches formula exactly"""
+        from palabra_ai.internal.audio import simple_preprocess_audio_file
+
+        # Mock file read
+        mock_file = MagicMock()
+        mock_file.read.return_value = b"audio_data"
+        mock_file.__enter__.return_value = mock_file
+        mock_file_open.return_value = mock_file
+
+        # Mock av container with 24kHz sample rate
+        mock_container = MagicMock()
+        mock_stream = MagicMock()
+        mock_stream.type = "audio"
+        mock_stream.sample_rate = 24000
+        mock_stream.duration = 24000
+        mock_stream.time_base = 1/24000
+        mock_stream.channels = 1
+        mock_container.streams = [mock_stream]
+        mock_av_open.return_value = mock_container
+
+        # Mock audio processing
+        audio_array = np.zeros(24000, dtype=np.float32)
+        mock_open_audio_file.return_value = audio_array
+
+        # Test with 10 seconds at 24kHz
+        eos_silence_s = 10.0
+        target_rate = 24000
+        result_bytes, metadata = simple_preprocess_audio_file(
+            "test.wav",
+            target_rate=target_rate,
+            eos_silence_s=eos_silence_s
+        )
+
+        result_array = np.frombuffer(result_bytes, dtype=np.int16)
+
+        # Formula: silence_samples = int(eos_silence_s * sample_rate)
+        expected_silence_samples = int(eos_silence_s * target_rate)
+        expected_total = 24000 + expected_silence_samples
+
+        assert len(result_array) == expected_total
+        assert expected_silence_samples == 240000

Original file line number	Diff line number	Diff line change
`@@ -75,7 +75,7 @@ def progress_callback(samples):`
`75`	`75`	`target_rate=self.cfg.mode.input_sample_rate,`
`76`	`76`	`normalize=normalize,`
`77`	`77`	`progress_callback=progress_callback,`
`78`		`- eof_silence_duration_s=self.cfg.mode.eof_silence_duration_s,`
	`78`	`+ eos_silence_s=self.cfg.eos_silence_s,`
`79`	`79`	`)`
`80`	`80`	`# Simple mode uses config as-is`
`81`	`81`	`debug(`