Skip to content

Commit c7a79a9

Browse files
author
yokotoka
committed
✨ feat: rename EOF silence duration to EOS silence and update related configurations
1 parent 11f6522 commit c7a79a9

File tree

5 files changed

+228
-10
lines changed

5 files changed

+228
-10
lines changed

src/palabra_ai/config.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
CONTEXT_SIZE_DEFAULT,
2525
DESIRED_QUEUE_LEVEL_MS_DEFAULT,
2626
ENERGY_VARIANCE_FACTOR_DEFAULT,
27-
EOF_SILENCE_DURATION_S,
27+
EOS_SILENCE_S,
2828
F0_VARIANCE_FACTOR_DEFAULT,
2929
FORCE_END_OF_SEGMENT_DEFAULT,
3030
FORCE_SPLIT_MIN_CHARACTERS_DEFAULT,
@@ -81,6 +81,7 @@
8181
TIMEOUT = env.int("TIMEOUT", default=0)
8282
LOG_FILE = env.path("LOG_FILE", default=None)
8383
RICH_DEFAULT_CONFIG = env.bool("RICH_DEFAULT_CONFIG", default=False)
84+
EOS_SILENCE_S_ENV = env.float("EOS_SILENCE_S", default=EOS_SILENCE_S)
8485

8586
# Materialized paths for fields that should always be included in serialization
8687
# when rich_default_config is enabled, even with exclude_unset=True
@@ -167,7 +168,6 @@ class IoMode(BaseModel):
167168
output_sample_rate: int
168169
num_channels: int
169170
input_chunk_duration_ms: int
170-
eof_silence_duration_s: float = EOF_SILENCE_DURATION_S
171171

172172
@cached_property
173173
def input_samples_per_channel(self) -> int:
@@ -538,6 +538,9 @@ class Config(BaseModel):
538538
rich_default_config: SkipJsonSchema[bool] = Field(
539539
default=RICH_DEFAULT_CONFIG, exclude=True
540540
)
541+
eos_silence_s: SkipJsonSchema[float] = Field(
542+
default=EOS_SILENCE_S_ENV, exclude=True
543+
)
541544

542545
def __init__(
543546
self,

src/palabra_ai/constant.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,4 +102,4 @@
102102
WEBRTC_MODE_CHANNELS = 1
103103
WEBRTC_MODE_CHUNK_DURATION_MS = 320
104104

105-
EOF_SILENCE_DURATION_S = 10.0
105+
EOS_SILENCE_S = 10.0

src/palabra_ai/internal/audio.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ def simple_preprocess_audio_file(
308308
target_rate: int,
309309
normalize: bool = False,
310310
progress_callback=None,
311-
eof_silence_duration_s: float = 0.0,
311+
eos_silence_s: float = 0.0,
312312
) -> tuple[bytes, dict]:
313313
"""Simple preprocessing: load with librosa/PyAV, resample only if not 16kHz."""
314314
debug(f"Simple preprocessing audio file {file_path}...")
@@ -359,14 +359,12 @@ def simple_preprocess_audio_file(
359359
audio_int16 = (audio_array * np.iinfo(np.int16).max).astype(np.int16)
360360

361361
# Add silence padding at the end
362-
if eof_silence_duration_s > 0:
363-
silence_samples = int(eof_silence_duration_s * final_rate)
362+
if eos_silence_s > 0:
363+
silence_samples = int(eos_silence_s * final_rate)
364364
audio_int16 = np.concatenate(
365365
[audio_int16, np.zeros(silence_samples, dtype=np.int16)]
366366
)
367-
debug(
368-
f"Added {eof_silence_duration_s}s ({silence_samples} samples) of silence padding"
369-
)
367+
debug(f"Added {eos_silence_s}s ({silence_samples} samples) of silence padding")
370368

371369
processed_data = audio_int16.tobytes()
372370

src/palabra_ai/task/adapter/file.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def progress_callback(samples):
7575
target_rate=self.cfg.mode.input_sample_rate,
7676
normalize=normalize,
7777
progress_callback=progress_callback,
78-
eof_silence_duration_s=self.cfg.mode.eof_silence_duration_s,
78+
eos_silence_s=self.cfg.eos_silence_s,
7979
)
8080
# Simple mode uses config as-is
8181
debug(

tests/test_internal_audio.py

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,3 +430,220 @@ def test_pull_immediate_block(self):
430430
result = pull_until_blocked(mock_graph)
431431

432432
assert result == []
433+
434+
435+
class TestSimplePreprocessAudioFileEOSilence:
    """Test EOS silence padding in simple_preprocess_audio_file.

    Every test patches ``av.open``, ``open_audio_file`` and ``builtins.open``
    so that no real file is touched, feeds one second of all-zero mono audio
    through ``simple_preprocess_audio_file`` and checks the number of int16
    samples that come back for a given ``eos_silence_s``.
    """

    @staticmethod
    def _prime_mocks(
        mock_file_open, mock_open_audio_file, mock_av_open, sample_rate=16000
    ):
        """Configure the three patched callables for one second of mono audio.

        Args:
            mock_file_open: Replacement for ``builtins.open``.
            mock_open_audio_file: Replacement for
                ``palabra_ai.internal.audio.open_audio_file``.
            mock_av_open: Replacement for ``palabra_ai.internal.audio.av.open``.
            sample_rate: Sample rate reported by the fake stream; also the
                number of samples in the fake audio array (i.e. one second).
        """
        # Mock file read
        mock_file = MagicMock()
        mock_file.read.return_value = b"audio_data"
        mock_file.__enter__.return_value = mock_file
        mock_file_open.return_value = mock_file

        # Mock av container exposing a single audio stream
        mock_stream = MagicMock()
        mock_stream.type = "audio"
        mock_stream.sample_rate = sample_rate
        mock_stream.duration = sample_rate
        mock_stream.time_base = 1 / sample_rate
        mock_stream.channels = 1
        mock_container = MagicMock()
        mock_container.streams = [mock_stream]
        mock_av_open.return_value = mock_container

        # Mock audio processing - return 1 second of (silent) audio
        mock_open_audio_file.return_value = np.zeros(sample_rate, dtype=np.float32)

    @staticmethod
    def _preprocess(target_rate, eos_silence_s):
        """Run simple_preprocess_audio_file and return its bytes as int16 samples.

        Must be called while the patches of the calling test are active.
        """
        from palabra_ai.internal.audio import simple_preprocess_audio_file

        result_bytes, _ = simple_preprocess_audio_file(
            "test.wav",
            target_rate=target_rate,
            eos_silence_s=eos_silence_s,
        )
        # Convert result back to int16 array
        return np.frombuffer(result_bytes, dtype=np.int16)

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_adds_padding(
        self, mock_file_open, mock_open_audio_file, mock_av_open
    ):
        """Test that EOS silence padding is added correctly"""
        self._prime_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        # Test with 5 seconds of silence padding
        eos_silence_s = 5.0
        result_array = self._preprocess(16000, eos_silence_s)

        # Expected: 16000 original samples + 5*16000 silence samples = 96000 total
        silence_samples = int(eos_silence_s * 16000)
        assert len(result_array) == 16000 + silence_samples

        # Check that last samples are zeros (silence)
        # NOTE(review): the mocked input audio is all zeros as well, so this
        # assertion cannot tell padding apart from source samples; only the
        # length check above really exercises the padding.
        assert np.all(result_array[-silence_samples:] == 0)

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_zero_no_padding(
        self, mock_file_open, mock_open_audio_file, mock_av_open
    ):
        """Test that eos_silence_s=0 does not add padding"""
        self._prime_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        # Test with 0 seconds of silence
        result_array = self._preprocess(16000, 0.0)

        # Should be exactly 16000 samples, no padding
        assert len(result_array) == 16000

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_negative_no_padding(
        self, mock_file_open, mock_open_audio_file, mock_av_open
    ):
        """Test that negative eos_silence_s does not add padding"""
        self._prime_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        # Test with negative value
        result_array = self._preprocess(16000, -5.0)

        # Should be exactly 16000 samples, no padding
        assert len(result_array) == 16000

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_various_durations(
        self, mock_file_open, mock_open_audio_file, mock_av_open
    ):
        """Test EOS silence with various durations (1s, 5s, 15s)"""
        self._prime_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        # Test various durations; the message pinpoints the failing case
        for duration in [1.0, 5.0, 15.0]:
            result_array = self._preprocess(16000, duration)
            expected_length = 16000 + int(duration * 16000)
            assert len(result_array) == expected_length, f"duration={duration}"

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_sample_accuracy(
        self, mock_file_open, mock_open_audio_file, mock_av_open
    ):
        """Test that silence padding sample count matches formula exactly"""
        # Mock av container and audio at a 24kHz sample rate
        self._prime_mocks(
            mock_file_open, mock_open_audio_file, mock_av_open, sample_rate=24000
        )

        # Test with 10 seconds at 24kHz
        eos_silence_s = 10.0
        target_rate = 24000
        result_array = self._preprocess(target_rate, eos_silence_s)

        # Formula: silence_samples = int(eos_silence_s * sample_rate)
        expected_silence_samples = int(eos_silence_s * target_rate)
        expected_total = 24000 + expected_silence_samples

        assert len(result_array) == expected_total
        assert expected_silence_samples == 240000

0 commit comments

Comments
 (0)