@@ -430,3 +430,220 @@ def test_pull_immediate_block(self):
430430 result = pull_until_blocked (mock_graph )
431431
432432 assert result == []
433+
434+
class TestSimplePreprocessAudioFileEOSilence:
    """Test EOS silence padding in simple_preprocess_audio_file.

    Every test patches ``builtins.open``, ``open_audio_file`` and ``av.open``
    so that no real file is read or decoded. The mocked decoder yields one
    second of all-zero float32 samples at the requested rate, so the length
    of the returned int16 buffer reveals how much padding was appended.
    """

    @staticmethod
    def _configure_mocks(
        mock_file_open,
        mock_open_audio_file,
        mock_av_open,
        sample_rate=16000,
    ):
        """Wire the three patched callables to describe a 1-second mono file.

        Args:
            mock_file_open: Mock replacing ``builtins.open``.
            mock_open_audio_file: Mock replacing
                ``palabra_ai.internal.audio.open_audio_file``.
            mock_av_open: Mock replacing ``palabra_ai.internal.audio.av.open``.
            sample_rate: Sample rate reported by the fake stream; also the
                number of zero samples the fake decoder returns.
        """
        # File handle usable both directly and as a context manager.
        mock_file = MagicMock()
        mock_file.read.return_value = b"audio_data"
        mock_file.__enter__.return_value = mock_file
        mock_file_open.return_value = mock_file

        # Container exposing a single mono audio stream.
        mock_stream = MagicMock()
        mock_stream.type = "audio"
        mock_stream.sample_rate = sample_rate
        mock_stream.duration = sample_rate
        mock_stream.time_base = 1 / sample_rate
        mock_stream.channels = 1
        mock_container = MagicMock()
        mock_container.streams = [mock_stream]
        mock_av_open.return_value = mock_container

        # Decoded audio: exactly `sample_rate` samples of float32 zeros.
        mock_open_audio_file.return_value = np.zeros(
            sample_rate, dtype=np.float32
        )

    @staticmethod
    def _preprocess_to_int16(eos_silence_s, target_rate=16000):
        """Run simple_preprocess_audio_file and view its bytes as int16.

        Args:
            eos_silence_s: Value forwarded as ``eos_silence_s``.
            target_rate: Value forwarded as ``target_rate``.

        Returns:
            The first element of the returned tuple, reinterpreted as a
            numpy int16 array. The second element (metadata) is discarded.
        """
        # Imported lazily, as in each original test, so the module is only
        # touched while the patches are active.
        from palabra_ai.internal.audio import simple_preprocess_audio_file

        result_bytes, _metadata = simple_preprocess_audio_file(
            "test.wav",
            target_rate=target_rate,
            eos_silence_s=eos_silence_s,
        )
        return np.frombuffer(result_bytes, dtype=np.int16)

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_adds_padding(self, mock_file_open, mock_open_audio_file, mock_av_open):
        """Test that EOS silence padding is added correctly"""
        self._configure_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        # Test with 5 seconds of silence padding
        eos_silence_s = 5.0
        result_array = self._preprocess_to_int16(eos_silence_s)

        # Expected: 16000 original samples + 5*16000 silence samples = 96000 total
        silence_samples = int(eos_silence_s * 16000)
        assert len(result_array) == 16000 + silence_samples

        # Check that last samples are zeros (silence)
        assert np.all(result_array[-silence_samples:] == 0)

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_zero_no_padding(self, mock_file_open, mock_open_audio_file, mock_av_open):
        """Test that eos_silence_s=0 does not add padding"""
        self._configure_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        result_array = self._preprocess_to_int16(0.0)

        # Should be exactly 16000 samples, no padding
        assert len(result_array) == 16000

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_negative_no_padding(self, mock_file_open, mock_open_audio_file, mock_av_open):
        """Test that negative eos_silence_s does not add padding"""
        self._configure_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        result_array = self._preprocess_to_int16(-5.0)

        # Should be exactly 16000 samples, no padding
        assert len(result_array) == 16000

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_various_durations(self, mock_file_open, mock_open_audio_file, mock_av_open):
        """Test EOS silence with various durations (1s, 5s, 15s)"""
        # Mocks are configured once and reused for every duration.
        self._configure_mocks(mock_file_open, mock_open_audio_file, mock_av_open)

        for duration in [1.0, 5.0, 15.0]:
            result_array = self._preprocess_to_int16(duration)
            expected_length = 16000 + int(duration * 16000)
            assert len(result_array) == expected_length

    @patch('palabra_ai.internal.audio.av.open')
    @patch('palabra_ai.internal.audio.open_audio_file')
    @patch('builtins.open', create=True)
    def test_eos_silence_sample_accuracy(self, mock_file_open, mock_open_audio_file, mock_av_open):
        """Test that silence padding sample count matches formula exactly"""
        # Same fixture as the other tests, but at a 24kHz sample rate.
        target_rate = 24000
        self._configure_mocks(
            mock_file_open,
            mock_open_audio_file,
            mock_av_open,
            sample_rate=target_rate,
        )

        # Test with 10 seconds at 24kHz
        eos_silence_s = 10.0
        result_array = self._preprocess_to_int16(
            eos_silence_s, target_rate=target_rate
        )

        # Formula: silence_samples = int(eos_silence_s * sample_rate)
        expected_silence_samples = int(eos_silence_s * target_rate)
        expected_total = 24000 + expected_silence_samples

        assert len(result_array) == expected_total
        # Pins the formula above to a literal so a typo in it is caught.
        assert expected_silence_samples == 240000
0 commit comments