3737from .log import logger
3838from .models import TTSEncoding , TTSModels
3939
40- _DefaultEncoding : TTSEncoding = "mp3_44100"
40+ # By default, use a 22.05 kHz sample rate at 32 kbps.
41+ # In our testing, this reduces TTFB by roughly 110 ms.
42+ _DefaultEncoding : TTSEncoding = "mp3_22050_32"
4143
4244
4345def _sample_rate_from_format (output_format : TTSEncoding ) -> int :
@@ -102,6 +104,7 @@ def __init__(
102104 * ,
103105 voice : Voice = DEFAULT_VOICE ,
104106 model : TTSModels | str = "eleven_flash_v2_5" ,
107+ encoding : TTSEncoding | None = None ,
105108 api_key : str | None = None ,
106109 base_url : str | None = None ,
107110 streaming_latency : int = 0 ,
@@ -131,11 +134,14 @@ def __init__(
131134 language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
132135 """
133136
137+ if not encoding :
138+ encoding = _DefaultEncoding
139+
134140 super ().__init__ (
135141 capabilities = tts .TTSCapabilities (
136142 streaming = True ,
137143 ),
138- sample_rate = _sample_rate_from_format (_DefaultEncoding ),
144+ sample_rate = _sample_rate_from_format (encoding ),
139145 num_channels = 1 ,
140146 )
141147
@@ -161,7 +167,7 @@ def __init__(
161167 model = model ,
162168 api_key = api_key ,
163169 base_url = base_url or API_BASE_URL_V1 ,
164- encoding = _DefaultEncoding ,
170+ encoding = encoding ,
165171 sample_rate = self .sample_rate ,
166172 streaming_latency = streaming_latency ,
167173 word_tokenizer = word_tokenizer ,
0 commit comments