Skip to content

Commit c84aabb

Browse files
committed
use 22.05khz by default for 11labs
shorter TTFB
1 parent 8098d4a commit c84aabb

File tree

2 files changed

+18
-4
lines changed
  • livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs

2 files changed

+18
-4
lines changed

livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/models.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,4 +10,12 @@
1010
"eleven_flash_v2",
1111
]
1212

13-
TTSEncoding = Literal["mp3_44100",]
13+
TTSEncoding = Literal[
14+
"mp3_22050_32",
15+
"mp3_44100",
16+
"mp3_44100_32",
17+
"mp3_44100_64",
18+
"mp3_44100_96",
19+
"mp3_44100_128",
20+
"mp3_44100_192",
21+
]

livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,9 @@
3737
from .log import logger
3838
from .models import TTSEncoding, TTSModels
3939

40-
_DefaultEncoding: TTSEncoding = "mp3_44100"
40+
# by default, use 22.05kHz sample rate at 32kbps
41+
# in our testing, this reduces TTFB by ~110ms
42+
_DefaultEncoding: TTSEncoding = "mp3_22050_32"
4143

4244

4345
def _sample_rate_from_format(output_format: TTSEncoding) -> int:
@@ -102,6 +104,7 @@ def __init__(
102104
*,
103105
voice: Voice = DEFAULT_VOICE,
104106
model: TTSModels | str = "eleven_flash_v2_5",
107+
encoding: TTSEncoding | None = None,
105108
api_key: str | None = None,
106109
base_url: str | None = None,
107110
streaming_latency: int = 0,
@@ -131,11 +134,14 @@ def __init__(
131134
language (str | None): Language code for the TTS model, as of 10/24/24 only valid for "eleven_turbo_v2_5". Optional.
132135
"""
133136

137+
if not encoding:
138+
encoding = _DefaultEncoding
139+
134140
super().__init__(
135141
capabilities=tts.TTSCapabilities(
136142
streaming=True,
137143
),
138-
sample_rate=_sample_rate_from_format(_DefaultEncoding),
144+
sample_rate=_sample_rate_from_format(encoding),
139145
num_channels=1,
140146
)
141147

@@ -161,7 +167,7 @@ def __init__(
161167
model=model,
162168
api_key=api_key,
163169
base_url=base_url or API_BASE_URL_V1,
164-
encoding=_DefaultEncoding,
170+
encoding=encoding,
165171
sample_rate=self.sample_rate,
166172
streaming_latency=streaming_latency,
167173
word_tokenizer=word_tokenizer,

0 commit comments

Comments
 (0)