Skip to content

Commit a077a5d

Browse files
Add Resemble AI as a new TTS provider plugin (#1631)
Co-authored-by: Zohaib Ahmed <zohaib@resemble.ai>
1 parent 0c12991 commit a077a5d

File tree

10 files changed

+850
-0
lines changed

10 files changed

+850
-0
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# LiveKit Plugins Resemble
2+
3+
LiveKit Agents Framework plugin for voice synthesis with [Resemble AI](https://www.resemble.ai/), using both its REST API and its WebSocket streaming interface.
4+
5+
## Installation
6+
7+
```bash
8+
pip install livekit-plugins-resemble
9+
```
10+
11+
## Pre-requisites
12+
13+
You'll need an API key from Resemble AI. You can pass it to `TTS` via the `api_key` argument, or set it in the `RESEMBLE_API_KEY` environment variable.
14+
15+
You'll also need the UUID of the voice you want to use from your Resemble AI account; it is passed to `TTS` as `voice_uuid`.
16+
17+
## Examples
18+
19+
### Recommended
20+
21+
```python
22+
import asyncio
23+
from livekit.plugins.resemble import TTS
24+
25+
async def run_tts_example():
26+
# Use TTS with async context manager for automatic resource cleanup
27+
async with TTS(
28+
api_key="your_api_key", # or set RESEMBLE_API_KEY environment variable
29+
voice_uuid="your_voice_uuid",
30+
# Optional parameters
31+
sample_rate=44100, # Sample rate in Hz (default: 44100)
32+
precision="PCM_16", # Audio precision (PCM_32, PCM_24, PCM_16, MULAW)
33+
output_format="wav", # Output format (wav or mp3)
34+
) as tts:
35+
# One-off synthesis (uses REST API)
36+
audio_stream = tts.synthesize("Hello, world!")
37+
38+
# Process chunks as they arrive
39+
async for chunk in audio_stream:
40+
# Audio data is in the 'frame.data' attribute of SynthesizedAudio objects
41+
audio_data = chunk.frame.data
42+
print(f"Received chunk: {len(audio_data)} bytes")
43+
44+
# Alternative: collect all audio at once into a single AudioFrame
45+
audio_stream = tts.synthesize("Another example sentence.")
46+
audio_frame = await audio_stream.collect()
47+
print(f"Collected complete audio: {len(audio_frame.data)} bytes")
48+
49+
# Real-time streaming synthesis (uses WebSocket API)
50+
# Only available for Business plan users in Resemble AI
51+
stream = tts.stream()
52+
await stream.synthesize_text("Hello, world!")
53+
54+
55+
56+
# Run the example
57+
asyncio.run(run_tts_example())
58+
```
59+
60+
### Alternative: Manual Resource Management
61+
62+
If you prefer to manage resources manually, make sure to properly clean up:
63+
64+
```python
65+
import asyncio
66+
from livekit.plugins.resemble import TTS
67+
68+
async def run_tts_example():
69+
# Initialize TTS with your credentials
70+
tts = TTS(
71+
api_key="your_api_key",
72+
voice_uuid="your_voice_uuid",
73+
)
74+
75+
try:
76+
# TTS operations
77+
audio_stream = tts.synthesize("Hello, world!")
78+
async for chunk in audio_stream:
79+
# Access audio data correctly
80+
process_audio(chunk.frame.data)
81+
finally:
82+
# Always clean up resources when done
83+
await tts.aclose()
84+
85+
# Run the example
86+
asyncio.run(run_tts_example())
87+
```
88+
89+
### Resource Management
90+
91+
When using this plugin outside of the LiveKit agent framework, it's important to properly manage the TTS instance lifecycle:
92+
93+
1. **Preferred method**: Use the async context manager pattern (`async with TTS(...) as tts:`)
94+
2. If you manage the instance manually, always call `await tts.aclose()` in a `finally` block.
95+
3. If you prefer to provide your own HTTP session, you can pass it using the `http_session` parameter:
96+
97+
```python
98+
import aiohttp
99+
100+
async def with_custom_session():
101+
async with aiohttp.ClientSession() as session:
102+
async with TTS(
103+
api_key="your_api_key",
104+
voice_uuid="your_voice_uuid",
105+
http_session=session
106+
) as tts:
107+
# Use TTS...
108+
# No need to manually close anything - context managers handle it all
109+
```
110+
111+
## Implementation Details
112+
113+
This plugin uses two different approaches to generate speech:
114+
115+
1. **One-off Synthesis** - Uses Resemble's REST API for simple text-to-speech conversion
116+
2. **Streaming Synthesis** - Uses Resemble's WebSocket API for real-time streaming synthesis
117+
118+
The WebSocket streaming API is only available for Resemble AI Business plan users.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2023 LiveKit, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from .tts import TTS, ChunkedStream, SynthesizeStream
16+
from .version import __version__
17+
18+
__all__ = ["TTS", "ChunkedStream", "SynthesizeStream", "__version__"]
19+
20+
from livekit.agents import Plugin
21+
22+
23+
class ResemblePlugin(Plugin):
    """Plugin descriptor for the Resemble AI integration.

    Forwards this module's name, version, and package to the ``Plugin``
    base class so the plugin can be registered.
    """

    def __init__(self) -> None:
        super().__init__(__name__, __version__, __package__)
26+
27+
28+
# Register a plugin instance as soon as this module is imported.
Plugin.register_plugin(ResemblePlugin())
29+
30+
# Cleanup docs of unexported modules: every module-level name that is not
# listed in __all__ is mapped to False in __pdoc__.
_module = dir()
NOT_IN_ALL = [m for m in _module if m not in __all__]

__pdoc__ = {}

for n in NOT_IN_ALL:
    __pdoc__[n] = False
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
import logging
2+
3+
# Module-level logger registered under the "livekit.plugins.resemble" name.
logger = logging.getLogger("livekit.plugins.resemble")
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from enum import Enum
2+
3+
4+
class OutputFormat(str, Enum):
    """Audio output formats.

    Members subclass ``str``, so each one compares equal to its raw
    string value (for example ``OutputFormat.WAV == "wav"``).
    """

    WAV = "wav"
    MP3 = "mp3"
7+
8+
9+
class Precision(str, Enum):
    """Audio sample precision identifiers.

    Members subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # NOTE(review): the README example lists PCM_32, PCM_24 and MULAW as
    # accepted precisions, but only PCM_16 is defined here - confirm whether
    # the other values should be added.
    PCM_16 = "PCM_16"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
3+

0 commit comments

Comments
 (0)