Skip to content

Realtime API connection timeout triggers unhandled error and endless retries #934

@SilverHof

Description

@SilverHof

Bug Description

I have self-hosted LiveKit. The problem is:

Summary
While running the agent with @livekit/agents-plugin-openai (Realtime model), the OpenAI Realtime API connection times out, emits an unhandled error, and the plugin keeps retrying. Memory usage rises and the process logs “not authenticated” once before failing.
Environment
@livekit/agents 1.0.31
@livekit/agents-plugin-openai 1.0.31
@livekit/rtc-node 0.13.22
Node.js (same as used in logs; running on Linux)
Using RealtimeModel (OpenAI), default retry settings, turn detection enabled
What happens

  1. Agent starts and connects to the room.
  2. Realtime model creates a speech handle.
  3. Connection to OpenAI Realtime API times out.
  4. Unhandled error is thrown from RealtimeSession and reported as recoverable, but retries loop with attempt=0.
  5. Memory usage keeps growing above the warn threshold (~570 MB).

package.json
{ "name": "hr-gpt-agent", "version": "0.1.0", "private": true, "type": "module", "scripts": { "build": "tsup", "agent:start": "pnpm build && npx puppeteer browsers install chrome && node --trace-deprecation dist/agent.js dev", "agent:start:docker": "docker compose up", "agent:start:old": "pnpm build && node --trace-deprecation dist/agent-old.js dev", "agent:start:test": "pnpm build && node --trace-deprecation dist/agent-test.js dev", "download-files": "pnpm build && node dist/agent.js download-files", "format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,scss}\" --config .config/prettier/prettier.config.cjs --ignore-path .config/prettier/.prettierignore", "oxlint": "oxlint --config .config/oxlint/.oxlintrc.config.json" }, "packageManager": "pnpm@10.13.1", "dependencies": { "@livekit/agents": "1.0.31", "@livekit/agents-plugin-livekit": "1.0.31", "@livekit/agents-plugin-openai": "1.0.31", "@livekit/agents-plugin-silero": "1.0.31", "@livekit/noise-cancellation-node": "0.1.9", "@livekit/rtc-node": "^0.13.22", "@sentry/integrations": "^7.114.0", "@sentry/node": "^8.54.0", "axios": "^1.9.0", "dotenv": "^17.2.3", "form-data": "^4.0.2", "md-to-pdf": "^5.2.4", "puppeteer": "^24.33.1", "uuid": "^11.1.0", "zod": "^4.2.1" }, "devDependencies": { "@trivago/prettier-plugin-sort-imports": "^4.3.0", "@types/node": "^22.9.1", "@types/uuid": "^10.0.0", "prettier": "^3.7.4", "tsup": "8.5.1", "typescript": "^5.6.3" } }

agent.ts
`import type { JobContext, JobProcess } from '@livekit/agents';
import { ServerOptions, cli, defineAgent, llm, metrics, voice } from '@livekit/agents';

import * as livekit from '@livekit/agents-plugin-livekit';
import * as openai from '@livekit/agents-plugin-openai';
import * as silero from '@livekit/agents-plugin-silero';

import * as Sentry from '@sentry/node';
import dotenv from 'dotenv';
import path from 'node:path';
import process from 'node:process';
import { fileURLToPath } from 'node:url';
import { v4 as uuidv4 } from 'uuid';

import './instrument';
import { TranscriptionManager } from './lib/transcription/transcription-manager';
import { BackgroundVoiceCancellation } from '@livekit/noise-cancellation-node';
import { getInstruction } from './lib/instruction/instruction-config';
import { RemoteParticipant, Room, RoomEvent, TrackSource } from '@livekit/rtc-node';
import { isValidReport, saveReport } from './lib/report/save-report';
import { getCandidateInfo } from './lib/get-candidate-info';
import { saveReportSchema } from './schemas/save-report.schema';
import { getTranscriptionsSignalSchema } from './schemas/get-transcription-signal.schema';
import { executeGetTranscriptionsSignal } from './lib/transcription/execute-get-transcription-signal';
import { buildGreetingMessage } from './constants/ru/base/build-greeting-message-ru';

// ES modules do not provide __dirname, so derive it from this module's URL.
const moduleFilePath = fileURLToPath(import.meta.url);
const __dirname = path.dirname(moduleFilePath);

// Both locations live one level above the directory containing this module.
const envPath = path.join(__dirname, '../.env.local');
const reportsDir = path.resolve(__dirname, '../reports');

// Load environment variables from the local env file before anything reads them.
dotenv.config({ path: envPath });

/**
 * Simple voice agent that speaks a fixed greeting when it is entered.
 */
export class RealtimeAgent extends voice.Agent {
  /**
   * Calls `session.say` with a fixed greeting and interruptions disabled.
   * The call is not awaited, so this method resolves without waiting on it.
   */
  override async onEnter(): Promise<void> {
    this.session.say('Hello, how can I help you today?', {
      allowInterruptions: false,
    });
  }

  /**
   * Builds a RealtimeAgent with the default instructions.
   *
   * @param options - Object carrying the `room`; the room is accepted for
   *   interface compatibility but is not used when constructing the agent.
   * @returns A new RealtimeAgent instance.
   */
  static create({ room }: { room: Room }): RealtimeAgent {
    return new RealtimeAgent({
      instructions:
        'You are a helpful agent. Your name is Mario. You can check the weather forecast, search information in the knowledge base and transfer to human.',
    });
  }
}

const agent = defineAgent({
/**
 * Loads the Silero VAD model and stores it on `proc.userData.vad`, where
 * `entry` reads it back. Logs the load time; on failure logs and rethrows.
 */
prewarm: async (proc: JobProcess) => {
  console.log('Starting VAD model prewarm...');
  const startTime = Date.now();
  try {
    proc.userData.vad = await silero.VAD.load();
    const loadTime = Date.now() - startTime;
    // Template literal: the message interpolates the measured load time.
    console.log(`VAD model loaded in ${loadTime}ms`);
  } catch (error) {
    console.error('Failed to prewarm VAD model:', error);
    throw error;
  }
},
/**
 * Job entry point: connects to the room, builds the realtime model, TTS and
 * agent session, wires up transcription/metrics/error handlers, then starts
 * the session and plays the greeting.
 *
 * All listeners are registered BEFORE `session.start()` so that an error or
 * stall during start-up or the greeting cannot hit an emitter that has no
 * `error` listener yet, and so that events emitted during the greeting are
 * not missed.
 */
entry: async (ctx: JobContext) => {
  await ctx.connect();

  // Populated by `prewarm`.
  const vad = ctx.proc.userData.vad! as silero.VAD;

  const transcriptionManager = new TranscriptionManager();

  const states = {
    lastTranscriptId: null as string | null,
    isPaused: false as boolean,
  };

  const { candidateInfo, candidateId } = await getCandidateInfo(ctx);

  const realtimeModel = new openai.realtime.RealtimeModel({
    // model: 'gpt-4o-mini-realtime-preview-2024-12-17',
    model: 'gpt-realtime-mini-2025-12-15',
    temperature: 0.6,
    maxSessionDuration: 60 * 27, // 27 minutes
    voice: 'shimmer',
    speed: 1.1,
    turnDetection: {
      threshold: 0.8,
      type: 'server_vad',
      silence_duration_ms: 2000,
      prefix_padding_ms: 300,
      create_response: true,
      interrupt_response: true,
    },
  });

  const tts = new openai.TTS({
    model: 'gpt-4o-mini-tts',
    voice: 'shimmer',
    speed: 1.1,
  });

  const session = new voice.AgentSession({
    vad: vad,
    llm: realtimeModel,
    tts: tts,
    turnDetection: new livekit.turnDetector.MultilingualModel(),
  });

  // NOTE(review): `RealtimeModel.session()` presumably creates a NEW realtime
  // session on each call — confirm against the plugin. It is therefore called
  // exactly once here and the result is reused everywhere below.
  const realtimeSession = (session.llm as openai.realtime.RealtimeModel)?.session();

  // Named `interviewAgent` to avoid shadowing the module-level `agent`.
  const interviewAgent = new voice.Agent({
    instructions: getInstruction(candidateInfo),
    tools: {
      saveReport: llm.tool({
        description: 'Save interview report',
        parameters: saveReportSchema,
        execute: async ({ candidateName, report }) =>
          await saveReport({
            candidateName,
            report,
            candidateId,
            candidateInfo,
            ctx,
            // Reuse the single realtime session instead of requesting another
            // one on every tool invocation.
            session: realtimeSession,
            transcriptionManager,
            reportsDir,
            isValidReport,
          }),
      }),
      getTranscriptionsSignal: llm.tool({
        description: 'Get all interview transcriptions to review before writing the report',
        parameters: getTranscriptionsSignalSchema,
        execute: async () =>
          executeGetTranscriptionsSignal(
            transcriptionManager.getAllTranscriptions,
            realtimeSession as openai.realtime.RealtimeSession,
          ),
      }),
    },
  });

  const usageCollector = new metrics.UsageCollector();

  // ---- Listener registration (must happen before the session starts) ----

  session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => {
    metrics.logMetrics(ev.metrics);
    usageCollector.collect(ev.metrics);
  });

  ctx.addShutdownCallback(async () => {
    const summary = usageCollector.getSummary();
    console.log(`Usage: ${JSON.stringify(summary)}`);
  });

  ctx.room.on(RoomEvent.ParticipantDisconnected, async () => {
    console.log('👋 Participant disconnected, saving transcriptions...');
    try {
      const filePath = await transcriptionManager.saveTranscriptions();
      console.log(`💾 Session ended. Transcriptions saved to ${filePath}`);
    } catch (error) {
      console.error('❌ Error saving transcriptions:', error);
      Sentry.captureException(error);
    }
  });

  realtimeSession.on('input_speech_transcription_completed', (event) => {
    const remoteParticipant = Array.from(ctx.room.remoteParticipants.values())[0];

    // The participant may already have left; without this guard the
    // `.trackPublications` access below would throw inside the handler.
    if (!remoteParticipant) {
      console.warn('Transcription received but no remote participant is present; skipping.');
      return;
    }

    const trackPublication = Array.from(remoteParticipant.trackPublications.values()).find(
      (track) => track.source === TrackSource.SOURCE_MICROPHONE,
    );

    const newTranscriptId = uuidv4();

    transcriptionManager.addTranscription({
      room: ctx.room,
      participant: remoteParticipant,
      trackSid: trackPublication?.track?.sid as string,
      segmentId: newTranscriptId,
      text: event.transcript,
      isFinal: true,
    });

    // Log the same field that was stored (`transcript`), not a different one.
    if (event.transcript && event.transcript.trim() !== '') {
      console.log(`Added candidate transcription: "${event.transcript}"`);
    }

    states.lastTranscriptId = null;
  });

  realtimeSession.on('response_content_done', (data) => {
    if (data.text) {
      transcriptionManager.addTranscription({
        room: ctx.room,
        trackSid: 'virtual-track-sid',
        segmentId: uuidv4(),
        text: data.text,
        isFinal: true,
        // Stand-in participant object representing the agent's own speech.
        participant: {
          identity: 'agent',
          trackPublications: new Map([
            [
              'virtual-track',
              {
                trackSid: 'virtual-track-sid',
                source: TrackSource.SOURCE_MICROPHONE,
              },
            ],
          ]),
        } as unknown as RemoteParticipant,
      });
    }
  });

  realtimeSession.on('error', async (error: unknown) => {
    // Surface the failure instead of silently discarding it.
    console.error('❌ Realtime session error:', error);
    try {
      await realtimeSession.close();
    } catch (closeError) {
      // Closing is best-effort; report the failure but do not rethrow.
      console.error('❌ Error closing realtime session:', closeError);
    }
  });

  process.on('SIGINT', async () => {
    try {
      const filePath = await transcriptionManager.saveTranscriptions();
      console.log(`💾 Transcriptions saved to ${filePath} before shutdown`);
    } catch (error) {
      console.error('❌ Error saving transcriptions during shutdown:', error);
      Sentry.captureException(error);
    } finally {
      process.exit(0);
    }
  });

  // ---- Start the session and play the greeting ----

  await session.start({
    room: ctx.room,
    agent: interviewAgent,
    inputOptions: {
      noiseCancellation: BackgroundVoiceCancellation(),
    },
  });

  const sessionReplier = session.generateReply({
    userInput: '',
    instructions: buildGreetingMessage(candidateInfo),
  });

  await sessionReplier.waitForPlayout();
},
});

// ============================================================================
// STARTUP
// ============================================================================
// Point the worker at this very module and cap retries at three.
const serverOptions = new ServerOptions({
  agent: fileURLToPath(import.meta.url),
  maxRetry: 3,
});
cli.runApp(serverOptions);

export default agent;
`

Expected Behavior

Connection timeout should be handled without unhandled errors.
Retries should increment attempts or respect maxRetries, and stop cleanly with a surfaced error.
Memory should not grow with repeated retry attempts.

Reproduction Steps

1.

Operating System

Ubuntu 24

Models Used

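OpenAI Realtime model (`gpt-realtime-mini-2025-12-15`) via `@livekit/agents-plugin-openai`, with `gpt-4o-mini-tts` for TTS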

Package Versions

"@livekit/agents": "1.0.31",
    "@livekit/agents-plugin-livekit": "1.0.31",
    "@livekit/agents-plugin-openai": "1.0.31",
    "@livekit/agents-plugin-silero": "1.0.31",
    "@livekit/noise-cancellation-node": "0.1.9",
    "@livekit/rtc-node": "^0.13.22",

Session/Room/Call IDs

No response

Proposed Solution

Additional Context

No response

Screenshots and Recordings

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions