Skip to content

Commit 7886e12

Browse files
authored
feat(core): Support embeddings in langchain (#20017)
Adds embeddings instrumentation for LangChain (`embedQuery`, `embedDocuments`). LangChain's `Embeddings` base class doesn't use the callback system that we rely on for chat models, so this uses direct prototype patching instead.

In Node.js, embedding classes from provider packages are auto-instrumented. For other runtimes:

```js
const embeddings = Sentry.instrumentLangChainEmbeddings(
  new OpenAIEmbeddings({ model: 'text-embedding-3-small' })
);
await embeddings.embedQuery('Hello world');
```

Creates `gen_ai.embeddings` spans. Token usage is not available since LangChain's embedding methods only return raw vectors.

Closes #19774
1 parent 8487cd5 commit 7886e12

File tree

30 files changed

+618
-10
lines changed

30 files changed

+618
-10
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,19 @@
2323

2424
If you reference these attributes in hooks (e.g. `beforeSendTransaction`), update them to the `gen_ai.*` equivalents.
2525

26+
- feat(langchain): Support embeddings APIs in LangChain ([#20017](https://github.com/getsentry/sentry-javascript/pull/20017))
27+
28+
Adds instrumentation for LangChain embeddings (`embedQuery`, `embedDocuments`), creating `gen_ai.embeddings` spans. In Node.js, embedding classes from `@langchain/openai`, `@langchain/google-genai`, `@langchain/mistralai`, and `@langchain/google-vertexai` are auto-instrumented. For other runtimes, use the new `instrumentLangChainEmbeddings` API:
29+
30+
```javascript
31+
import * as Sentry from '@sentry/cloudflare';
32+
import { OpenAIEmbeddings } from '@langchain/openai';
33+
34+
const embeddings = Sentry.instrumentLangChainEmbeddings(new OpenAIEmbeddings({ model: 'text-embedding-3-small' }));
35+
36+
await embeddings.embedQuery('Hello world');
37+
```
38+
2639
## 10.47.0
2740

2841
### Important Changes

dev-packages/browser-integration-tests/suites/tracing/ai-providers/langchain/mocks.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
// Mock LangChain Embeddings for browser testing
2+
export class MockOpenAIEmbeddings {
3+
constructor(params) {
4+
this.model = params.model;
5+
this.dimensions = params.dimensions;
6+
}
7+
8+
async embedQuery(_text) {
9+
await new Promise(resolve => setTimeout(resolve, 10));
10+
return [0.1, 0.2, 0.3];
11+
}
12+
13+
async embedDocuments(documents) {
14+
await new Promise(resolve => setTimeout(resolve, 10));
15+
return documents.map(() => [0.1, 0.2, 0.3]);
16+
}
17+
}
18+
119
// Mock LangChain Chat Model for browser testing
220
export class MockChatAnthropic {
321
constructor(params) {

dev-packages/browser-integration-tests/suites/tracing/ai-providers/langchain/subject.js

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { createLangChainCallbackHandler } from '@sentry/browser';
2-
import { MockChatAnthropic } from './mocks.js';
2+
import { instrumentLangChainEmbeddings } from '@sentry/browser';
3+
import { MockChatAnthropic, MockOpenAIEmbeddings } from './mocks.js';
34

45
const callbackHandler = createLangChainCallbackHandler({
56
recordInputs: false,
@@ -20,3 +21,11 @@ const response = await chatModel.invoke('What is the capital of France?', {
2021
});
2122

2223
console.log('Received response', response);
24+
25+
// Test embeddings instrumentation
26+
const embeddings = instrumentLangChainEmbeddings(
27+
new MockOpenAIEmbeddings({ model: 'text-embedding-3-small', dimensions: 1536 }),
28+
);
29+
30+
const embedding = await embeddings.embedQuery('Hello world');
31+
console.log('Received embedding', embedding);

dev-packages/browser-integration-tests/suites/tracing/ai-providers/langchain/test.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,29 @@ sentryTest('manual LangChain instrumentation sends gen_ai transactions', async (
3535
'gen_ai.usage.total_tokens': 25,
3636
});
3737
});
38+
39+
sentryTest(
40+
'manual LangChain embeddings instrumentation sends gen_ai transactions',
41+
async ({ getLocalTestUrl, page }) => {
42+
const transactionPromise = waitForTransactionRequest(page, event => {
43+
return !!event.transaction?.includes('text-embedding-3-small');
44+
});
45+
46+
const url = await getLocalTestUrl({ testDir: __dirname });
47+
await page.goto(url);
48+
49+
const req = await transactionPromise;
50+
51+
const eventData = envelopeRequestParser(req);
52+
53+
expect(eventData.transaction).toBe('embeddings text-embedding-3-small');
54+
expect(eventData.contexts?.trace?.op).toBe('gen_ai.embeddings');
55+
expect(eventData.contexts?.trace?.origin).toBe('auto.ai.langchain');
56+
expect(eventData.contexts?.trace?.data).toMatchObject({
57+
'gen_ai.operation.name': 'embeddings',
58+
'gen_ai.system': 'openai',
59+
'gen_ai.request.model': 'text-embedding-3-small',
60+
'gen_ai.request.dimensions': 1536,
61+
});
62+
},
63+
);

dev-packages/browser-integration-tests/utils/generatePlugin.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ const IMPORTED_INTEGRATION_CDN_BUNDLE_PATHS: Record<string, string> = {
4242
instrumentGoogleGenAIClient: 'instrumentgooglegenaiclient',
4343
instrumentLangGraph: 'instrumentlanggraph',
4444
createLangChainCallbackHandler: 'createlangchaincallbackhandler',
45+
instrumentLangChainEmbeddings: 'instrumentlangchainembeddings',
4546
// technically, this is not an integration, but let's add it anyway for simplicity
4647
makeMultiplexedTransport: 'multiplexedtransport',
4748
};

dev-packages/node-integration-tests/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"@hono/node-server": "^1.19.10",
3333
"@langchain/anthropic": "^0.3.10",
3434
"@langchain/core": "^0.3.80",
35+
"@langchain/openai": "^0.5.0",
3536
"@langchain/langgraph": "^0.2.32",
3637
"@nestjs/common": "^11",
3738
"@nestjs/core": "^11",

dev-packages/node-integration-tests/suites/tracing/langchain/instrument-with-pii.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Sentry.init({
99
transport: loggingTransport,
1010
beforeSendTransaction: event => {
1111
// Filter out mock express server transactions
12-
if (event.transaction.includes('/v1/messages')) {
12+
if (event.transaction.includes('/v1/messages') || event.transaction.includes('/v1/embeddings')) {
1313
return null;
1414
}
1515
return event;

dev-packages/node-integration-tests/suites/tracing/langchain/instrument.mjs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ Sentry.init({
99
transport: loggingTransport,
1010
beforeSendTransaction: event => {
1111
// Filter out mock express server transactions
12-
if (event.transaction.includes('/v1/messages')) {
12+
if (event.transaction.includes('/v1/messages') || event.transaction.includes('/v1/embeddings')) {
1313
return null;
1414
}
1515
return event;
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { OpenAIEmbeddings } from '@langchain/openai';
2+
import * as Sentry from '@sentry/node';
3+
import express from 'express';
4+
5+
function startMockOpenAIServer() {
6+
const app = express();
7+
app.use(express.json());
8+
9+
app.post('/v1/embeddings', (req, res) => {
10+
const { model, input } = req.body;
11+
12+
if (model === 'error-model') {
13+
res.status(400).json({
14+
error: {
15+
message: 'Model not found',
16+
type: 'invalid_request_error',
17+
},
18+
});
19+
return;
20+
}
21+
22+
const inputs = Array.isArray(input) ? input : [input];
23+
res.json({
24+
object: 'list',
25+
data: inputs.map((_, i) => ({
26+
object: 'embedding',
27+
embedding: [0.1, 0.2, 0.3],
28+
index: i,
29+
})),
30+
model: model,
31+
usage: {
32+
prompt_tokens: 10,
33+
total_tokens: 10,
34+
},
35+
});
36+
});
37+
38+
return new Promise(resolve => {
39+
const server = app.listen(0, () => {
40+
resolve(server);
41+
});
42+
});
43+
}
44+
45+
async function run() {
46+
const server = await startMockOpenAIServer();
47+
const baseUrl = `http://localhost:${server.address().port}/v1`;
48+
49+
await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
50+
// Test 1: embedQuery
51+
const embeddings = new OpenAIEmbeddings({
52+
model: 'text-embedding-3-small',
53+
dimensions: 1536,
54+
apiKey: 'mock-api-key',
55+
configuration: { baseURL: baseUrl },
56+
});
57+
58+
await embeddings.embedQuery('Hello world');
59+
60+
// Test 2: embedDocuments
61+
await embeddings.embedDocuments(['First document', 'Second document']);
62+
63+
// Test 3: Error handling
64+
const errorEmbeddings = new OpenAIEmbeddings({
65+
model: 'error-model',
66+
apiKey: 'mock-api-key',
67+
configuration: { baseURL: baseUrl },
68+
});
69+
70+
try {
71+
await errorEmbeddings.embedQuery('This will fail');
72+
} catch {
73+
// Expected error
74+
}
75+
});
76+
77+
await Sentry.flush(2000);
78+
79+
server.close();
80+
}
81+
82+
run();

dev-packages/node-integration-tests/suites/tracing/langchain/test.ts

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import { SEMANTIC_ATTRIBUTE_SENTRY_OP, SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN } from '@sentry/core';
22
import { afterAll, describe, expect } from 'vitest';
33
import {
4+
GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
5+
GEN_AI_EMBEDDINGS_INPUT_ATTRIBUTE,
46
GEN_AI_INPUT_MESSAGES_ATTRIBUTE,
57
GEN_AI_INPUT_MESSAGES_ORIGINAL_LENGTH_ATTRIBUTE,
68
GEN_AI_OPERATION_NAME_ATTRIBUTE,
9+
GEN_AI_REQUEST_DIMENSIONS_ATTRIBUTE,
710
GEN_AI_REQUEST_MAX_TOKENS_ATTRIBUTE,
811
GEN_AI_REQUEST_MODEL_ATTRIBUTE,
912
GEN_AI_REQUEST_TEMPERATURE_ATTRIBUTE,
@@ -430,4 +433,120 @@ describe('LangChain integration', () => {
430433
.completed();
431434
});
432435
});
436+
437+
// =========================================================================
438+
// Embeddings tests
439+
// =========================================================================
440+
441+
const EXPECTED_TRANSACTION_EMBEDDINGS = {
442+
transaction: 'main',
443+
spans: expect.arrayContaining([
444+
// embedQuery span
445+
expect.objectContaining({
446+
data: expect.objectContaining({
447+
[GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'embeddings',
448+
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
449+
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.langchain',
450+
[GEN_AI_SYSTEM_ATTRIBUTE]: 'openai',
451+
[GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'text-embedding-3-small',
452+
[GEN_AI_REQUEST_DIMENSIONS_ATTRIBUTE]: 1536,
453+
}),
454+
description: 'embeddings text-embedding-3-small',
455+
op: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
456+
origin: 'auto.ai.langchain',
457+
status: 'ok',
458+
}),
459+
// embedDocuments span
460+
expect.objectContaining({
461+
data: expect.objectContaining({
462+
[GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'embeddings',
463+
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
464+
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.langchain',
465+
[GEN_AI_SYSTEM_ATTRIBUTE]: 'openai',
466+
[GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'text-embedding-3-small',
467+
}),
468+
description: 'embeddings text-embedding-3-small',
469+
op: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
470+
origin: 'auto.ai.langchain',
471+
status: 'ok',
472+
}),
473+
// Error span
474+
expect.objectContaining({
475+
data: expect.objectContaining({
476+
[GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'embeddings',
477+
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
478+
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.langchain',
479+
[GEN_AI_SYSTEM_ATTRIBUTE]: 'openai',
480+
[GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'error-model',
481+
}),
482+
description: 'embeddings error-model',
483+
op: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
484+
origin: 'auto.ai.langchain',
485+
status: 'internal_error',
486+
}),
487+
]),
488+
};
489+
490+
const EXPECTED_TRANSACTION_EMBEDDINGS_PII = {
491+
transaction: 'main',
492+
spans: expect.arrayContaining([
493+
// embedQuery span with input recorded
494+
expect.objectContaining({
495+
data: expect.objectContaining({
496+
[GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'embeddings',
497+
[SEMANTIC_ATTRIBUTE_SENTRY_OP]: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
498+
[SEMANTIC_ATTRIBUTE_SENTRY_ORIGIN]: 'auto.ai.langchain',
499+
[GEN_AI_SYSTEM_ATTRIBUTE]: 'openai',
500+
[GEN_AI_REQUEST_MODEL_ATTRIBUTE]: 'text-embedding-3-small',
501+
[GEN_AI_EMBEDDINGS_INPUT_ATTRIBUTE]: 'Hello world',
502+
}),
503+
description: 'embeddings text-embedding-3-small',
504+
op: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
505+
origin: 'auto.ai.langchain',
506+
status: 'ok',
507+
}),
508+
// embedDocuments span with input recorded
509+
expect.objectContaining({
510+
data: expect.objectContaining({
511+
[GEN_AI_OPERATION_NAME_ATTRIBUTE]: 'embeddings',
512+
[GEN_AI_EMBEDDINGS_INPUT_ATTRIBUTE]: JSON.stringify(['First document', 'Second document']),
513+
}),
514+
description: 'embeddings text-embedding-3-small',
515+
op: GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE,
516+
origin: 'auto.ai.langchain',
517+
status: 'ok',
518+
}),
519+
]),
520+
};
521+
522+
createEsmAndCjsTests(__dirname, 'scenario-embeddings.mjs', 'instrument.mjs', (createRunner, test) => {
523+
test('creates embedding spans with sendDefaultPii: false', async () => {
524+
await createRunner().ignore('event').expect({ transaction: EXPECTED_TRANSACTION_EMBEDDINGS }).start().completed();
525+
});
526+
527+
test('does not create duplicate embedding spans from double module patching', async () => {
528+
await createRunner()
529+
.ignore('event')
530+
.expect({
531+
transaction: event => {
532+
const spans = event.spans || [];
533+
const embeddingSpans = spans.filter(span => span.op === GEN_AI_EMBEDDINGS_OPERATION_ATTRIBUTE);
534+
// The scenario makes 3 embedding calls (2 successful + 1 error).
535+
expect(embeddingSpans).toHaveLength(3);
536+
},
537+
})
538+
.start()
539+
.completed();
540+
});
541+
});
542+
543+
createEsmAndCjsTests(__dirname, 'scenario-embeddings.mjs', 'instrument-with-pii.mjs', (createRunner, test) => {
544+
test('creates embedding spans with sendDefaultPii: true', async () => {
545+
await createRunner()
546+
.ignore('event')
547+
.expect({ transaction: EXPECTED_TRANSACTION_EMBEDDINGS_PII })
548+
.start()
549+
.completed();
550+
});
551+
});
433552
});

0 commit comments

Comments
 (0)