diff --git a/.changeset/harden-tenant-isolation.md b/.changeset/harden-tenant-isolation.md index b324184..32151a9 100644 --- a/.changeset/harden-tenant-isolation.md +++ b/.changeset/harden-tenant-isolation.md @@ -8,10 +8,10 @@ Tenant-isolation hardening, a type-safe reactive search index, and a consistent **Tenant isolation** -- `ChatHistory` is keyed per user (`::`), so a chat can never be read or overwritten by a different user. Every method takes a single object; `userId`/`sessionId` are required and validated non-empty. -- `AgentMemory` requires a non-empty `userId` on every call (no silent shared bucket); `add`/`recall` take a single object param. -- `ToolCache` keys are `:::` — scoped per user, then per tool. -- `createRateLimit`/`createRateLimitAuth` require an explicit `limiter` (removed `limit`/`window`); eve's `createRateLimitAuth` requires `identifier` (no implicit global bucket). +- `ChatHistory` is keyed per user (`::`), so a chat can never be read or overwritten by a different user. Every method takes a single object; `userId`/`sessionId` are required, validated non-empty, and rejected if they contain the `:` key separator (which would otherwise let keys collide across users). +- `AgentMemory` requires a non-empty `userId` on every call (no silent shared bucket) and rejects a `:` in `userId`; `add`/`recall` take a single object param. +- `ToolCache` keys are `:::` — scoped per user, then per tool; `userId`/`toolName` are rejected if they contain `:`. +- `createRateLimit`/`createRateLimitAuth` require an explicit `limiter` (removed `limit`/`window`); eve's `createRateLimitAuth` requires `identifier` (no implicit global bucket) and counts only `POST` requests, so a turn (a message `POST` plus its follow-up stream `GET`) is charged once, not twice. - The eve sandbox denies network egress by default. **Reactive search index** diff --git a/CLAUDE.md b/CLAUDE.md index d81d512..dfcba88 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -205,13 +205,17 @@ pnpm -r --filter "./examples/*" build # build both demo apps - Conventional commits; use `!` for breaking changes. Commit at meaningful checkpoints. ## TODO (current task) +> **Historical log — superseded naming.** The items below record a completed task and use the +> intermediate `namespace` name, which was later renamed to **`userId`** (the per-call tenant value) +> plus **`toolName`** (the cache's tool segment). For the live key naming and conventions, see the +> "API conventions" section above — not this checklist. - [x] Remove model cache (code + examples done; READMEs pending below). - [x] ai-sdk: add `cachedTools` (map of `tool()`-built tools, namespace defaults to map key) alongside `cachedTool`; `cachePrefix` → `namespace`; dropped `toolCache` from the config. - [x] `cachedTool`/`cachedTools` are fully type-safe (config extends the AI SDK `tool()` type — input/output inference, no `any`). - [x] Search tools: ensure the index (create + `waitIndexing`, memoized) before running each tool — a missing Upstash index returns `null`/`-1` rather than throwing, so we ensure up front. - [x] `createMemoryTools` (ai-sdk) + eve memory tools: `scope` → `namespace` (string or per-call function). Core `AgentMemory` add/recall/forget use `namespace`. - [x] Rate limiting: `namespace` is a plain string; prefix `agentkit:rateLimit`. -- [x] Key naming: `agentkit:rateLimit:`, `agentkit:toolCache::`, `agentkit:memory::`. +- [x] Key naming (now `userId`/`toolName`, not `namespace`): `agentkit:rateLimit:`, `agentkit:toolCache:::`, `agentkit:memory::`. - [x] Unit/e2e tests use `gpt-4o` (`TEST_MODEL`). - [x] eve: dropped the `./model` subpath — model wrappers are exported from the package root. - [x] ai-sdk example app fleshed out (memory + search + cached tool + rate limit). diff --git a/examples/ai-sdk-demo/app/api/chat/route.ts b/examples/ai-sdk-demo/app/api/chat/route.ts index 6ed249e..3e3014a 100644 --- a/examples/ai-sdk-demo/app/api/chat/route.ts +++ b/examples/ai-sdk-demo/app/api/chat/route.ts @@ -27,6 +27,11 @@ export async function POST(req: Request) { // as a header. Memory, history, tool cache and rate limit are all scoped to this user; only the // shared books index is common to everyone. const { id, messages } = (await req.json()) as { id: string; messages: UIMessage[] }; + // ⚠️ DEMO ONLY: this trusts a client-supplied header to identify the user, so anyone can send + // `x-user-id: ` and read/overwrite their data. It's only safe here because + // `normalizeUser` allow-lists it to two fixed demo users. In production, derive `userId` from a + // VERIFIED server-side session (Clerk, Auth.js/NextAuth, Supabase Auth, Auth0, …) — e.g. + // `const { userId } = await auth()` with Clerk — and NEVER from a request header/param/body. const userId = normalizeUser(req.headers.get(USER_HEADER)); const redis = getRedis(); diff --git a/examples/ai-sdk-demo/app/api/chats/route.ts b/examples/ai-sdk-demo/app/api/chats/route.ts index 7fcc383..9746edd 100644 --- a/examples/ai-sdk-demo/app/api/chats/route.ts +++ b/examples/ai-sdk-demo/app/api/chats/route.ts @@ -5,7 +5,9 @@ export const runtime = "nodejs"; // GET /api/chats → list the user's chats (summaries, newest first) // GET /api/chats?q= → fuzzy-search the user's chats by what was said -// The active user is identified by the `x-user-id` header, so each user sees only their own chats. +// ⚠️ DEMO ONLY: the user is identified by the client-supplied `x-user-id` header (allow-listed to two +// fixed demo users by `normalizeUser`). In production, derive `userId` from a VERIFIED server-side +// session (Clerk, Auth.js/NextAuth, Supabase Auth, Auth0, …), never from a client-supplied value. export async function GET(req: Request) { const history = getHistory(); const userId = normalizeUser(req.headers.get(USER_HEADER)); diff --git a/packages/ai-sdk/README.md b/packages/ai-sdk/README.md index 993c75e..8a9c9aa 100644 --- a/packages/ai-sdk/README.md +++ b/packages/ai-sdk/README.md @@ -28,9 +28,12 @@ const history = createChatHistory({ }); ``` -Every method takes a single object; `userId` is **required, non-empty, and must be unique per user** -(your auth subject id). It's the tenant boundary — a chat can't be read or overwritten under a -different `userId`. `saveChat` overwrites the **whole** message array — `useChat` sends the full +Every method takes a single object; `userId` is **required, non-empty, and may not contain `:`**. It's +the tenant boundary, so **derive it from a verified server-side auth source** — the subject/user id +from your auth provider (Clerk, Auth.js/NextAuth, Supabase Auth, Auth0, …) — and **never from a +client-supplied header, query param, or body** (e.g. read it from the session in your route, not from +the request the browser controls). A chat can't be read or overwritten under a different `userId`. +`saveChat` overwrites the **whole** message array — `useChat` sends the full conversation, so there's no transport trimming and no delta to merge. Persist from your route's `onFinish`: @@ -85,9 +88,10 @@ const tools = createMemoryTools({ await generateText({ model, tools, stopWhen: stepCountIs(5), prompt: "What do you know about me?" }); ``` -> **`userId` is required and must be non-empty** — it's the only tenant boundary for memory. -> Make it **unique per user** (pass the user id, or a `(input, options) => string` deriving it); an -> empty value throws rather than collapsing every user into one shared bucket. +> **`userId` is required, non-empty, and may not contain `:`** — it's the only tenant boundary for +> memory. **Derive it from a verified server-side auth source** (the subject/user id from Clerk, +> Auth.js/NextAuth, Supabase Auth, Auth0, …), passed as a string or a `(input, options) => string`; +> **never trust a client-supplied value.** An empty/separator-bearing value throws. ## Search tools diff --git a/packages/ai-sdk/src/memory.ts b/packages/ai-sdk/src/memory.ts index f5fe8cb..1206e8e 100644 --- a/packages/ai-sdk/src/memory.ts +++ b/packages/ai-sdk/src/memory.ts @@ -29,7 +29,7 @@ export interface CreateMemoryToolsConfig { /** * Build `recall_memory` and `save_memory` AI SDK tools backed by long-term {@link AgentMemory}. Spread - * the returned map into `generateText({ tools })`. Pass only a `scope`; `redis` defaults to + * the returned map into `generateText({ tools })`. Pass only a `userId`; `redis` defaults to * `Redis.fromEnv()`. * * ```ts diff --git a/packages/eve/README.md b/packages/eve/README.md index c3fc7e8..3756d04 100644 --- a/packages/eve/README.md +++ b/packages/eve/README.md @@ -7,7 +7,7 @@ code-execution **sandbox backend** powered by [Upstash Box](https://github.com/u cached tools. ```bash -pnpm add @upstash/agentkit-eve @upstash/agentkit-sdk @upstash/redis +pnpm add @upstash/agentkit-eve @upstash/redis # in your app (Eve + the OpenAI provider, plus Box only if you use /sandbox): pnpm add eve @ai-sdk/openai @upstash/box ``` @@ -47,6 +47,12 @@ export default defineMemorySaveTool({ }); ``` +> **`userId` is the tenant boundary** (required, non-empty, no `:`). Derive it from eve's **verified +> session auth** — `ctx.session.auth.current?.principalId` (the authenticated principal, gated by your +> channel's `auth` walk) — as above, not from anything the client supplies. Configure a real +> authenticator (`vercelOidc()`, an OIDC/JWT provider like Clerk, …) so `principalId` is trustworthy; +> the `?? ctx.session.id` fallback only applies when a request is unauthenticated. + ## Search tools (`agent/tools/*.ts`) `defineSearchTools` builds `search` / `aggregate` / `count` eve tools over an Upstash Redis Search @@ -107,6 +113,12 @@ export default eveChannel({ > one abusive caller can exhaust the window for everyone, so for per-user limiting derive it per > request (an authenticated user id, an API key, or `x-forwarded-for` for per-IP). +> **Only `POST` requests are counted.** eve runs each turn as two authenticated requests — the message +> `POST` (which invokes the model) and a follow-up `GET …/stream` that opens the reply stream — and the +> `auth` walk runs on both. `createRateLimitAuth` throttles only the `POST`s, so **one turn costs one +> token** of your limiter (a `Ratelimit.slidingWindow(20, "1 m")` allows 20 turns/min, not 10); the +> session-read `GET`s fall through unthrottled. + ## Code-execution sandbox (`agent/sandbox.ts`) `upstash()` is a drop-in replacement for Eve's `vercel()` backend, powered by Upstash Box. Swap the diff --git a/packages/eve/src/auth.test.ts b/packages/eve/src/auth.test.ts index 00b7774..827bd32 100644 --- a/packages/eve/src/auth.test.ts +++ b/packages/eve/src/auth.test.ts @@ -4,7 +4,8 @@ import { ForbiddenError } from "eve/channels/auth"; import { Ratelimit, createRateLimitAuth } from "./index.js"; import { hasRedisCreds, testRedis } from "./test-support.js"; -const req = () => new Request("http://localhost/agent"); +// Only POST requests (the model-invoking message submissions) are counted, so default to POST here. +const req = () => new Request("http://localhost/agent", { method: "POST" }); describe.skipIf(!hasRedisCreds)("createRateLimitAuth (live Redis)", () => { const redis = testRedis(); @@ -35,11 +36,27 @@ describe.skipIf(!hasRedisCreds)("createRateLimitAuth (live Redis)", () => { }); const reqFor = (user: string) => - new Request("http://localhost/agent", { headers: { "x-user": user } }); + new Request("http://localhost/agent", { method: "POST", headers: { "x-user": user } }); // Exhaust A's bucket; B's is untouched and still passes. expect(await auth(reqFor("a"))).toBeNull(); await expect(auth(reqFor("a"))).rejects.toBeInstanceOf(ForbiddenError); expect(await auth(reqFor("b"))).toBeNull(); }); + + // eve runs the auth walk on the follow-up `GET …/stream` too; only POSTs (which invoke the model) + // are counted, so a turn isn't charged twice. A GET always falls through without touching the limiter. + it("does not count non-POST requests (e.g. the stream GET)", async () => { + const auth = createRateLimitAuth({ + redis, + limiter: Ratelimit.slidingWindow(1, "60 s"), + identifier: `test:${randomUUID().slice(0, 8)}`, + }); + const get = () => new Request("http://localhost/agent", { method: "GET" }); + + // Many GETs in a row never exhaust the (size-1) window — they're not counted at all. + expect(await auth(get())).toBeNull(); + expect(await auth(get())).toBeNull(); + expect(await auth(get())).toBeNull(); + }); }); diff --git a/packages/eve/src/auth.ts b/packages/eve/src/auth.ts index 103d460..d76bbfa 100644 --- a/packages/eve/src/auth.ts +++ b/packages/eve/src/auth.ts @@ -24,6 +24,12 @@ export interface RateLimitAuthConfig extends Omit { * Over the limit it throws a `ForbiddenError` (HTTP 403). Backed by {@link createRateLimit}; keys are * `agentkit:rateLimit:`. * + * **Only `POST` requests are counted** (the message-submitting routes that actually invoke the model: + * `POST /eve/v1/session` and `POST /eve/v1/session/:id`). eve drives each turn as two authenticated + * requests — the message `POST` **and** a follow-up `GET …/stream` that opens the reply stream — and + * the auth walk runs on both. Counting both would charge every turn twice; gating only the `POST` + * makes one turn cost exactly one token, while the session-read `GET`s fall through unthrottled. + * * ```ts * // agent/channels/eve.ts * import { createRateLimitAuth, Ratelimit } from "@upstash/agentkit-eve"; @@ -47,6 +53,10 @@ export function createRateLimitAuth(config: RateLimitAuthConfig): AuthFn { + // Only throttle the model-invoking message submissions (POST). The follow-up `GET …/stream` (and + // other session reads) share this auth walk but shouldn't each cost a token — let them through so + // a single turn = a single increment. + if (request.method !== "POST") return null; const id = typeof identifier === "function" ? await identifier(request) : identifier; const { success } = await ratelimit.limit(id); if (!success) { diff --git a/packages/sdk/README.md b/packages/sdk/README.md index 56b0ce4..8cf4f90 100644 --- a/packages/sdk/README.md +++ b/packages/sdk/README.md @@ -81,9 +81,12 @@ const hits = await history.searchChats({ await history.deleteChat({ userId: "user-123", sessionId: "session-abc" }); // delete (also de-indexes it) ``` -> **`userId` and `sessionId` are required and must be non-empty** — they are the only tenant -> boundary, so an empty value throws rather than silently mis-scoping a chat. Make `userId` unique -> per user (e.g. your auth subject id); a chat can't be read or overwritten by a different `userId`. +> **`userId` and `sessionId` are required, non-empty, and may not contain `:`** (the key separator) — +> they are the only tenant boundary, so an empty or separator-bearing value throws rather than +> silently mis-scoping a chat. **Derive `userId` from a verified server-side auth source** — the +> subject/user id from your auth provider (Clerk, Auth.js/NextAuth, Supabase Auth, Auth0, …) — and +> **never from a client-supplied header, query param, or request body**, or a caller can impersonate +> any user. A chat can't be read or overwritten under a different `userId`. ### Agent memory @@ -113,9 +116,11 @@ const hits = await memory.recall({ await memory.forget("pref-lang", { userId: "user-123" }); // required, non-empty userId ``` -> **`userId` is required and must be non-empty** on every method — it's the only tenant boundary -> for memory, so an empty value throws rather than collapsing all callers into one shared bucket. -> Make it **unique per user** (e.g. the user id) to keep each user's memories isolated. +> **`userId` is required, non-empty, and may not contain `:`** on every method — it's the only tenant +> boundary for memory, so an empty or separator-bearing value throws rather than collapsing or +> colliding callers. **Derive it from a verified server-side auth source** (the subject/user id from +> Clerk, Auth.js/NextAuth, Supabase Auth, Auth0, …) — **never from a client-supplied value** — to keep +> each user's memories isolated. ### Search tools @@ -185,9 +190,10 @@ const getWeather = tools.wrap( ); ``` -> **`userId` and `toolName` are both required and must be non-empty** (`get`/`set`/`invalidate`/`wrap` -> all throw on an empty value). The entry is scoped to the user first, so one user's cached result is -> never served to another. +> **`userId` and `toolName` are both required, non-empty, and may not contain `:`** (`get`/`set`/ +> `invalidate`/`wrap` all throw otherwise). The entry is scoped to the user first, so one user's cached +> result is never served to another — provided `userId` comes from a verified auth source, not a +> client-supplied value. ## Testing diff --git a/packages/sdk/src/chat-history.test.ts b/packages/sdk/src/chat-history.test.ts index 8bfb6ab..f047d0b 100644 --- a/packages/sdk/src/chat-history.test.ts +++ b/packages/sdk/src/chat-history.test.ts @@ -93,6 +93,22 @@ describe.skipIf(!hasRedisCreds)("ChatHistory (live Redis Search)", () => { await expect(history.searchChats({ userId: "", query: "hi" })).rejects.toThrow(/userId/); }); + // A ':' in userId or sessionId would let `:` keys collide across users, so both + // are rejected up front (e.g. user "a" + session "b:c" must not reach user "a:b" + session "c"). + it("throws on a ':' in userId or sessionId", async () => { + await expect(history.saveChat({ userId: "a:b", sessionId: "x", messages: [] })).rejects.toThrow( + /userId.*':'/, + ); + await expect(history.getChat({ userId: "a:b", sessionId: "x" })).rejects.toThrow(/userId.*':'/); + await expect(history.getChat({ userId: user, sessionId: "b:c" })).rejects.toThrow( + /sessionId.*':'/, + ); + await expect(history.deleteChat({ userId: user, sessionId: "b:c" })).rejects.toThrow( + /sessionId.*':'/, + ); + await expect(history.listChats({ userId: "a:b" })).rejects.toThrow(/userId.*':'/); + }); + it("lists a user's chats (filtered by userId in the index)", async () => { await history.saveChat({ userId: user, diff --git a/packages/sdk/src/chat-history.ts b/packages/sdk/src/chat-history.ts index 6f0deea..7865f99 100644 --- a/packages/sdk/src/chat-history.ts +++ b/packages/sdk/src/chat-history.ts @@ -4,13 +4,20 @@ import { ReactiveSearchIndex } from "./reactive-index.js"; import { now } from "./utils.js"; /** - * Reject an empty/missing id. `userId` and `sessionId` are the only tenant boundary, so a blank one - * would silently mis-scope a chat (or, for `userId`, let any caller read/overwrite another user's chat). + * Reject an empty/missing id, or one containing the `:` key separator. `userId` and `sessionId` are + * the only tenant boundary, so a blank one would silently mis-scope a chat (or, for `userId`, let any + * caller read/overwrite another user's chat). Keys are `::` and the + * direct-key reads (`getChat`/`deleteChat`/`saveChat`) trust that structure, so a `:` in either id + * would let `"a"` + sessionId `"b:c"` collide with user `"a:b"` + sessionId `"c"` and reach the other + * user's chat. Forbidding `:` in both keeps the boundary unambiguous. */ function assertId(value: string | undefined, name: string): asserts value is string { if (value === undefined || value === "") { throw new Error(`ChatHistory: \`${name}\` is required and must be a non-empty string.`); } + if (value.includes(":")) { + throw new Error(`ChatHistory: \`${name}\` must not contain ':' (it is the key separator).`); + } } /** Lightweight summary of a chat, returned by list/search (no raw messages). */ diff --git a/packages/sdk/src/memory.test.ts b/packages/sdk/src/memory.test.ts index 621d278..af98f46 100644 --- a/packages/sdk/src/memory.test.ts +++ b/packages/sdk/src/memory.test.ts @@ -99,6 +99,12 @@ describe.skipIf(!hasRedisCreds)("AgentMemory (live Redis)", () => { await expect(memory.forget("some-id", { userId: "" })).rejects.toThrow(/userId/i); }); + it("rejects a userId containing the ':' key separator (no cross-user key collision)", async () => { + await expect(memory.add({ text: "x", userId: "a:b" })).rejects.toThrow(/':'/); + await expect(memory.recall({ userId: "a:b" })).rejects.toThrow(/':'/); + await expect(memory.forget("id", { userId: "a:b" })).rejects.toThrow(/':'/); + }); + it("round-trips createdAt", async () => { await memory.add({ text: "a dated fact", userId: "meta" }); await memory.searchIndex.waitIndexing(); diff --git a/packages/sdk/src/memory.ts b/packages/sdk/src/memory.ts index bbd90f6..595a52d 100644 --- a/packages/sdk/src/memory.ts +++ b/packages/sdk/src/memory.ts @@ -5,13 +5,18 @@ import { ReactiveSearchIndex } from "./reactive-index.js"; import { now } from "./utils.js"; /** - * Reject an empty/missing userId. The userId is the only tenant boundary for memory, so a blank one - * would silently collapse every caller into one shared bucket and leak memories across users. + * Reject an empty/missing userId, or one containing the `:` key separator. The userId is the only + * tenant boundary for memory, so a blank one would silently collapse every caller into one shared + * bucket — and a `:` would let `:` collide across users (e.g. userId `"a"` + id `"b:c"` + * lands on the same key as userId `"a:b"` + id `"c"`), breaking the per-user isolation by direct key. */ function assertUserId(userId: string | undefined): asserts userId is string { if (userId === undefined || userId === "") { throw new Error("AgentMemory: `userId` is required and must be a non-empty string."); } + if (userId.includes(":")) { + throw new Error("AgentMemory: `userId` must not contain ':' (it is the key separator)."); + } } export interface MemoryRecord { diff --git a/packages/sdk/src/search-tools.ts b/packages/sdk/src/search-tools.ts index 4906c60..11aeea8 100644 --- a/packages/sdk/src/search-tools.ts +++ b/packages/sdk/src/search-tools.ts @@ -86,7 +86,7 @@ const FILTER_GUIDE = [ * `schema` so the model learns the fields, their types, and which filter operators apply. * * The index is provisioned **reactively**: each op runs straight away, and only if the index doesn't - * exist yet does it get created (+ `waitIndexing`) and the op retried — see {@link withIndex}. + * exist yet does it get created (+ `waitIndexing`) and the op retried — see {@link ReactiveSearchIndex}. */ export function createSearchToolDefs( config: SearchToolDefsConfig, diff --git a/packages/sdk/src/tool-cache.test.ts b/packages/sdk/src/tool-cache.test.ts index 0190784..2836a39 100644 --- a/packages/sdk/src/tool-cache.test.ts +++ b/packages/sdk/src/tool-cache.test.ts @@ -71,6 +71,13 @@ describe.skipIf(!hasRedisCreds)("ToolCache (live Redis)", () => { await expect(cache.wrap("", "tool", async () => 1)({})).rejects.toThrow(/userId/i); }); + it("rejects a ':' in userId or toolName (the key separator)", async () => { + const cache = new ToolCache({ redis, prefix }); + await expect(cache.get("a:b", "tool", { x: 1 })).rejects.toThrow(/userId.*':'/); + await expect(cache.get(U, "to:ol", { x: 1 })).rejects.toThrow(/toolName.*':'/); + await expect(cache.set("a:b", "tool", { x: 1 }, "v")).rejects.toThrow(/userId.*':'/); + }); + it("honors TTL", async () => { const cache = new ToolCache({ redis, prefix, ttlSeconds: 1 }); await cache.set(U, "ttl", { x: 1 }, "v"); diff --git a/packages/sdk/src/tool-cache.ts b/packages/sdk/src/tool-cache.ts index 72d6fc3..b382471 100644 --- a/packages/sdk/src/tool-cache.ts +++ b/packages/sdk/src/tool-cache.ts @@ -2,13 +2,18 @@ import type { Redis } from "@upstash/redis"; import { key, stableHash } from "./utils.js"; /** - * Reject an empty/missing key part. `userId` and `toolName` are both part of the cache key, so a blank - * one would collapse unrelated users (or unrelated tools) into one shared cache entry. + * Reject an empty/missing key part, or one containing the `:` key separator. `userId` and `toolName` + * are both segments of the cache key `:::`, so a blank one would + * collapse unrelated users (or tools) into one shared entry — and a `:` would let segments slide across + * the boundary (e.g. one user's entry colliding with another's), defeating the per-user scoping. */ function assertKeyPart(value: string | undefined, name: string): asserts value is string { if (value === undefined || value === "") { throw new Error(`ToolCache: \`${name}\` is required and must be a non-empty string.`); } + if (value.includes(":")) { + throw new Error(`ToolCache: \`${name}\` must not contain ':' (it is the key separator).`); + } } export interface ToolCacheConfig {