diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts index bfb621f..9bf7ac4 100644 --- a/cli/src/codex-runtime.test.ts +++ b/cli/src/codex-runtime.test.ts @@ -110,16 +110,41 @@ describe("formatCodexRolloutLines", () => { ]; const posts = formatCodexRolloutLines(objs); assert.equal(posts.length, 4); - // The injected prompt is mirrored like the Claude UserPromptSubmit announce. + // Prompt and agent messages route to the channel root (kind=text); the + // exec_command_begin routes into the thread under the last text (kind=tool). assert.equal(posts[0].role, "user"); + assert.equal(posts[0].kind, "text"); assert.match(posts[0].text, /^>>> Received user message/); assert.match(posts[0].text, /Please review PR 519\./); + assert.equal(posts[1].kind, "text"); assert.equal(posts[1].text, "I'll review the PR now."); + assert.equal(posts[2].kind, "tool"); assert.match(posts[2].text, /gh pr view 519/); + assert.equal(posts[3].kind, "text"); assert.equal(posts[3].text, "No blockers; 2 nits."); assert.ok(posts.slice(1).every((p) => p.role === "assistant")); }); + test("coalesces consecutive codex exec_command_begin into one tool post", () => { + const objs = [ + { type: "event_msg", payload: { type: "user_message", message: "Look at PR 519.", images: [] } }, + { type: "event_msg", payload: { type: "agent_message", message: "Pulling details." } }, + { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "view", "519"] } }, + { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "diff", "519"] } }, + { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "checks", "519"] } }, + ]; + const posts = formatCodexRolloutLines(objs); + // user + agent text + ONE tool (3 commands coalesced). + assert.equal(posts.length, 3); + assert.deepEqual(posts.map((p) => p.kind), ["text", "text", "tool"]); + assert.match(posts[2].text, /gh pr view 519/); + assert.match(posts[2].text, /gh pr diff 519/); + assert.match(posts[2].text, /gh pr checks 519/); + // Pill follows the latest command in the coalesced batch so the channel + // shows "now running gh pr checks 519" rather than the first one. + assert.match(posts[2].statusLabel ?? "", /gh pr checks 519/); + }); + test("mirrors an out-of-credits failure when a turn produces no output", () => { const objs = [ { type: "event_msg", payload: { type: "user_message", message: "Please review PR 4288.", images: [] } }, @@ -136,6 +161,10 @@ describe("formatCodexRolloutLines", () => { const posts = formatCodexRolloutLines(objs); assert.equal(posts.length, 2); assert.equal(posts[0].role, "user"); + assert.equal(posts[0].kind, "text"); + // The credits warning has to surface at the channel root, not buried in a + // thread — operators won't see "no output" otherwise. + assert.equal(posts[1].kind, "text"); assert.match(posts[1].text, /out of credits/i); assert.match(posts[1].text, /plan: plus, balance 0/); assert.match(posts[1].text, /chatgpt\.com\/codex\/settings\/usage/); @@ -152,6 +181,7 @@ describe("formatCodexRolloutLines", () => { ]; const posts = formatCodexRolloutLines(objs); assert.equal(posts.length, 1); + assert.equal(posts[0].kind, "text"); assert.equal(posts[0].text, "Reviewed, LGTM."); }); }); diff --git a/cli/src/slack-format.test.ts b/cli/src/slack-format.test.ts index f551931..c4a41bc 100644 --- a/cli/src/slack-format.test.ts +++ b/cli/src/slack-format.test.ts @@ -34,7 +34,7 @@ describe("formatTranscriptLines", () => { const asst = (content: any) => ({ type: "assistant", message: { role: "assistant", content } }); const usr = (content: any) => ({ type: "user", message: { role: "user", content } }); - test("merges consecutive assistant lines (thinking + reply + tool) into one post", () => { + test("emits one post per content block with the right kind", () => { const posts = formatTranscriptLines([ asst([{ type: "thinking", thinking: "let me check the deps" }]), asst([ @@ -42,31 +42,86 @@ describe("formatTranscriptLines", () => { { type: "tool_use", name: "Bash", input: { description: "run tests" } }, ]), ]); - assert.equal(posts.length, 1); - assert.equal(posts[0].role, "assistant"); + // 3 blocks total, so 3 posts (text/tool/thinking each routed independently + // by the bridge: text -> channel root, tool/thinking -> in-thread). + assert.equal(posts.length, 3); + assert.deepEqual( + posts.map((p) => p.kind), + ["thinking", "text", "tool"] + ); assert.match(posts[0].text, /💭 _let me check the deps_/); - assert.match(posts[0].text, /Reviewing the bump\./); - // Command-style tool labels render in a fenced code block, not inline. - assert.match(posts[0].text, /```\nBash\(run tests\)\n```/); + assert.equal(posts[1].text, "Reviewing the bump."); + assert.match(posts[2].text, /```\nBash\(run tests\)\n```/); + assert.ok(posts.every((p) => p.role === "assistant")); + }); + + test("coalesces consecutive tool posts into one (debounce against Slack rate limits)", () => { + const posts = formatTranscriptLines([ + asst([{ type: "text", text: "Checking the PR." }]), + asst([{ type: "tool_use", name: "Bash", input: { description: "view PR" } }]), + asst([{ type: "tool_use", name: "Bash", input: { description: "view diff" } }]), + asst([{ type: "tool_use", name: "Bash", input: { description: "view checks" } }]), + ]); + // The 3 Bash calls fold into 1 tool post (single Slack API call instead + // of 3); the text post stays separate so it can anchor the new thread. + assert.equal(posts.length, 2); + assert.equal(posts[0].kind, "text"); + assert.equal(posts[0].text, "Checking the PR."); + assert.equal(posts[1].kind, "tool"); + assert.match(posts[1].text, /Bash\(view PR\)/); + assert.match(posts[1].text, /Bash\(view diff\)/); + assert.match(posts[1].text, /Bash\(view checks\)/); + }); + + test("a text post in the middle breaks the tool coalescing run", () => { + // Real-world shape: tools, then the agent says something, then more tools. + // The new text must anchor a new thread, so the second tool run can't fold + // into the first. + const posts = formatTranscriptLines([ + asst([{ type: "tool_use", name: "Bash", input: { description: "a" } }]), + asst([{ type: "tool_use", name: "Bash", input: { description: "b" } }]), + asst([{ type: "text", text: "Found two." }]), + asst([{ type: "tool_use", name: "Bash", input: { description: "c" } }]), + ]); + assert.equal(posts.length, 3); + assert.deepEqual( + posts.map((p) => p.kind), + ["tool", "text", "tool"] + ); + assert.match(posts[0].text, /Bash\(a\)/); + assert.match(posts[0].text, /Bash\(b\)/); + assert.equal(posts[1].text, "Found two."); + assert.match(posts[2].text, /Bash\(c\)/); }); - test("emoji/prose tool labels are not fenced", () => { + test("emoji/prose tool labels are not fenced and still coalesce as tool", () => { const posts = formatTranscriptLines([ asst([{ type: "tool_use", name: "AskUserQuestion", input: { questions: [{ question: "Ship it?" }] } }]), ]); + assert.equal(posts.length, 1); + assert.equal(posts[0].kind, "tool"); assert.doesNotMatch(posts[0].text, /```/); assert.match(posts[0].text, /❓ asking:/); }); - test("a user turn between assistant turns splits them, and user turns are not emitted", () => { + test("user turns (incl. tool_result lines) are not emitted", () => { const posts = formatTranscriptLines([ asst([{ type: "text", text: "first" }]), + usr([{ type: "tool_result", tool_use_id: "t1", content: "huge output..." }]), usr("a human steering message"), asst([{ type: "text", text: "second" }]), ]); + // 2 text posts, both as root-bound; the human message + tool_result are + // dropped (the human already sees their own Slack message; tool results + // are the noise we're trying to hide). assert.equal(posts.length, 2); - assert.deepEqual(posts.map((p) => p.text), ["first", "second"]); - assert.ok(posts.every((p) => p.role === "assistant")); + assert.deepEqual( + posts.map((p) => ({ kind: p.kind, text: p.text })), + [ + { kind: "text", text: "first" }, + { kind: "text", text: "second" }, + ] + ); }); test("empty assistant content produces no post", () => { @@ -74,10 +129,34 @@ describe("formatTranscriptLines", () => { assert.equal(posts.length, 0); }); - test("tool_result lines (user role) are skipped", () => { + test("tool and thinking posts carry a short statusLabel for the slack pill", () => { const posts = formatTranscriptLines([ - usr([{ type: "tool_result", tool_use_id: "t1", content: "huge output..." }]), + asst([{ type: "thinking", thinking: "let me check" }]), + asst([{ type: "tool_use", name: "Bash", input: { description: "view PR" } }]), + asst([{ type: "tool_use", name: "Read", input: { file_path: "/a/b/c/d.ts" } }]), + asst([{ type: "text", text: "Done." }]), ]); - assert.equal(posts.length, 0); + // 3 in-thread posts (thinking + 2 tools fold into 1) + 1 root text. + assert.equal(posts.length, 3); + assert.equal(posts[0].kind, "thinking"); + assert.equal(posts[0].statusLabel, "💭 thinking…"); + assert.equal(posts[1].kind, "tool"); + // After coalescing, the latest tool's label wins so the pill tracks + // current activity, not the first one in the batch. + assert.equal(posts[1].statusLabel, "🛠️ Read(.../b/c/d.ts)…"); + assert.equal(posts[2].kind, "text"); + assert.equal(posts[2].statusLabel, undefined); + }); + + test("a very long Bash description in the status label is truncated", () => { + const desc = "x".repeat(200); + const posts = formatTranscriptLines([ + asst([{ type: "tool_use", name: "Bash", input: { description: desc } }]), + ]); + // Status width is capped — we do not want the pill to overflow Slack's UI. + assert.equal(posts.length, 1); + assert.ok((posts[0].statusLabel ?? "").length <= "🛠️ ".length + 80 + 2, + `statusLabel too long: ${posts[0].statusLabel}`); + assert.match(posts[0].statusLabel ?? "", /^🛠️ Bash\(.*…$/); }); }); diff --git a/cli/src/slack/announce.ts b/cli/src/slack/announce.ts index 3cf8dcc..12f22ac 100644 --- a/cli/src/slack/announce.ts +++ b/cli/src/slack/announce.ts @@ -59,7 +59,19 @@ export async function announceToSlack(slug: string, text: string, opts: Announce // The received-prompt message opens the thread for this turn; record its // ts so the bridge threads the agent's assistant turns under it. const ts = await c.post(channel, formatReceivedMessage(text)); - if (ts) writeThreadRoot(slug, ts); + if (ts) { + writeThreadRoot(slug, ts); + // Light the "💭 thinking…" pill immediately so the channel reflects + // that the agent has started before its first tool call appears in + // the transcript a couple of poll ticks later. The bridge takes over + // refreshing (and updating with tool labels) once it sees activity. + // Failures are non-fatal — the pill is a UX nicety, not the message. + try { + await c.setStatus(channel, ts, "💭 thinking…"); + } catch { + /* setStatus is best-effort; channel + thread already exist */ + } + } return true; } catch { return false; diff --git a/cli/src/slack/bridge.ts b/cli/src/slack/bridge.ts index 780ac21..06925d5 100644 --- a/cli/src/slack/bridge.ts +++ b/cli/src/slack/bridge.ts @@ -103,6 +103,15 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { tails.push({ slug: a.slug, runtime, sessionId, cwd, channelId, path, offset: path ? statSync(path).size : 0 }); } + // Per-agent active "working…" pill. Set on each tool/thinking post, cleared + // implicitly by Slack the moment we post a text reply in the thread (and by + // Slack's own 2-minute idle TTL when the agent stalls or crashes). We refresh + // every REFRESH_MS while a turn is open so the TTL does not drop the pill + // mid-turn during long bash bursts or large diffs. + const REFRESH_MS = 60_000; + interface ActivePill { channelId: string; threadTs: string; label: string; lastSetMs: number; } + const active = new Map(); + // agent -> slack setInterval(async () => { for (const t of tails) { @@ -130,15 +139,52 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { // in the channel as their message). if (t.runtime === "codex" && post.role === "user" && consumeRelayEcho(t.slug, post.text)) continue; try { - if (post.role === "user") { - // A received prompt opens a new thread; the agent's work for this - // turn replies under it instead of cluttering the channel root. - // (Claude's received prompt is posted by the daemon's announce, - // which records the same root, so Claude threads too.) + // Text posts (user prompts AND assistant text) sit at the channel + // root and become the new thread anchor; tool/thinking blocks reply + // under that anchor. This keeps the channel readable as a single + // back-and-forth while the tool noise lives in the threads. + // (Claude's received-prompt path goes through the daemon's announce, + // which writes the same thread-root file the bridge reads here.) + if (post.kind === "text") { const ts = await client.post(t.channelId, post.text); if (ts) writeThreadRoot(t.slug, ts); + // A text reply auto-clears the "working…" pill in Slack — drop + // our refresh state so we do not redundantly keep it alive after + // the turn settled. + active.delete(t.slug); + // For a user prompt (codex user_message lands here), light an + // immediate "💭 thinking…" pill so the channel reflects that the + // agent has started work before its first tool call shows up. + // Assistant text posts wrap up a turn, so they do not get a pill. + if (post.role === "user" && ts) { + const label = "💭 thinking…"; + try { + await client.setStatus(t.channelId, ts, label); + active.set(t.slug, { channelId: t.channelId, threadTs: ts, label, lastSetMs: Date.now() }); + } catch (e) { + console.error(`setStatus (prompt) for ${t.slug} failed:`, e); + } + } } else { - await client.post(t.channelId, post.text, readThreadRoot(t.slug)); + const threadTs = readThreadRoot(t.slug); + await client.post(t.channelId, post.text, threadTs); + // Light the pill (or refresh it with the latest tool's label) so + // the channel shows the agent is still working between text + // posts. If we have no thread root yet (no announce or codex + // user_message landed), skip — there is nowhere to anchor it. + if (threadTs && post.statusLabel) { + try { + await client.setStatus(t.channelId, threadTs, post.statusLabel); + active.set(t.slug, { + channelId: t.channelId, + threadTs, + label: post.statusLabel, + lastSetMs: Date.now(), + }); + } catch (e) { + console.error(`setStatus for ${t.slug} failed:`, e); + } + } } } catch (e) { console.error(`post to ${t.slug} failed:`, e); @@ -147,6 +193,22 @@ export async function runSlackBridge(opts: BridgeOptions): Promise { } }, pollMs); + // Refresh "working…" pills that Slack would otherwise drop after its + // built-in 2-minute idle TTL. We re-set every REFRESH_MS so an agent grinding + // through a long bash sequence keeps the channel visibly active. + setInterval(async () => { + const now = Date.now(); + for (const [slug, pill] of active) { + if (now - pill.lastSetMs < REFRESH_MS) continue; + try { + await client.setStatus(pill.channelId, pill.threadTs, pill.label); + pill.lastSetMs = now; + } catch (e) { + console.error(`setStatus refresh for ${slug} failed:`, e); + } + } + }, Math.floor(REFRESH_MS / 2)); + // slack -> agent const socket = new SocketModeClient({ appToken: opts.appToken }); socket.on("message", async ({ event, ack }: any) => { diff --git a/cli/src/slack/client.ts b/cli/src/slack/client.ts index 0eeab3a..affb216 100644 --- a/cli/src/slack/client.ts +++ b/cli/src/slack/client.ts @@ -27,6 +27,18 @@ export class SlackClient { return r.ts as string | undefined; } + /// Set the "working…" pill on a thread (Slack's Agents & Assistants UI). + /// An empty status clears the pill explicitly; a normal post in the same + /// thread also clears it automatically. Slack drops the pill on its own + /// after a 2-minute idle TTL, so callers need to refresh it for long turns. + async setStatus(channelId: string, threadTs: string, status: string): Promise { + await this.web.apiCall("assistant.threads.setStatus", { + channel_id: channelId, + thread_ts: threadTs, + status, + }); + } + /// Resolve "#name" / "name" to a channel id. Ids (C…/G…) are returned as-is. async resolveChannelId(nameOrId: string): Promise { if (/^[CG][A-Z0-9]+$/.test(nameOrId)) return nameOrId; diff --git a/cli/src/slack/format.ts b/cli/src/slack/format.ts index 48a0f08..83fd7e8 100644 --- a/cli/src/slack/format.ts +++ b/cli/src/slack/format.ts @@ -1,10 +1,20 @@ /// Formats Claude Code transcript (.jsonl) lines into Slack messages, porting /// the rendering lessons from Kanban Code's chat view (TranscriptReader): -/// compact tool labels, merged consecutive assistant lines, summarized results. +/// compact tool labels, summarized results. export interface SlackPost { role: "assistant" | "user"; + /// "text" -> assistant or user prose (lands in the channel root and + /// becomes the new thread anchor for following tool calls). + /// "tool" -> a tool_use / shell-exec block (lands in the thread under + /// the most recent text post; consecutive ones coalesce). + /// "thinking" -> a brief thinking-trace excerpt (also lands in the thread). + kind: "text" | "tool" | "thinking"; text: string; // Slack mrkdwn + /// Short label suitable for the Slack "working…" pill (assistant.threads. + /// setStatus). Populated only for tool/thinking posts. Bridge uses it to + /// reflect the current activity in the pill (e.g. "🛠️ Bash(view PR)…"). + statusLabel?: string; } const MAX_TEXT = 2800; // keep individual posts readable / within Slack block limits @@ -82,61 +92,90 @@ function fenceBlock(label: string): string { return "```\n" + label + "\n```"; } -/// Render one assistant message's content blocks into Slack mrkdwn. -function renderAssistantContent(content: any): string { - if (typeof content === "string") return truncate(content); - if (!Array.isArray(content)) return ""; +/// Compose the Slack "working…" pill label for a tool block. Short, prefixed +/// with a hammer for a visual cue, and capped so a noisy Bash description does +/// not overflow Slack's status width. +function statusLabelForTool(label: string): string { + const trimmed = label.length > 80 ? label.slice(0, 80) + "…" : label; + return `🛠️ ${trimmed}…`; +} + +function role(obj: any): string | undefined { + return obj?.type ?? obj?.message?.role; +} - const parts: string[] = []; +/// Emit one SlackPost per content block on an assistant turn. Each block keeps +/// its own kind so the bridge can route it correctly (text -> channel root and +/// new thread anchor, tool/thinking -> thread under the previous text). +function emitAssistantBlocks(content: any, out: SlackPost[]): void { + if (typeof content === "string") { + const t = truncate(content); + if (t) out.push({ role: "assistant", kind: "text", text: t }); + return; + } + if (!Array.isArray(content)) return; for (const block of content) { switch (block?.type) { - case "text": - if (block.text?.trim()) parts.push(truncate(block.text)); + case "text": { + if (block.text?.trim()) out.push({ role: "assistant", kind: "text", text: truncate(block.text) }); break; - case "thinking": - if (block.thinking?.trim()) parts.push(`💭 _${truncate(block.thinking, 280)}_`); + } + case "thinking": { + if (block.thinking?.trim()) { + out.push({ + role: "assistant", + kind: "thinking", + text: `💭 _${truncate(block.thinking, 280)}_`, + statusLabel: "💭 thinking…", + }); + } break; + } case "tool_use": { const label = toolLabel(block.name, block.input ?? {}); - parts.push(PROSE_TOOLS.has(block.name) ? label : fenceBlock(label)); + const text = PROSE_TOOLS.has(block.name) ? label : fenceBlock(label); + out.push({ role: "assistant", kind: "tool", text, statusLabel: statusLabelForTool(label) }); break; } default: break; } } - return parts.join("\n"); } -function role(obj: any): string | undefined { - return obj?.type ?? obj?.message?.role; +/// Coalesce consecutive tool posts (same role) into a single post so a batch +/// of N parallel/sequential tool_use blocks turns into ONE Slack message in the +/// thread instead of N. Cuts API call volume — Slack's "high volume of +/// activity, not displaying some messages" rate-limit kicks in fast otherwise. +/// Text and thinking posts are NOT coalesced (text anchors a new thread, and +/// thinking-as-italic-quote doesn't merge cleanly). +function coalesceTools(posts: SlackPost[]): SlackPost[] { + const out: SlackPost[] = []; + for (const p of posts) { + const last = out[out.length - 1]; + if (p.kind === "tool" && last?.kind === "tool" && last.role === p.role) { + last.text = last.text + "\n" + p.text; + // The pill should reflect the LATEST tool in the run, not the first; + // a "🛠️ Bash(view checks)…" replaces "🛠️ Bash(view PR)…". + if (p.statusLabel) last.statusLabel = p.statusLabel; + } else { + out.push({ ...p }); + } + } + return out; } -/// Convert a batch of transcript lines into Slack posts, merging consecutive -/// assistant lines (Claude writes thinking and the reply on separate lines) into -/// one logical message. User turns and tool_result lines are not emitted here — -/// the bridge posts agent activity; relayed human messages already appear in -/// Slack, and automated prompts are announced separately. +/// Convert a batch of transcript lines into Slack posts. User turns and +/// tool_result lines are skipped: relayed human messages already appear in +/// Slack as that human's own message, and automated prompts are announced +/// separately by the daemon. export function formatTranscriptLines(objs: any[]): SlackPost[] { const posts: SlackPost[] = []; - let buffer: string[] = []; - - const flush = () => { - const text = buffer.join("\n").trim(); - if (text) posts.push({ role: "assistant", text }); - buffer = []; - }; - for (const obj of objs) { - if (role(obj) === "assistant") { - const rendered = renderAssistantContent(obj.message?.content ?? obj.content); - if (rendered) buffer.push(rendered); - } else { - flush(); - } + if (role(obj) !== "assistant") continue; + emitAssistantBlocks(obj.message?.content ?? obj.content, posts); } - flush(); - return posts; + return coalesceTools(posts); } /// Format Codex rollout (.jsonl) records into Slack posts. Codex logs a stream @@ -160,24 +199,33 @@ export function formatCodexRolloutLines(objs: any[]): SlackPost[] { if (o?.type !== "event_msg") continue; const p = o.payload ?? {}; if (p.type === "user_message" && typeof p.message === "string" && p.message.trim()) { - posts.push({ role: "user", text: formatReceivedMessage(truncate(p.message)) }); + posts.push({ role: "user", kind: "text", text: formatReceivedMessage(truncate(p.message)) }); } else if (p.type === "agent_message" && typeof p.message === "string" && p.message.trim()) { - posts.push({ role: "assistant", text: truncate(p.message) }); + posts.push({ role: "assistant", kind: "text", text: truncate(p.message) }); } else if (p.type === "exec_command_begin") { const cmd = Array.isArray(p.command) ? p.command.join(" ") : String(p.command ?? ""); - if (cmd.trim()) posts.push({ role: "assistant", text: fenceBlock(`$ ${truncate(cmd, 300)}`) }); + if (cmd.trim()) { + posts.push({ + role: "assistant", + kind: "tool", + text: fenceBlock(`$ ${truncate(cmd, 300)}`), + statusLabel: statusLabelForTool(`$ ${truncate(cmd, 60)}`), + }); + } } else if (p.type === "token_count" && p.rate_limits?.credits) { credits = p.rate_limits.credits; planType = p.rate_limits.plan_type; } else if (p.type === "task_complete" && p.last_agent_message == null && credits?.has_credits === false) { // Prompt was received but the turn produced no output because Codex ran - // out of credits. Surface it instead of mirroring silence. + // out of credits. Surface it instead of mirroring silence. Routed as + // text so it anchors at the channel root (operators need to see it). const plan = planType ? ` (plan: ${planType}, balance ${credits.balance ?? "0"})` : ""; posts.push({ role: "assistant", + kind: "text", text: `:warning: Codex is out of credits${plan}. The prompt was received but no output was produced, and this will keep failing until credits are topped up at https://chatgpt.com/codex/settings/usage`, }); } } - return posts; + return coalesceTools(posts); }