langwatch · rogeriochaves · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/cli/src/codex-runtime.test.ts b/cli/src/codex-runtime.test.ts
@@ -110,16 +110,41 @@ describe("formatCodexRolloutLines", () => {
     ];
     const posts = formatCodexRolloutLines(objs);
     assert.equal(posts.length, 4);
-    // The injected prompt is mirrored like the Claude UserPromptSubmit announce.
+    // Prompt and agent messages route to the channel root (kind=text); the
+    // exec_command_begin routes into the thread under the last text (kind=tool).
     assert.equal(posts[0].role, "user");
+    assert.equal(posts[0].kind, "text");
     assert.match(posts[0].text, /^>>> Received user message/);
     assert.match(posts[0].text, /Please review PR 519\./);
+    assert.equal(posts[1].kind, "text");
     assert.equal(posts[1].text, "I'll review the PR now.");
+    assert.equal(posts[2].kind, "tool");
     assert.match(posts[2].text, /gh pr view 519/);
+    assert.equal(posts[3].kind, "text");
     assert.equal(posts[3].text, "No blockers; 2 nits.");
     assert.ok(posts.slice(1).every((p) => p.role === "assistant"));
   });
 
+  test("coalesces consecutive codex exec_command_begin into one tool post", () => {
+    const objs = [
+      { type: "event_msg", payload: { type: "user_message", message: "Look at PR 519.", images: [] } },
+      { type: "event_msg", payload: { type: "agent_message", message: "Pulling details." } },
+      { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "view", "519"] } },
+      { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "diff", "519"] } },
+      { type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "checks", "519"] } },
+    ];
+    const posts = formatCodexRolloutLines(objs);
+    // user + agent text + ONE tool (3 commands coalesced).
+    assert.equal(posts.length, 3);
+    assert.deepEqual(posts.map((p) => p.kind), ["text", "text", "tool"]);
+    assert.match(posts[2].text, /gh pr view 519/);
+    assert.match(posts[2].text, /gh pr diff 519/);
+    assert.match(posts[2].text, /gh pr checks 519/);
+    // Pill follows the latest command in the coalesced batch so the channel
+    // shows "now running gh pr checks 519" rather than the first one.
+    assert.match(posts[2].statusLabel ?? "", /gh pr checks 519/);
+  });
+
   test("mirrors an out-of-credits failure when a turn produces no output", () => {
     const objs = [
       { type: "event_msg", payload: { type: "user_message", message: "Please review PR 4288.", images: [] } },
@@ -136,6 +161,10 @@ describe("formatCodexRolloutLines", () => {
     const posts = formatCodexRolloutLines(objs);
     assert.equal(posts.length, 2);
     assert.equal(posts[0].role, "user");
+    assert.equal(posts[0].kind, "text");
+    // The credits warning has to surface at the channel root, not buried in a
+    // thread — operators won't see "no output" otherwise.
+    assert.equal(posts[1].kind, "text");
     assert.match(posts[1].text, /out of credits/i);
     assert.match(posts[1].text, /plan: plus, balance 0/);
     assert.match(posts[1].text, /chatgpt\.com\/codex\/settings\/usage/);
@@ -152,6 +181,7 @@ describe("formatCodexRolloutLines", () => {
     ];
     const posts = formatCodexRolloutLines(objs);
     assert.equal(posts.length, 1);
+    assert.equal(posts[0].kind, "text");
     assert.equal(posts[0].text, "Reviewed, LGTM.");
   });
 });

diff --git a/cli/src/slack-format.test.ts b/cli/src/slack-format.test.ts
@@ -34,50 +34,129 @@ describe("formatTranscriptLines", () => {
   const asst = (content: any) => ({ type: "assistant", message: { role: "assistant", content } });
   const usr = (content: any) => ({ type: "user", message: { role: "user", content } });
 
-  test("merges consecutive assistant lines (thinking + reply + tool) into one post", () => {
+  test("emits one post per content block with the right kind", () => {
     const posts = formatTranscriptLines([
       asst([{ type: "thinking", thinking: "let me check the deps" }]),
       asst([
         { type: "text", text: "Reviewing the bump." },
         { type: "tool_use", name: "Bash", input: { description: "run tests" } },
       ]),
     ]);
-    assert.equal(posts.length, 1);
-    assert.equal(posts[0].role, "assistant");
+    // 3 blocks total, so 3 posts (text/tool/thinking each routed independently
+    // by the bridge: text -> channel root, tool/thinking -> in-thread).
+    assert.equal(posts.length, 3);
+    assert.deepEqual(
+      posts.map((p) => p.kind),
+      ["thinking", "text", "tool"]
+    );
     assert.match(posts[0].text, /💭 _let me check the deps_/);
-    assert.match(posts[0].text, /Reviewing the bump\./);
-    // Command-style tool labels render in a fenced code block, not inline.
-    assert.match(posts[0].text, /```\nBash\(run tests\)\n```/);
+    assert.equal(posts[1].text, "Reviewing the bump.");
+    assert.match(posts[2].text, /```\nBash\(run tests\)\n```/);
+    assert.ok(posts.every((p) => p.role === "assistant"));
+  });
+
+  test("coalesces consecutive tool posts into one (debounce against Slack rate limits)", () => {
+    const posts = formatTranscriptLines([
+      asst([{ type: "text", text: "Checking the PR." }]),
+      asst([{ type: "tool_use", name: "Bash", input: { description: "view PR" } }]),
+      asst([{ type: "tool_use", name: "Bash", input: { description: "view diff" } }]),
+      asst([{ type: "tool_use", name: "Bash", input: { description: "view checks" } }]),
+    ]);
+    // The 3 Bash calls fold into 1 tool post (single Slack API call instead
+    // of 3); the text post stays separate so it can anchor the new thread.
+    assert.equal(posts.length, 2);
+    assert.equal(posts[0].kind, "text");
+    assert.equal(posts[0].text, "Checking the PR.");
+    assert.equal(posts[1].kind, "tool");
+    assert.match(posts[1].text, /Bash\(view PR\)/);
+    assert.match(posts[1].text, /Bash\(view diff\)/);
+    assert.match(posts[1].text, /Bash\(view checks\)/);
+  });
+
+  test("a text post in the middle breaks the tool coalescing run", () => {
+    // Real-world shape: tools, then the agent says something, then more tools.
+    // The new text must anchor a new thread, so the second tool run can't fold
+    // into the first.
+    const posts = formatTranscriptLines([
+      asst([{ type: "tool_use", name: "Bash", input: { description: "a" } }]),
+      asst([{ type: "tool_use", name: "Bash", input: { description: "b" } }]),
+      asst([{ type: "text", text: "Found two." }]),
+      asst([{ type: "tool_use", name: "Bash", input: { description: "c" } }]),
+    ]);
+    assert.equal(posts.length, 3);
+    assert.deepEqual(
+      posts.map((p) => p.kind),
+      ["tool", "text", "tool"]
+    );
+    assert.match(posts[0].text, /Bash\(a\)/);
+    assert.match(posts[0].text, /Bash\(b\)/);
+    assert.equal(posts[1].text, "Found two.");
+    assert.match(posts[2].text, /Bash\(c\)/);
   });
 
-  test("emoji/prose tool labels are not fenced", () => {
+  test("emoji/prose tool labels are not fenced and still coalesce as tool", () => {
     const posts = formatTranscriptLines([
       asst([{ type: "tool_use", name: "AskUserQuestion", input: { questions: [{ question: "Ship it?" }] } }]),
     ]);
+    assert.equal(posts.length, 1);
+    assert.equal(posts[0].kind, "tool");
     assert.doesNotMatch(posts[0].text, /```/);
     assert.match(posts[0].text, /❓ asking:/);
   });
 
-  test("a user turn between assistant turns splits them, and user turns are not emitted", () => {
+  test("user turns (incl. tool_result lines) are not emitted", () => {
     const posts = formatTranscriptLines([
       asst([{ type: "text", text: "first" }]),
+      usr([{ type: "tool_result", tool_use_id: "t1", content: "huge output..." }]),
       usr("a human steering message"),
       asst([{ type: "text", text: "second" }]),
     ]);
+    // 2 text posts, both as root-bound; the human message + tool_result are
+    // dropped (the human already sees their own Slack message; tool results
+    // are the noise we're trying to hide).
     assert.equal(posts.length, 2);
-    assert.deepEqual(posts.map((p) => p.text), ["first", "second"]);
-    assert.ok(posts.every((p) => p.role === "assistant"));
+    assert.deepEqual(
+      posts.map((p) => ({ kind: p.kind, text: p.text })),
+      [
+        { kind: "text", text: "first" },
+        { kind: "text", text: "second" },
+      ]
+    );
   });
 
   test("empty assistant content produces no post", () => {
     const posts = formatTranscriptLines([asst([{ type: "text", text: "   " }])]);
     assert.equal(posts.length, 0);
   });
 
-  test("tool_result lines (user role) are skipped", () => {
+  test("tool and thinking posts carry a short statusLabel for the slack pill", () => {
     const posts = formatTranscriptLines([
-      usr([{ type: "tool_result", tool_use_id: "t1", content: "huge output..." }]),
+      asst([{ type: "thinking", thinking: "let me check" }]),
+      asst([{ type: "tool_use", name: "Bash", input: { description: "view PR" } }]),
+      asst([{ type: "tool_use", name: "Read", input: { file_path: "/a/b/c/d.ts" } }]),
+      asst([{ type: "text", text: "Done." }]),
     ]);
-    assert.equal(posts.length, 0);
+    // 3 in-thread posts (thinking + 2 tools fold into 1) + 1 root text.
+    assert.equal(posts.length, 3);
+    assert.equal(posts[0].kind, "thinking");
+    assert.equal(posts[0].statusLabel, "💭 thinking…");
+    assert.equal(posts[1].kind, "tool");
+    // After coalescing, the latest tool's label wins so the pill tracks
+    // current activity, not the first one in the batch.
+    assert.equal(posts[1].statusLabel, "🛠️ Read(.../b/c/d.ts)…");
+    assert.equal(posts[2].kind, "text");
+    assert.equal(posts[2].statusLabel, undefined);
+  });
+
+  test("a very long Bash description in the status label is truncated", () => {
+    const desc = "x".repeat(200);
+    const posts = formatTranscriptLines([
+      asst([{ type: "tool_use", name: "Bash", input: { description: desc } }]),
+    ]);
+    // Status width is capped — we do not want the pill to overflow Slack's UI.
+    assert.equal(posts.length, 1);
+    assert.ok((posts[0].statusLabel ?? "").length <= "🛠️ ".length + 80 + 2,
+      `statusLabel too long: ${posts[0].statusLabel}`);
+    assert.match(posts[0].statusLabel ?? "", /^🛠️ Bash\(.*…$/);
   });
 });
diff --git a/cli/src/slack/announce.ts b/cli/src/slack/announce.ts
@@ -59,7 +59,19 @@ export async function announceToSlack(slug: string, text: string, opts: Announce
     // The received-prompt message opens the thread for this turn; record its
     // ts so the bridge threads the agent's assistant turns under it.
     const ts = await c.post(channel, formatReceivedMessage(text));
-    if (ts) writeThreadRoot(slug, ts);
+    if (ts) {
+      writeThreadRoot(slug, ts);
+      // Light the "💭 thinking…" pill immediately so the channel reflects
+      // that the agent has started before its first tool call appears in
+      // the transcript a couple of poll ticks later. The bridge takes over
+      // refreshing (and updating with tool labels) once it sees activity.
+      // Failures are non-fatal — the pill is a UX nicety, not the message.
+      try {
+        await c.setStatus(channel, ts, "💭 thinking…");
+      } catch {
+        /* setStatus is best-effort; channel + thread already exist */
+      }
+    }
     return true;
   } catch {
     return false;

diff --git a/cli/src/slack/bridge.ts b/cli/src/slack/bridge.ts
@@ -103,6 +103,15 @@ export async function runSlackBridge(opts: BridgeOptions): Promise<void> {
     tails.push({ slug: a.slug, runtime, sessionId, cwd, channelId, path, offset: path ? statSync(path).size : 0 });
   }
 
+  // Per-agent active "working…" pill. Set on each tool/thinking post, cleared
+  // implicitly by Slack the moment we post a text reply in the thread (and by
+  // Slack's own 2-minute idle TTL when the agent stalls or crashes). We refresh
+  // every REFRESH_MS while a turn is open so the TTL does not drop the pill
+  // mid-turn during long bash bursts or large diffs.
+  const REFRESH_MS = 60_000;
+  interface ActivePill { channelId: string; threadTs: string; label: string; lastSetMs: number; }
+  const active = new Map<string /* slug */, ActivePill>();
+
   // agent -> slack
   setInterval(async () => {
     for (const t of tails) {
@@ -130,15 +139,52 @@ export async function runSlackBridge(opts: BridgeOptions): Promise<void> {
         // in the channel as their message).
         if (t.runtime === "codex" && post.role === "user" && consumeRelayEcho(t.slug, post.text)) continue;
         try {
-          if (post.role === "user") {
-            // A received prompt opens a new thread; the agent's work for this
-            // turn replies under it instead of cluttering the channel root.
-            // (Claude's received prompt is posted by the daemon's announce,
-            // which records the same root, so Claude threads too.)
+          // Text posts (user prompts AND assistant text) sit at the channel
+          // root and become the new thread anchor; tool/thinking blocks reply
+          // under that anchor. This keeps the channel readable as a single
+          // back-and-forth while the tool noise lives in the threads.
+          // (Claude's received-prompt path goes through the daemon's announce,
+          // which writes the same thread-root file the bridge reads here.)
+          if (post.kind === "text") {
             const ts = await client.post(t.channelId, post.text);
             if (ts) writeThreadRoot(t.slug, ts);
+            // A text reply auto-clears the "working…" pill in Slack — drop
+            // our refresh state so we do not redundantly keep it alive after
+            // the turn settled.
+            active.delete(t.slug);
+            // For a user prompt (codex user_message lands here), light an
+            // immediate "💭 thinking…" pill so the channel reflects that the
+            // agent has started work before its first tool call shows up.
+            // Assistant text posts wrap up a turn, so they do not get a pill.
+            if (post.role === "user" && ts) {
+              const label = "💭 thinking…";
+              try {
+                await client.setStatus(t.channelId, ts, label);
+                active.set(t.slug, { channelId: t.channelId, threadTs: ts, label, lastSetMs: Date.now() });
+              } catch (e) {
+                console.error(`setStatus (prompt) for ${t.slug} failed:`, e);
+              }
+            }
           } else {
-            await client.post(t.channelId, post.text, readThreadRoot(t.slug));
+            const threadTs = readThreadRoot(t.slug);
+            await client.post(t.channelId, post.text, threadTs);
+            // Light the pill (or refresh it with the latest tool's label) so
+            // the channel shows the agent is still working between text
+            // posts. If we have no thread root yet (no announce or codex
+            // user_message landed), skip — there is nowhere to anchor it.
+            if (threadTs && post.statusLabel) {
+              try {
+                await client.setStatus(t.channelId, threadTs, post.statusLabel);
+                active.set(t.slug, {
+                  channelId: t.channelId,
+                  threadTs,
+                  label: post.statusLabel,
+                  lastSetMs: Date.now(),
+                });
+              } catch (e) {
+                console.error(`setStatus for ${t.slug} failed:`, e);
+              }
+            }
           }
         } catch (e) {
           console.error(`post to ${t.slug} failed:`, e);
@@ -147,6 +193,22 @@ export async function runSlackBridge(opts: BridgeOptions): Promise<void> {
     }
   }, pollMs);
 
+  // Refresh "working…" pills that Slack would otherwise drop after its
+  // built-in 2-minute idle TTL. We re-set every REFRESH_MS so an agent grinding
+  // through a long bash sequence keeps the channel visibly active.
+  setInterval(async () => {
+    const now = Date.now();
+    for (const [slug, pill] of active) {
+      if (now - pill.lastSetMs < REFRESH_MS) continue;
+      try {
+        await client.setStatus(pill.channelId, pill.threadTs, pill.label);
+        pill.lastSetMs = now;
+      } catch (e) {
+        console.error(`setStatus refresh for ${slug} failed:`, e);
+      }
+    }
+  }, Math.floor(REFRESH_MS / 2));
+
   // slack -> agent
   const socket = new SocketModeClient({ appToken: opts.appToken });
   socket.on("message", async ({ event, ack }: any) => {

diff --git a/cli/src/slack/client.ts b/cli/src/slack/client.ts
@@ -27,6 +27,18 @@ export class SlackClient {
     return r.ts as string | undefined;
   }
 
+  /// Set the "working…" pill on a thread (Slack's Agents & Assistants UI).
+  /// An empty status clears the pill explicitly; a normal post in the same
+  /// thread also clears it automatically. Slack drops the pill on its own
+  /// after a 2-minute idle TTL, so callers need to refresh it for long turns.
+  async setStatus(channelId: string, threadTs: string, status: string): Promise<void> {
+    await this.web.apiCall("assistant.threads.setStatus", {
+      channel_id: channelId,
+      thread_ts: threadTs,
+      status,
+    });
+  }
+
   /// Resolve "#name" / "name" to a channel id. Ids (C…/G…) are returned as-is.
   async resolveChannelId(nameOrId: string): Promise<string | undefined> {
     if (/^[CG][A-Z0-9]+$/.test(nameOrId)) return nameOrId;