Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 31 additions & 1 deletion cli/src/codex-runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,41 @@ describe("formatCodexRolloutLines", () => {
];
const posts = formatCodexRolloutLines(objs);
assert.equal(posts.length, 4);
// The injected prompt is mirrored like the Claude UserPromptSubmit announce.
// Prompt and agent messages route to the channel root (kind=text); the
// exec_command_begin routes into the thread under the last text (kind=tool).
assert.equal(posts[0].role, "user");
assert.equal(posts[0].kind, "text");
assert.match(posts[0].text, /^>>> Received user message/);
assert.match(posts[0].text, /Please review PR 519\./);
assert.equal(posts[1].kind, "text");
assert.equal(posts[1].text, "I'll review the PR now.");
assert.equal(posts[2].kind, "tool");
assert.match(posts[2].text, /gh pr view 519/);
assert.equal(posts[3].kind, "text");
assert.equal(posts[3].text, "No blockers; 2 nits.");
assert.ok(posts.slice(1).every((p) => p.role === "assistant"));
});

test("coalesces consecutive codex exec_command_begin into one tool post", () => {
const objs = [
{ type: "event_msg", payload: { type: "user_message", message: "Look at PR 519.", images: [] } },
{ type: "event_msg", payload: { type: "agent_message", message: "Pulling details." } },
{ type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "view", "519"] } },
{ type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "diff", "519"] } },
{ type: "event_msg", payload: { type: "exec_command_begin", command: ["gh", "pr", "checks", "519"] } },
];
const posts = formatCodexRolloutLines(objs);
// user + agent text + ONE tool (3 commands coalesced).
assert.equal(posts.length, 3);
assert.deepEqual(posts.map((p) => p.kind), ["text", "text", "tool"]);
assert.match(posts[2].text, /gh pr view 519/);
assert.match(posts[2].text, /gh pr diff 519/);
assert.match(posts[2].text, /gh pr checks 519/);
// Pill follows the latest command in the coalesced batch so the channel
// shows "now running gh pr checks 519" rather than the first one.
assert.match(posts[2].statusLabel ?? "", /gh pr checks 519/);
});

test("mirrors an out-of-credits failure when a turn produces no output", () => {
const objs = [
{ type: "event_msg", payload: { type: "user_message", message: "Please review PR 4288.", images: [] } },
Expand All @@ -136,6 +161,10 @@ describe("formatCodexRolloutLines", () => {
const posts = formatCodexRolloutLines(objs);
assert.equal(posts.length, 2);
assert.equal(posts[0].role, "user");
assert.equal(posts[0].kind, "text");
// The credits warning has to surface at the channel root, not buried in a
// thread — operators won't see "no output" otherwise.
assert.equal(posts[1].kind, "text");
assert.match(posts[1].text, /out of credits/i);
assert.match(posts[1].text, /plan: plus, balance 0/);
assert.match(posts[1].text, /chatgpt\.com\/codex\/settings\/usage/);
Expand All @@ -152,6 +181,7 @@ describe("formatCodexRolloutLines", () => {
];
const posts = formatCodexRolloutLines(objs);
assert.equal(posts.length, 1);
assert.equal(posts[0].kind, "text");
assert.equal(posts[0].text, "Reviewed, LGTM.");
});
});
Expand Down
105 changes: 92 additions & 13 deletions cli/src/slack-format.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,50 +34,129 @@ describe("formatTranscriptLines", () => {
const asst = (content: any) => ({ type: "assistant", message: { role: "assistant", content } });
const usr = (content: any) => ({ type: "user", message: { role: "user", content } });

test("merges consecutive assistant lines (thinking + reply + tool) into one post", () => {
test("emits one post per content block with the right kind", () => {
const posts = formatTranscriptLines([
asst([{ type: "thinking", thinking: "let me check the deps" }]),
asst([
{ type: "text", text: "Reviewing the bump." },
{ type: "tool_use", name: "Bash", input: { description: "run tests" } },
]),
]);
assert.equal(posts.length, 1);
assert.equal(posts[0].role, "assistant");
// 3 blocks total, so 3 posts (text/tool/thinking each routed independently
// by the bridge: text -> channel root, tool/thinking -> in-thread).
assert.equal(posts.length, 3);
assert.deepEqual(
posts.map((p) => p.kind),
["thinking", "text", "tool"]
);
assert.match(posts[0].text, /💭 _let me check the deps_/);
assert.match(posts[0].text, /Reviewing the bump\./);
// Command-style tool labels render in a fenced code block, not inline.
assert.match(posts[0].text, /```\nBash\(run tests\)\n```/);
assert.equal(posts[1].text, "Reviewing the bump.");
assert.match(posts[2].text, /```\nBash\(run tests\)\n```/);
assert.ok(posts.every((p) => p.role === "assistant"));
});

test("coalesces consecutive tool posts into one (debounce against Slack rate limits)", () => {
const posts = formatTranscriptLines([
asst([{ type: "text", text: "Checking the PR." }]),
asst([{ type: "tool_use", name: "Bash", input: { description: "view PR" } }]),
asst([{ type: "tool_use", name: "Bash", input: { description: "view diff" } }]),
asst([{ type: "tool_use", name: "Bash", input: { description: "view checks" } }]),
]);
// The 3 Bash calls fold into 1 tool post (single Slack API call instead
// of 3); the text post stays separate so it can anchor the new thread.
assert.equal(posts.length, 2);
assert.equal(posts[0].kind, "text");
assert.equal(posts[0].text, "Checking the PR.");
assert.equal(posts[1].kind, "tool");
assert.match(posts[1].text, /Bash\(view PR\)/);
assert.match(posts[1].text, /Bash\(view diff\)/);
assert.match(posts[1].text, /Bash\(view checks\)/);
});

test("a text post in the middle breaks the tool coalescing run", () => {
// Real-world shape: tools, then the agent says something, then more tools.
// The new text must anchor a new thread, so the second tool run can't fold
// into the first.
const posts = formatTranscriptLines([
asst([{ type: "tool_use", name: "Bash", input: { description: "a" } }]),
asst([{ type: "tool_use", name: "Bash", input: { description: "b" } }]),
asst([{ type: "text", text: "Found two." }]),
asst([{ type: "tool_use", name: "Bash", input: { description: "c" } }]),
]);
assert.equal(posts.length, 3);
assert.deepEqual(
posts.map((p) => p.kind),
["tool", "text", "tool"]
);
assert.match(posts[0].text, /Bash\(a\)/);
assert.match(posts[0].text, /Bash\(b\)/);
assert.equal(posts[1].text, "Found two.");
assert.match(posts[2].text, /Bash\(c\)/);
});

test("emoji/prose tool labels are not fenced", () => {
test("emoji/prose tool labels are not fenced and still coalesce as tool", () => {
const posts = formatTranscriptLines([
asst([{ type: "tool_use", name: "AskUserQuestion", input: { questions: [{ question: "Ship it?" }] } }]),
]);
assert.equal(posts.length, 1);
assert.equal(posts[0].kind, "tool");
assert.doesNotMatch(posts[0].text, /```/);
assert.match(posts[0].text, /❓ asking:/);
});

test("a user turn between assistant turns splits them, and user turns are not emitted", () => {
test("user turns (incl. tool_result lines) are not emitted", () => {
const posts = formatTranscriptLines([
asst([{ type: "text", text: "first" }]),
usr([{ type: "tool_result", tool_use_id: "t1", content: "huge output..." }]),
usr("a human steering message"),
asst([{ type: "text", text: "second" }]),
]);
// 2 text posts, both as root-bound; the human message + tool_result are
// dropped (the human already sees their own Slack message; tool results
// are the noise we're trying to hide).
assert.equal(posts.length, 2);
assert.deepEqual(posts.map((p) => p.text), ["first", "second"]);
assert.ok(posts.every((p) => p.role === "assistant"));
assert.deepEqual(
posts.map((p) => ({ kind: p.kind, text: p.text })),
[
{ kind: "text", text: "first" },
{ kind: "text", text: "second" },
]
);
});

test("empty assistant content produces no post", () => {
const posts = formatTranscriptLines([asst([{ type: "text", text: " " }])]);
assert.equal(posts.length, 0);
});

test("tool_result lines (user role) are skipped", () => {
test("tool and thinking posts carry a short statusLabel for the slack pill", () => {
const posts = formatTranscriptLines([
usr([{ type: "tool_result", tool_use_id: "t1", content: "huge output..." }]),
asst([{ type: "thinking", thinking: "let me check" }]),
asst([{ type: "tool_use", name: "Bash", input: { description: "view PR" } }]),
asst([{ type: "tool_use", name: "Read", input: { file_path: "/a/b/c/d.ts" } }]),
asst([{ type: "text", text: "Done." }]),
]);
assert.equal(posts.length, 0);
// 3 in-thread posts (thinking + 2 tools fold into 1) + 1 root text.
assert.equal(posts.length, 3);
assert.equal(posts[0].kind, "thinking");
assert.equal(posts[0].statusLabel, "💭 thinking…");
assert.equal(posts[1].kind, "tool");
// After coalescing, the latest tool's label wins so the pill tracks
// current activity, not the first one in the batch.
assert.equal(posts[1].statusLabel, "🛠️ Read(.../b/c/d.ts)…");
assert.equal(posts[2].kind, "text");
assert.equal(posts[2].statusLabel, undefined);
});

test("a very long Bash description in the status label is truncated", () => {
const desc = "x".repeat(200);
const posts = formatTranscriptLines([
asst([{ type: "tool_use", name: "Bash", input: { description: desc } }]),
]);
// Status width is capped — we do not want the pill to overflow Slack's UI.
assert.equal(posts.length, 1);
assert.ok((posts[0].statusLabel ?? "").length <= "🛠️ ".length + 80 + 2,
`statusLabel too long: ${posts[0].statusLabel}`);
assert.match(posts[0].statusLabel ?? "", /^🛠️ Bash\(.*…$/);
});
});
14 changes: 13 additions & 1 deletion cli/src/slack/announce.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,19 @@ export async function announceToSlack(slug: string, text: string, opts: Announce
// The received-prompt message opens the thread for this turn; record its
// ts so the bridge threads the agent's assistant turns under it.
const ts = await c.post(channel, formatReceivedMessage(text));
if (ts) writeThreadRoot(slug, ts);
if (ts) {
writeThreadRoot(slug, ts);
// Light the "💭 thinking…" pill immediately so the channel reflects
// that the agent has started before its first tool call appears in
// the transcript a couple of poll ticks later. The bridge takes over
// refreshing (and updating with tool labels) once it sees activity.
// Failures are non-fatal — the pill is a UX nicety, not the message.
try {
await c.setStatus(channel, ts, "💭 thinking…");
} catch {
/* setStatus is best-effort; channel + thread already exist */
}
}
return true;
} catch {
return false;
Expand Down
74 changes: 68 additions & 6 deletions cli/src/slack/bridge.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,15 @@ export async function runSlackBridge(opts: BridgeOptions): Promise<void> {
tails.push({ slug: a.slug, runtime, sessionId, cwd, channelId, path, offset: path ? statSync(path).size : 0 });
}

// Per-agent active "working…" pill. Set on each tool/thinking post, cleared
// implicitly by Slack the moment we post a text reply in the thread (and by
// Slack's own 2-minute idle TTL when the agent stalls or crashes). We refresh
// every REFRESH_MS while a turn is open so the TTL does not drop the pill
// mid-turn during long bash bursts or large diffs.
const REFRESH_MS = 60_000;
interface ActivePill { channelId: string; threadTs: string; label: string; lastSetMs: number; }
const active = new Map<string /* slug */, ActivePill>();

// agent -> slack
setInterval(async () => {
for (const t of tails) {
Expand Down Expand Up @@ -130,15 +139,52 @@ export async function runSlackBridge(opts: BridgeOptions): Promise<void> {
// in the channel as their message).
if (t.runtime === "codex" && post.role === "user" && consumeRelayEcho(t.slug, post.text)) continue;
try {
if (post.role === "user") {
// A received prompt opens a new thread; the agent's work for this
// turn replies under it instead of cluttering the channel root.
// (Claude's received prompt is posted by the daemon's announce,
// which records the same root, so Claude threads too.)
// Text posts (user prompts AND assistant text) sit at the channel
// root and become the new thread anchor; tool/thinking blocks reply
// under that anchor. This keeps the channel readable as a single
// back-and-forth while the tool noise lives in the threads.
// (Claude's received-prompt path goes through the daemon's announce,
// which writes the same thread-root file the bridge reads here.)
if (post.kind === "text") {
const ts = await client.post(t.channelId, post.text);
if (ts) writeThreadRoot(t.slug, ts);
// A text reply auto-clears the "working…" pill in Slack — drop
// our refresh state so we do not redundantly keep it alive after
// the turn settled.
active.delete(t.slug);
// For a user prompt (codex user_message lands here), light an
// immediate "💭 thinking…" pill so the channel reflects that the
// agent has started work before its first tool call shows up.
// Assistant text posts wrap up a turn, so they do not get a pill.
if (post.role === "user" && ts) {
const label = "💭 thinking…";
try {
await client.setStatus(t.channelId, ts, label);
active.set(t.slug, { channelId: t.channelId, threadTs: ts, label, lastSetMs: Date.now() });
} catch (e) {
console.error(`setStatus (prompt) for ${t.slug} failed:`, e);
}
}
} else {
await client.post(t.channelId, post.text, readThreadRoot(t.slug));
const threadTs = readThreadRoot(t.slug);
await client.post(t.channelId, post.text, threadTs);
// Light the pill (or refresh it with the latest tool's label) so
// the channel shows the agent is still working between text
// posts. If we have no thread root yet (no announce or codex
// user_message landed), skip — there is nowhere to anchor it.
if (threadTs && post.statusLabel) {
try {
await client.setStatus(t.channelId, threadTs, post.statusLabel);
active.set(t.slug, {
channelId: t.channelId,
threadTs,
label: post.statusLabel,
lastSetMs: Date.now(),
});
} catch (e) {
console.error(`setStatus for ${t.slug} failed:`, e);
}
}
}
} catch (e) {
console.error(`post to ${t.slug} failed:`, e);
Expand All @@ -147,6 +193,22 @@ export async function runSlackBridge(opts: BridgeOptions): Promise<void> {
}
}, pollMs);

// Refresh "working…" pills that Slack would otherwise drop after its
// built-in 2-minute idle TTL. We re-set every REFRESH_MS so an agent grinding
// through a long bash sequence keeps the channel visibly active.
setInterval(async () => {
const now = Date.now();
for (const [slug, pill] of active) {
if (now - pill.lastSetMs < REFRESH_MS) continue;
try {
await client.setStatus(pill.channelId, pill.threadTs, pill.label);
pill.lastSetMs = now;
} catch (e) {
console.error(`setStatus refresh for ${slug} failed:`, e);
}
}
}, Math.floor(REFRESH_MS / 2));

// slack -> agent
const socket = new SocketModeClient({ appToken: opts.appToken });
socket.on("message", async ({ event, ack }: any) => {
Expand Down
12 changes: 12 additions & 0 deletions cli/src/slack/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,18 @@ export class SlackClient {
return r.ts as string | undefined;
}

/// Set the "working…" pill on a thread (Slack's Agents & Assistants UI).
/// An empty status clears the pill explicitly; a normal post in the same
/// thread also clears it automatically. Slack drops the pill on its own
/// after a 2-minute idle TTL, so callers need to refresh it for long turns.
async setStatus(channelId: string, threadTs: string, status: string): Promise<void> {
await this.web.apiCall("assistant.threads.setStatus", {
channel_id: channelId,
thread_ts: threadTs,
status,
});
}

/// Resolve "#name" / "name" to a channel id. Ids (C…/G…) are returned as-is.
async resolveChannelId(nameOrId: string): Promise<string | undefined> {
if (/^[CG][A-Z0-9]+$/.test(nameOrId)) return nameOrId;
Expand Down
Loading
Loading