|
| 1 | +import { describe, test, expect, beforeEach } from "bun:test" |
| 2 | +import { setupDeps, insertTeam, insertMember } from "./helpers" |
| 3 | +import { executeTeamCreate } from "../src/tools/team-create" |
| 4 | +import { executeTeamSpawn } from "../src/tools/team-spawn" |
| 5 | +import { executeTeamMessage } from "../src/tools/team-message" |
| 6 | +import { handleSessionStatusEvent } from "../src/hooks" |
| 7 | +import { hasReportedCompletion } from "../src/messaging" |
| 8 | +import { sendMessage } from "../src/messaging" |
| 9 | + |
type Deps = ReturnType<typeof setupDeps>

/**
 * Regression suite for issue #3 (completion ping-pong loop).
 *
 * Contract pinned here: a teammate counts as "completed" once it has messaged
 * lead AND subsequently gone through the busy→idle transition. From that point
 * on, messages lead sends to it are still written to `team_message`, but no
 * `session.promptAsync` call is made to deliver them.
 */
describe("issue #3: completion loop prevention", () => {
  let deps: Deps
  const leadSession = "lead-sess"

  beforeEach(() => {
    // Fresh deps per test, so lookups by name alone cannot collide across tests.
    deps = setupDeps()
  })

  /**
   * Helper: create a team owned by the lead session and spawn one teammate into it.
   * The teammate has neither messaged lead nor changed status when this returns.
   */
  async function spawnMember(teamName: string, memberName: string): Promise<{ teamId: string; memberSession: string }> {
    await executeTeamCreate(deps, { name: teamName }, leadSession)
    const team = deps.db.query("SELECT id FROM team WHERE name = ?").get(teamName) as { id: string }

    await executeTeamSpawn(deps, { name: memberName, agent: "build", prompt: "task", worktree: false }, leadSession)
    const member = deps.db.query("SELECT session_id FROM team_member WHERE name = ?").get(memberName) as { session_id: string }

    return { teamId: team.id, memberSession: member.session_id }
  }

  /**
   * Helper: simulate the teammate finishing its work. The row is forced to
   * 'busy' first so that the following "idle" session event is handled as a
   * busy→ready transition rather than arriving on whatever status spawn left.
   */
  function finishWork(teamId: string, memberName: string, memberSession: string): void {
    deps.db.run("UPDATE team_member SET status = 'busy' WHERE team_id = ? AND name = ?", [teamId, memberName])
    handleSessionStatusEvent(deps.db, deps.registry, memberSession, "idle")
  }

  /** Helper: spawn a teammate, have them message lead, then transition busy→ready. */
  async function spawnAndComplete(teamName: string, memberName: string): Promise<{ teamId: string; memberSession: string }> {
    const spawned = await spawnMember(teamName, memberName)

    // Teammate messages lead
    await executeTeamMessage(deps, { to: "lead", text: "here are my findings" }, spawned.memberSession)

    // Simulate busy→ready transition (teammate finished work)
    finishWork(spawned.teamId, memberName, spawned.memberSession)

    return spawned
  }

  test("hasReportedCompletion is false after messaging lead but BEFORE going idle", async () => {
    const { teamId, memberSession: aliceSession } = await spawnMember("report-team", "alice")

    expect(hasReportedCompletion(deps.db, teamId, "alice")).toBe(false)

    // Alice messages lead — flag should NOT be set yet (she's still working)
    await executeTeamMessage(deps, { to: "lead", text: "done with my task" }, aliceSession)
    expect(hasReportedCompletion(deps.db, teamId, "alice")).toBe(false)

    // Alice goes idle (busy→ready) — NOW the flag should be set
    finishWork(teamId, "alice", aliceSession)
    expect(hasReportedCompletion(deps.db, teamId, "alice")).toBe(true)
  })

  test("hasReportedCompletion stays false if teammate goes idle WITHOUT messaging lead", async () => {
    const { teamId, memberSession: bobSession } = await spawnMember("no-msg-team", "bob")

    // Bob goes idle without ever messaging lead
    finishWork(teamId, "bob", bobSession)
    expect(hasReportedCompletion(deps.db, teamId, "bob")).toBe(false)
  })

  test("messages to completed teammates are stored but NOT pushed via promptAsync", async () => {
    const { teamId } = await spawnAndComplete("guard-team", "charlie")
    // Drop calls recorded during setup so only the reply below is inspected.
    deps.client.calls.length = 0

    // Lead sends a reply (this is what Kimi K2.6 does — courtesy replies)
    const result = await executeTeamMessage(deps, { to: "charlie", text: "thanks charlie!" }, leadSession)

    // Message IS stored in DB
    const msg = deps.db
      .query("SELECT content FROM team_message WHERE team_id = ? AND to_name = 'charlie' AND from_name = 'lead'")
      .get(teamId) as { content: string } | null
    expect(msg).not.toBeNull()
    expect(msg?.content).toBe("thanks charlie!")

    // But promptAsync was NOT called to deliver it
    const promptCalls = deps.client.calls.filter(c => c.method === "session.promptAsync")
    expect(promptCalls).toHaveLength(0)

    // Return value warns the lead
    expect(result).toContain("completed")
  })

  test("wake-lead skips when all teammates are ready/shutdown", async () => {
    const { teamId } = await spawnAndComplete("done-team", "dave")

    // Insert another undelivered message to lead (simulating the loop)
    sendMessage(deps.db, { teamId, from: "dave", to: "lead", content: "duplicate" })

    // All members should be in a terminal state
    // NOTE(review): this only checks the DB precondition (no member outside
    // ready/shutdown/error). No wake-lead entry point is invoked in this test,
    // so the "skips" behaviour named in the title is not exercised directly.
    const nonTerminal = deps.db.query(
      "SELECT COUNT(*) as c FROM team_member WHERE team_id = ? AND status NOT IN ('ready', 'shutdown', 'error')"
    ).get(teamId) as { c: number }
    expect(nonTerminal.c).toBe(0)
  })

  test("peer-flush skips for completed teammates", async () => {
    const { teamId } = await spawnAndComplete("flush-team", "eve")

    expect(hasReportedCompletion(deps.db, teamId, "eve")).toBe(true)

    // Insert a stale peer message addressed to eve (simulating the loop)
    deps.db.run(
      "INSERT INTO team_message (id, team_id, from_name, to_name, content, delivered, time_created) VALUES (?, ?, 'lead', 'eve', 'follow up', 0, ?)",
      ["msg-stale", teamId, Date.now() - 10_000]
    )

    // NOTE(review): no flush routine is called after the insert; the assertion
    // below only confirms the persisted flag is still set once the stale
    // message exists.
    const reported = deps.db
      .query("SELECT reported_to_lead FROM team_member WHERE team_id = ? AND name = 'eve'")
      .get(teamId) as { reported_to_lead: number }
    expect(reported.reported_to_lead).toBe(1)
  })

  test("full ping-pong regression: promptAsync calls are bounded after completion", async () => {
    const { teamId, memberSession } = await spawnAndComplete("loop-team", "frank")

    // Reset call log — everything after this should be bounded
    deps.client.calls.length = 0

    // Simulate the ping-pong loop that Kimi K2.6 triggers:
    const reply1 = await executeTeamMessage(deps, { to: "frank", text: "thanks for the report" }, leadSession)
    const reply2 = await executeTeamMessage(deps, { to: "frank", text: "anything else?" }, leadSession)

    // Both replies should be stored but NOT delivered
    expect(reply1).toContain("completed")
    expect(reply2).toContain("completed")

    // Stored: filter on the two reply texts so rows written during setup cannot affect the count.
    const stored = deps.db.query(
      "SELECT COUNT(*) as c FROM team_message WHERE team_id = ? AND to_name = 'frank' AND from_name = 'lead' AND content IN ('thanks for the report', 'anything else?')"
    ).get(teamId) as { c: number }
    expect(stored.c).toBe(2)

    // Zero promptAsync calls to frank's session after he reported
    const frankCalls = deps.client.calls.filter(c => {
      if (c.method !== "session.promptAsync") return false
      const args = c.args[0] as { sessionID: string }
      return args.sessionID === memberSession
    })
    expect(frankCalls).toHaveLength(0)
  })

  test("teammate can still receive messages BEFORE going idle (Q&A works)", async () => {
    const { teamId, memberSession: graceSession } = await spawnMember("qa-team", "grace")

    // Grace asks lead a question (messages lead, but is still busy)
    await executeTeamMessage(deps, { to: "lead", text: "I have a question about the API" }, graceSession)

    // Grace is NOT marked as completed yet (still busy)
    expect(hasReportedCompletion(deps.db, teamId, "grace")).toBe(false)

    deps.client.calls.length = 0

    // Lead answers — this SHOULD be delivered (grace hasn't completed)
    const result = await executeTeamMessage(deps, { to: "grace", text: "use the v2 endpoint" }, leadSession)

    // Message was delivered via promptAsync (not blocked)
    expect(result).toBe("Message sent to grace.")
    const promptCalls = deps.client.calls.filter(c => c.method === "session.promptAsync")
    expect(promptCalls).toHaveLength(1)
  })
})
0 commit comments