Skip to content

Commit ffa8cc5

Browse files
authored
Merge pull request GCWing#653 from nonoqing/yuyiqing/dev
fix(agentic): make truncated Write recovery actionable end-to-end
2 parents f82fa72 + 8c7a366 commit ffa8cc5

2 files changed

Lines changed: 114 additions & 2 deletions

File tree

src/crates/core/src/agentic/agents/deep_research_agent.rs

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,10 @@ impl DeepResearchAgent {
2727
"LS".to_string(),
2828
// File output (save report)
2929
"Write".to_string(),
30+
// Needed to append/continue a report when a prior Write was
31+
// truncated by max_tokens (recovery path injects a "use Edit
32+
// to continue" hint into result_for_assistant).
33+
"Edit".to_string(),
3034
// Terminal — run commands to gather data (e.g. git log, curl, jq)
3135
"Bash".to_string(),
3236
"TerminalControl".to_string(),
@@ -86,6 +90,10 @@ mod tests {
8690
assert!(tools.contains(&"WebSearch".to_string()));
8791
assert!(tools.contains(&"WebFetch".to_string()));
8892
assert!(tools.contains(&"Write".to_string()));
93+
assert!(
94+
tools.contains(&"Edit".to_string()),
95+
"Edit required so the agent can continue a Write that was truncated by max_tokens"
96+
);
8997
assert!(tools.contains(&"Bash".to_string()));
9098
assert!(tools.contains(&"TerminalControl".to_string()));
9199
assert!(tools.contains(&"ControlHub".to_string()));

src/crates/core/src/agentic/tools/pipeline/tool_pipeline.rs

Lines changed: 106 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ use crate::util::errors::{BitFunError, BitFunResult};
1515
use dashmap::DashMap;
1616
use futures::future::join_all;
1717
use log::{debug, error, info, warn};
18-
use std::collections::HashMap;
18+
use std::collections::{HashMap, VecDeque};
1919
use std::sync::Arc;
2020
use std::time::{Instant, SystemTime};
2121
use tokio::sync::{oneshot, RwLock as TokioRwLock};
@@ -28,6 +28,23 @@ struct ToolBatch {
2828
is_concurrent: bool,
2929
}
3030

31+
/// Number of *consecutive* identical tool calls (same name + deep-equal
32+
/// arguments) tolerated before the pipeline blocks further attempts as a
33+
/// detected loop. The (N+1)-th identical call is the one that gets blocked.
34+
const TOOL_CALL_LOOP_THRESHOLD: usize = 3;
35+
36+
/// Cap on per-session recent tool call history. Bounded so a long-lived
37+
/// session does not accumulate unbounded memory; only the last
38+
/// `TOOL_CALL_LOOP_THRESHOLD` entries participate in loop detection anyway.
39+
const TOOL_CALL_HISTORY_WINDOW: usize = 10;
40+
41+
/// Snapshot of a recently attempted tool call (its name and JSON arguments), used to detect agent loops.
42+
#[derive(Debug, Clone)]
43+
struct RecentToolCall {
44+
tool_name: String,
45+
arguments: serde_json::Value,
46+
}
47+
3148
/// Convert framework::ToolResult to core::ToolResult
3249
///
3350
/// Ensures `result_for_assistant` is always set, so the tool message content is never empty
@@ -287,6 +304,10 @@ pub struct ToolPipeline {
287304
confirmation_channels: Arc<DashMap<String, oneshot::Sender<ConfirmationResponse>>>,
288305
/// Cancellation token management (tool_id -> CancellationToken)
289306
cancellation_tokens: Arc<DashMap<String, CancellationToken>>,
307+
/// Per-session ring buffer of recent tool calls for loop detection.
308+
/// Keyed by session_id; entries store (tool_name, arguments) so that
309+
/// "same tool with deep-equal arguments" can be recognized across rounds.
310+
recent_tool_calls: Arc<DashMap<String, VecDeque<RecentToolCall>>>,
290311
computer_use_host: Option<ComputerUseHostRef>,
291312
}
292313

@@ -301,10 +322,56 @@ impl ToolPipeline {
301322
state_manager,
302323
confirmation_channels: Arc::new(DashMap::new()),
303324
cancellation_tokens: Arc::new(DashMap::new()),
325+
recent_tool_calls: Arc::new(DashMap::new()),
304326
computer_use_host,
305327
}
306328
}
307329

330+
/// Check whether this tool call forms a loop, i.e. whether the last
331+
/// `TOOL_CALL_LOOP_THRESHOLD` calls recorded for this session all match it in
332+
/// name AND deep-equal arguments. Always records the call into the per-session
333+
/// history (even when it is blocked) so that persistent loops continue to register, then trims the history to `TOOL_CALL_HISTORY_WINDOW` entries.
334+
/// Returns `true` if this call should be blocked.
335+
fn check_and_record_tool_call(
336+
&self,
337+
session_id: &str,
338+
tool_name: &str,
339+
arguments: &serde_json::Value,
340+
) -> bool {
341+
let mut entry = self
342+
.recent_tool_calls
343+
.entry(session_id.to_string())
344+
.or_default();
345+
let history = entry.value_mut();
346+
347+
// Count *consecutive* matches from the tail, looking at no more than the
348+
// last `TOOL_CALL_LOOP_THRESHOLD` entries; the first non-matching call ends the streak.
349+
let identical_priors = history
350+
.iter()
351+
.rev()
352+
.take(TOOL_CALL_LOOP_THRESHOLD)
353+
.take_while(|past| past.tool_name == tool_name && &past.arguments == arguments)
354+
.count();
355+
let is_loop = identical_priors >= TOOL_CALL_LOOP_THRESHOLD;
356+
357+
history.push_back(RecentToolCall {
358+
tool_name: tool_name.to_string(),
359+
arguments: arguments.clone(),
360+
});
361+
while history.len() > TOOL_CALL_HISTORY_WINDOW {
362+
history.pop_front();
363+
}
364+
365+
is_loop
366+
}
367+
368+
/// Drop the loop-detection history for a session that is ending. Bounded
369+
/// memory either way (at most `TOOL_CALL_HISTORY_WINDOW` entries per session), but this prevents
370+
/// long-lived processes from accumulating entries for stale sessions.
371+
pub fn clear_session_tool_call_history(&self, session_id: &str) {
372+
self.recent_tool_calls.remove(session_id);
373+
}
374+
308375
/// Returns a clone of the pipeline's `computer_use_host` handle, or `None` if none is set.
pub fn computer_use_host(&self) -> Option<ComputerUseHostRef> {
309376
self.computer_use_host.clone()
310377
}
@@ -624,6 +691,43 @@ impl ToolPipeline {
624691
return Err(BitFunError::Validation(error_msg));
625692
}
626693

694+
// Loop detection: refuse to execute the same tool call repeatedly with
695+
// identical arguments. Triggered on the (THRESHOLD + 1)-th consecutive
696+
// identical call within the per-session sliding window.
697+
if self.check_and_record_tool_call(
698+
&task.context.session_id,
699+
&tool_name,
700+
&tool_args,
701+
) {
702+
let error_msg = format!(
703+
"Tool-call loop blocked: '{}' was already called {} times in a row in this session with identical arguments. Refusing to execute this {}th identical call. Issue a different tool call, or stop tool-calling and respond to the user. If you wrote a file recently and want to continue it, its full content is already visible in your earlier tool_use message — use Edit with `old_string` taken from the end of that content; do not Read the file again.",
704+
tool_name,
705+
TOOL_CALL_LOOP_THRESHOLD,
706+
TOOL_CALL_LOOP_THRESHOLD + 1
707+
);
708+
warn!(
709+
"Tool-call loop blocked: tool_name={}, tool_id={}, session_id={}, threshold={}",
710+
tool_name, tool_id, task.context.session_id, TOOL_CALL_LOOP_THRESHOLD
711+
);
712+
713+
self.state_manager
714+
.update_state(
715+
&tool_id,
716+
ToolExecutionState::Failed {
717+
error: error_msg.clone(),
718+
is_retryable: false,
719+
duration_ms: None,
720+
queue_wait_ms: None,
721+
preflight_ms: None,
722+
confirmation_wait_ms: None,
723+
execution_ms: None,
724+
},
725+
)
726+
.await;
727+
728+
return Err(BitFunError::Validation(error_msg));
729+
}
730+
627731
// Security check: check if the tool is in the allowed list
628732
// If allowed_tools is not empty, only allow execution of tools in the whitelist
629733
if !task.context.allowed_tools.is_empty()
@@ -923,7 +1027,7 @@ impl ToolPipeline {
9231027
if recovered_from_truncation {
9241028
let original = tool_result.result_for_assistant.unwrap_or_default();
9251029
let notice = format!(
926-
"[WARNING: tool arguments were truncated by the model (likely hit max_tokens) and were auto-repaired before this {} call executed. The written content stops at the truncation point and may be incomplete; verify the result and, if needed, continue with a follow-up call that appends the remaining content (do not rewrite the whole file from scratch). Original tool result follows.]\n\n",
1030+
"[Your previous {} call was truncated mid-stream by max_tokens and was auto-repaired before execution; the file was written with the partial content. The full truncated content — including the exact stopping point — is visible in the `input` of your previous tool_use message, so you do NOT need to read the file again. To finish it, issue ONE Edit call where `old_string` is the final unique substring of your truncated content and `new_string` is that same substring plus the continuation. If you do not have a concrete plan for the continuation, stop tool-calling and tell the user the output was truncated (suggest raising max_tokens). Do NOT call Read on this file and do NOT rewrite the whole file with Write.]\n\nOriginal tool result follows.\n\n",
9271031
tool_name
9281032
);
9291033
tool_result.result_for_assistant = Some(if original.is_empty() {

0 commit comments

Comments
 (0)