Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion apps/cli/ai/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,15 @@ const pathApprovalSession = createPathApprovalSession();
// are unhandled because they originate inside the SDK cleanup path rather
// than propagating through the async iterator. Without this handler,
// Node.js terminates the process on unhandled rejections.
const SDK_INTERRUPT_CLEANUP_ERRORS = [
'Query closed',
'ProcessTransport is not ready for writing',
Copy link
Copy Markdown
Contributor Author

@epeicher epeicher Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not strictly related to these changes, but I have identified that if the user presses ESC while in the Take screenshot step, the process terminates abruptly, so I added this to prevent that scenario. Now, it is correctly handled.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels like there should be a better way to catch these SDK errors other than checking strings but it seems there's no other valid option now?

];
process.on( 'unhandledRejection', ( reason ) => {
if ( reason instanceof Error && reason.message.includes( 'Query closed' ) ) {
if (
reason instanceof Error &&
SDK_INTERRUPT_CLEANUP_ERRORS.some( ( msg ) => reason.message.includes( msg ) )
) {
return;
}
throw reason;
Expand Down
4 changes: 2 additions & 2 deletions apps/cli/ai/output-adapter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import type { AiProviderId } from 'cli/ai/providers';
import type { SiteInfo } from 'cli/ai/ui';

export type HandleMessageResult =
| { type: 'result'; sessionId: string; success: boolean }
| { type: 'result'; sessionId: string; success: boolean; interrupted?: boolean }
| { type: 'max_turns'; sessionId: string; numTurns: number; costUsd?: number };

export interface AiOutputAdapter {
Expand Down Expand Up @@ -125,7 +125,7 @@ export class JsonAdapter implements AiOutputAdapter {
return {
type: 'result',
sessionId: message.session_id,
success: message.subtype === 'success',
success: ! message.is_error,
};
}

Expand Down
6 changes: 6 additions & 0 deletions apps/cli/ai/providers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ function createBaseEnvironment(): Record< string, string > {
delete env.ANTHROPIC_CUSTOM_HEADERS;
delete env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS;

// Fail fast on transient API errors so the user-mediated retry prompt can
// intervene instead of the SDK burning through its default 10 retries.
if ( ! env.CLAUDE_CODE_MAX_RETRIES ) {
env.CLAUDE_CODE_MAX_RETRIES = '1';
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to prevent the SDK internally re-trying. The default is 10 so before this change, the SDK was re-trying after a timeout without notifying the client code, so if the timeout is 4 mins, it would take 40 minutes to fail. Now, it fails immediately, and the agent handles the error; the user is notified that a hiccup on the server has happened.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should retry at least once without user knowing.

}

return env;
}

Expand Down
104 changes: 59 additions & 45 deletions apps/cli/ai/ui.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2137,29 +2137,17 @@ export class AiChatUI implements AiOutputAdapter {
}
case 'result': {
this.hideLoader();
if ( message.subtype === 'success' ) {
const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
if ( ! this.hasShownResponseMarker ) {
this.messages.addChild(
new Text( '\n ' + chalk.blue( '⏺' ) + ' ' + __( 'Done' ), 0, 0 )
);
}
this.showInfo(
sprintf(
/* translators: 1: seconds spent thinking, 2: number of turns */
_n(
'Thought for %1$ds · %2$d turn',
'Thought for %1$ds · %2$d turns',
message.num_turns
),
thinkingSec,
message.num_turns
)
);
return { type: 'result', sessionId: message.session_id, success: true };

// Max-turns exhaustion has dedicated upstream handling (prompts user to continue).
if ( message.subtype === 'error_max_turns' ) {
return {
type: 'max_turns',
sessionId: message.session_id,
numTurns: message.num_turns,
};
}

// User-initiated interruption: show friendly message instead of error
// User-initiated interruption: friendly message, suppress retry prompt.
if ( this.wasInterrupted ) {
const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
this.messages.addChild(
Expand All @@ -2176,36 +2164,62 @@ export class AiChatUI implements AiOutputAdapter {
thinkingSec
)
);
return { type: 'result', sessionId: message.session_id, success: false };
}

// Build detailed error message
const parts: string[] = [];
if ( 'errors' in message && message.errors?.length ) {
parts.push( ...message.errors );
}
if ( message.subtype === 'error_max_turns' ) {
return {
type: 'max_turns',
type: 'result',
sessionId: message.session_id,
numTurns: message.num_turns,
success: false,
interrupted: true,
};
} else if ( message.subtype ) {
parts.push( `(${ message.subtype })` );
}
if ( 'permission_denials' in message && message.permission_denials?.length ) {
for ( const denial of message.permission_denials ) {
parts.push(
sprintf(
/* translators: %s: tool name */
__( 'Permission denied: %s' ),
denial.tool_name
)
);

// is_error is the authoritative failure signal. A message can have
// subtype='success' and still carry is_error=true (e.g. API 504 after
// the SDK exhausts retries and emits the error text as the result).
if ( message.is_error ) {
const parts: string[] = [];
if ( 'errors' in message && message.errors?.length ) {
parts.push( ...message.errors );
}
if ( 'result' in message && typeof message.result === 'string' && message.result ) {
parts.push( message.result );
} else if ( message.subtype && message.subtype !== 'success' ) {
parts.push( `(${ message.subtype })` );
}
if ( 'permission_denials' in message && message.permission_denials?.length ) {
for ( const denial of message.permission_denials ) {
parts.push(
sprintf(
/* translators: %s: tool name */
__( 'Permission denied: %s' ),
denial.tool_name
)
);
}
}
this.showError( parts.length > 0 ? parts.join( '\n' ) : __( 'Unknown error' ) );
return { type: 'result', sessionId: message.session_id, success: false };
}

// Genuine success.
const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
if ( ! this.hasShownResponseMarker ) {
this.messages.addChild(
new Text( '\n ' + chalk.blue( '⏺' ) + ' ' + __( 'Done' ), 0, 0 )
);
}
this.showError( parts.length > 0 ? parts.join( '\n' ) : __( 'Unknown error' ) );
return { type: 'result', sessionId: message.session_id, success: false };
this.showInfo(
sprintf(
/* translators: 1: seconds spent thinking, 2: number of turns */
_n(
'Thought for %1$ds · %2$d turn',
'Thought for %1$ds · %2$d turns',
message.num_turns
),
thinkingSec,
message.num_turns
)
);
return { type: 'result', sessionId: message.session_id, success: true };
}
case 'system': {
if ( message.subtype === 'status' && message.status === 'compacting' ) {
Expand Down
42 changes: 40 additions & 2 deletions apps/cli/commands/ai/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -358,8 +358,11 @@ export async function runCommand( options: {
return answers;
}

const MAX_RETRY_ATTEMPTS = 4;

async function runAgentTurn(
prompt: string
prompt: string,
retryAttempt = 0
): Promise< { status: TurnStatus; usage?: { numTurns: number; costUsd?: number } } > {
const env = await resolveAiEnvironment( currentProvider );
ui.beginAgentTurn();
Expand Down Expand Up @@ -425,14 +428,22 @@ export async function runCommand( options: {
numTurns: result.numTurns,
};
turnStatus = 'max_turns';
} else if ( result.interrupted ) {
turnStatus = 'interrupted';
} else {
turnStatus = result.success ? 'success' : 'error';
}
}
}
} catch ( error ) {
turnStatus = 'error';
throw error;
// In JSON mode there's no interactive retry, so re-throw and let
// the caller record the error. In interactive mode, fall through
// so the post-loop retry prompt offers the user a chance to retry.
if ( isJsonMode ) {
throw error;
}
ui.showError( getErrorMessage( error ) );
} finally {
await persist( ( recorder ) => recorder.recordTurnClosed( turnStatus ) );
ui.endAgentTurn();
Expand Down Expand Up @@ -462,6 +473,33 @@ export async function runCommand( options: {
}
}

if ( turnStatus === 'error' && ! isJsonMode ) {
if ( retryAttempt >= MAX_RETRY_ATTEMPTS ) {
ui.showInfo(
__( 'The server has not recovered after multiple attempts. Please try again later.' )
);
} else {
const answer = await ui.askUser( [
{
question: __( 'There was a hiccup on the server. Do you want to continue?' ),
options: [
{ label: 'Yes', description: __( 'Continue from where you left off' ) },
{ label: 'No', description: __( 'Stop here' ) },
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if a third-option (other) is needed where the user can give different instructions.

],
},
] );
const choice = Object.values( answer )[ 0 ]?.toLowerCase();
if ( choice === 'yes' ) {
ui.showInfo( __( 'Retrying…' ) );
// If the SDK threw before emitting any result, sessionId is
// still whatever it was before this turn; without one to resume,
// replay the original prompt instead.
const retryPrompt = sessionId ? 'Continue from where you left off.' : prompt;
return runAgentTurn( retryPrompt, retryAttempt + 1 );
}
}
}

return {
status: turnStatus,
usage: maxTurnsResult,
Expand Down
Loading