Automattic · epeicher · Apr 17, 2026 · epeicher · Apr 17, 2026 · youknowriad
diff --git a/apps/cli/ai/agent.ts b/apps/cli/ai/agent.ts
@@ -42,8 +42,15 @@ const pathApprovalSession = createPathApprovalSession();
 // are unhandled because they originate inside the SDK cleanup path rather
 // than propagating through the async iterator. Without this handler,
 // Node.js terminates the process on unhandled rejections.
+const SDK_INTERRUPT_CLEANUP_ERRORS = [
+	'Query closed',
+	'ProcessTransport is not ready for writing',
+];
 process.on( 'unhandledRejection', ( reason ) => {
-	if ( reason instanceof Error && reason.message.includes( 'Query closed' ) ) {
+	if (
+		reason instanceof Error &&
+		SDK_INTERRUPT_CLEANUP_ERRORS.some( ( msg ) => reason.message.includes( msg ) )
+	) {
 		return;
 	}
 	throw reason;

diff --git a/apps/cli/ai/output-adapter.ts b/apps/cli/ai/output-adapter.ts
@@ -5,7 +5,7 @@ import type { AiProviderId } from 'cli/ai/providers';
 import type { SiteInfo } from 'cli/ai/ui';
 
 export type HandleMessageResult =
-	| { type: 'result'; sessionId: string; success: boolean }
+	| { type: 'result'; sessionId: string; success: boolean; interrupted?: boolean }
 	| { type: 'max_turns'; sessionId: string; numTurns: number; costUsd?: number };
 
 export interface AiOutputAdapter {
@@ -125,7 +125,7 @@ export class JsonAdapter implements AiOutputAdapter {
 			return {
 				type: 'result',
 				sessionId: message.session_id,
-				success: message.subtype === 'success',
+				success: ! message.is_error,
 			};
 		}
 

diff --git a/apps/cli/ai/providers.ts b/apps/cli/ai/providers.ts
@@ -74,6 +74,12 @@ function createBaseEnvironment(): Record< string, string > {
 	delete env.ANTHROPIC_CUSTOM_HEADERS;
 	delete env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS;
 
+	// Fail fast on transient API errors so the user-mediated retry prompt can
+	// intervene instead of the SDK burning through its default 10 retries.
+	if ( ! env.CLAUDE_CODE_MAX_RETRIES ) {
+		env.CLAUDE_CODE_MAX_RETRIES = '1';
+	}
+
 	return env;
 }
 

diff --git a/apps/cli/ai/ui.ts b/apps/cli/ai/ui.ts
@@ -2137,29 +2137,17 @@ export class AiChatUI implements AiOutputAdapter {
 			}
 			case 'result': {
 				this.hideLoader();
-				if ( message.subtype === 'success' ) {
-					const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
-					if ( ! this.hasShownResponseMarker ) {
-						this.messages.addChild(
-							new Text( '\n ' + chalk.blue( '⏺' ) + ' ' + __( 'Done' ), 0, 0 )
-						);
-					}
-					this.showInfo(
-						sprintf(
-							/* translators: 1: seconds spent thinking, 2: number of turns */
-							_n(
-								'Thought for %1$ds · %2$d turn',
-								'Thought for %1$ds · %2$d turns',
-								message.num_turns
-							),
-							thinkingSec,
-							message.num_turns
-						)
-					);
-					return { type: 'result', sessionId: message.session_id, success: true };
+
+				// Max-turns exhaustion has dedicated upstream handling (prompts user to continue).
+				if ( message.subtype === 'error_max_turns' ) {
+					return {
+						type: 'max_turns',
+						sessionId: message.session_id,
+						numTurns: message.num_turns,
+					};
 				}
 
-				// User-initiated interruption: show friendly message instead of error
+				// User-initiated interruption: friendly message, suppress retry prompt.
 				if ( this.wasInterrupted ) {
 					const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
 					this.messages.addChild(
@@ -2176,36 +2164,62 @@ export class AiChatUI implements AiOutputAdapter {
 							thinkingSec
 						)
 					);
-					return { type: 'result', sessionId: message.session_id, success: false };
-				}
-
-				// Build detailed error message
-				const parts: string[] = [];
-				if ( 'errors' in message && message.errors?.length ) {
-					parts.push( ...message.errors );
-				}
-				if ( message.subtype === 'error_max_turns' ) {
 					return {
-						type: 'max_turns',
+						type: 'result',
 						sessionId: message.session_id,
-						numTurns: message.num_turns,
+						success: false,
+						interrupted: true,
 					};
-				} else if ( message.subtype ) {
-					parts.push( `(${ message.subtype })` );
 				}
-				if ( 'permission_denials' in message && message.permission_denials?.length ) {
-					for ( const denial of message.permission_denials ) {
-						parts.push(
-							sprintf(
-								/* translators: %s: tool name */
-								__( 'Permission denied: %s' ),
-								denial.tool_name
-							)
-						);
+
+				// is_error is the authoritative failure signal. A message can have
+				// subtype='success' and still carry is_error=true (e.g. API 504 after
+				// the SDK exhausts retries and emits the error text as the result).
+				if ( message.is_error ) {
+					const parts: string[] = [];
+					if ( 'errors' in message && message.errors?.length ) {
+						parts.push( ...message.errors );
 					}
+					if ( 'result' in message && typeof message.result === 'string' && message.result ) {
+						parts.push( message.result );
+					} else if ( message.subtype && message.subtype !== 'success' ) {
+						parts.push( `(${ message.subtype })` );
+					}
+					if ( 'permission_denials' in message && message.permission_denials?.length ) {
+						for ( const denial of message.permission_denials ) {
+							parts.push(
+								sprintf(
+									/* translators: %s: tool name */
+									__( 'Permission denied: %s' ),
+									denial.tool_name
+								)
+							);
+						}
+					}
+					this.showError( parts.length > 0 ? parts.join( '\n' ) : __( 'Unknown error' ) );
+					return { type: 'result', sessionId: message.session_id, success: false };
+				}
+
+				// Genuine success.
+				const thinkingSec = Math.round( ( this.nowMs() - this.turnStartTime ) / 1000 );
+				if ( ! this.hasShownResponseMarker ) {
+					this.messages.addChild(
+						new Text( '\n ' + chalk.blue( '⏺' ) + ' ' + __( 'Done' ), 0, 0 )
+					);
 				}
-				this.showError( parts.length > 0 ? parts.join( '\n' ) : __( 'Unknown error' ) );
-				return { type: 'result', sessionId: message.session_id, success: false };
+				this.showInfo(
+					sprintf(
+						/* translators: 1: seconds spent thinking, 2: number of turns */
+						_n(
+							'Thought for %1$ds · %2$d turn',
+							'Thought for %1$ds · %2$d turns',
+							message.num_turns
+						),
+						thinkingSec,
+						message.num_turns
+					)
+				);
+				return { type: 'result', sessionId: message.session_id, success: true };
 			}
 			case 'system': {
 				if ( message.subtype === 'status' && message.status === 'compacting' ) {

diff --git a/apps/cli/commands/ai/index.ts b/apps/cli/commands/ai/index.ts
@@ -358,8 +358,11 @@ export async function runCommand( options: {
 		return answers;
 	}
 
+	const MAX_RETRY_ATTEMPTS = 4;
+
 	async function runAgentTurn(
-		prompt: string
+		prompt: string,
+		retryAttempt = 0
 	): Promise< { status: TurnStatus; usage?: { numTurns: number; costUsd?: number } } > {
 		const env = await resolveAiEnvironment( currentProvider );
 		ui.beginAgentTurn();
@@ -425,14 +428,22 @@ export async function runCommand( options: {
 							numTurns: result.numTurns,
 						};
 						turnStatus = 'max_turns';
+					} else if ( result.interrupted ) {
+						turnStatus = 'interrupted';
 					} else {
 						turnStatus = result.success ? 'success' : 'error';
 					}
 				}
 			}
 		} catch ( error ) {
 			turnStatus = 'error';
-			throw error;
+			// In JSON mode there's no interactive retry, so re-throw and let
+			// the caller record the error. In interactive mode, fall through
+			// so the post-loop retry prompt offers the user a chance to retry.
+			if ( isJsonMode ) {
+				throw error;
+			}
+			ui.showError( getErrorMessage( error ) );
 		} finally {
 			await persist( ( recorder ) => recorder.recordTurnClosed( turnStatus ) );
 			ui.endAgentTurn();
@@ -462,6 +473,33 @@ export async function runCommand( options: {
 			}
 		}
 
+		if ( turnStatus === 'error' && ! isJsonMode ) {
+			if ( retryAttempt >= MAX_RETRY_ATTEMPTS ) {
+				ui.showInfo(
+					__( 'The server has not recovered after multiple attempts. Please try again later.' )
+				);
+			} else {
+				const answer = await ui.askUser( [
+					{
+						question: __( 'There was a hiccup on the server. Do you want to continue?' ),
+						options: [
+							{ label: 'Yes', description: __( 'Continue from where you left off' ) },
+							{ label: 'No', description: __( 'Stop here' ) },
+						],
+					},
+				] );
+				const choice = Object.values( answer )[ 0 ]?.toLowerCase();
+				if ( choice === 'yes' ) {
+					ui.showInfo( __( 'Retrying…' ) );
+					// If the SDK threw before emitting any result, sessionId is
+					// still whatever it was before this turn; without one to resume,
+					// replay the original prompt instead.
+					const retryPrompt = sessionId ? 'Continue from where you left off.' : prompt;
+					return runAgentTurn( retryPrompt, retryAttempt + 1 );
+				}
+			}
+		}
+
 		return {
 			status: turnStatus,
 			usage: maxTurnsResult,