Skip to content

Commit bea065b

Browse files
charley-oai and codex committed
core: snapshot full first forked request
Co-authored-by: Codex <[email protected]>
1 parent 20bb569 commit bea065b

File tree

2 files changed

+128
-61
lines changed

2 files changed

+128
-61
lines changed

codex-rs/core/src/codex_tests.rs

Lines changed: 116 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,31 @@ use codex_execpolicy::NetworkRuleProtocol;
6464
use codex_execpolicy::Policy;
6565
use codex_network_proxy::NetworkProxyConfig;
6666
use codex_otel::TelemetryAuthMode;
67+
use codex_protocol::config_types::CollaborationMode;
68+
use codex_protocol::config_types::ModeKind;
69+
use codex_protocol::config_types::Settings;
6770
use codex_protocol::models::BaseInstructions;
6871
use codex_protocol::models::ContentItem;
6972
use codex_protocol::models::DeveloperInstructions;
7073
use codex_protocol::models::ResponseInputItem;
7174
use codex_protocol::models::ResponseItem;
7275
use codex_protocol::openai_models::ModelsResponse;
76+
use codex_protocol::protocol::AskForApproval;
7377
use codex_protocol::protocol::ConversationAudioParams;
7478
use codex_protocol::protocol::RealtimeAudioFrame;
7579
use codex_protocol::protocol::Submission;
7680
use codex_protocol::protocol::W3cTraceContext;
81+
use core_test_support::context_snapshot;
82+
use core_test_support::context_snapshot::ContextSnapshotOptions;
83+
use core_test_support::context_snapshot::ContextSnapshotRenderMode;
84+
use core_test_support::responses::ev_completed;
85+
use core_test_support::responses::ev_response_created;
86+
use core_test_support::responses::mount_sse_once;
87+
use core_test_support::responses::sse;
88+
use core_test_support::responses::start_mock_server;
89+
use core_test_support::test_codex::test_codex;
7790
use core_test_support::tracing::install_test_tracing;
91+
use core_test_support::wait_for_event;
7892
use opentelemetry::trace::TraceContextExt;
7993
use opentelemetry::trace::TraceId;
8094
use std::path::Path;
@@ -1115,66 +1129,113 @@ async fn record_initial_history_reconstructs_forked_transcript() {
11151129
assert_eq!(expected, history.raw_items());
11161130
}
11171131

1118-
#[tokio::test]
1119-
async fn fork_startup_context_then_first_turn_diff_snapshot() {
1120-
let (session, turn_context) = make_session_and_context().await;
1121-
let expected_history = vec![user_message("forked seed")];
1122-
let rollout_items = vec![
1123-
RolloutItem::ResponseItem(expected_history[0].clone()),
1124-
RolloutItem::TurnContext(turn_context.to_turn_context_item()),
1125-
];
1126-
1127-
session
1128-
.record_initial_history(InitialHistory::Forked(rollout_items))
1129-
.await;
1132+
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
1133+
async fn fork_startup_context_then_first_turn_diff_snapshot() -> anyhow::Result<()> {
1134+
let server = start_mock_server().await;
1135+
let _initial_request = mount_sse_once(
1136+
&server,
1137+
sse(vec![ev_response_created("resp-1"), ev_completed("resp-1")]),
1138+
)
1139+
.await;
1140+
let first_forked_request = mount_sse_once(
1141+
&server,
1142+
sse(vec![ev_response_created("resp-2"), ev_completed("resp-2")]),
1143+
)
1144+
.await;
11301145

1131-
let history_after_fork = session.clone_history().await;
1132-
let startup_injection = &history_after_fork.raw_items()[expected_history.len()..];
1146+
let mut builder = test_codex().with_config(|config| {
1147+
config.permissions.approval_policy =
1148+
codex_config::Constrained::allow_any(AskForApproval::OnRequest);
1149+
});
1150+
let initial = builder.build(&server).await?;
1151+
let rollout_path = initial
1152+
.session_configured
1153+
.rollout_path
1154+
.clone()
1155+
.expect("rollout path");
11331156

1134-
let next_model = if turn_context.model_info.slug == "gpt-5.1" {
1135-
"gpt-5"
1136-
} else {
1137-
"gpt-5.1"
1138-
};
1139-
let first_turn_context = turn_context
1140-
.with_model(next_model.to_string(), &session.services.models_manager)
1141-
.await;
1157+
initial
1158+
.codex
1159+
.submit(Op::UserInput {
1160+
items: vec![UserInput::Text {
1161+
text: "fork seed".into(),
1162+
text_elements: Vec::new(),
1163+
}],
1164+
final_output_json_schema: None,
1165+
})
1166+
.await?;
1167+
wait_for_event(&initial.codex, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
11421168

1143-
let history_len_after_fork = history_after_fork.raw_items().len();
1144-
session
1145-
.record_context_updates_and_set_reference_context_item(&first_turn_context)
1146-
.await;
1147-
let history_after_first_turn = session.clone_history().await;
1148-
let first_turn_updates = &history_after_first_turn.raw_items()[history_len_after_fork..];
1169+
let mut fork_config = initial.config.clone();
1170+
fork_config.permissions.approval_policy =
1171+
codex_config::Constrained::allow_any(AskForApproval::UnlessTrusted);
1172+
let forked = initial
1173+
.thread_manager
1174+
.fork_thread(usize::MAX, fork_config, rollout_path, false, None)
1175+
.await?;
11491176

1150-
let format_item_kinds = |items: &[ResponseItem]| {
1151-
let lines = items
1152-
.iter()
1153-
.enumerate()
1154-
.map(|(idx, item)| match item {
1155-
ResponseItem::Message { role, .. } => format!("{idx:02}:message/{role}"),
1156-
other => {
1157-
let item = serde_json::to_value(other).expect("serialize snapshot item");
1158-
let item_type = item
1159-
.get("type")
1160-
.and_then(serde_json::Value::as_str)
1161-
.unwrap_or("<missing_type>");
1162-
format!("{idx:02}:{item_type}")
1163-
}
1164-
})
1165-
.collect::<Vec<_>>();
1166-
if lines.is_empty() {
1167-
"<none>".to_string()
1168-
} else {
1169-
lines.join("\n")
1170-
}
1177+
let collaboration_mode = CollaborationMode {
1178+
mode: ModeKind::Plan,
1179+
settings: Settings {
1180+
model: forked.session_configured.model.clone(),
1181+
reasoning_effort: None,
1182+
developer_instructions: Some("Fork turn collaboration instructions.".to_string()),
1183+
},
11711184
};
1185+
forked
1186+
.thread
1187+
.submit(Op::OverrideTurnContext {
1188+
cwd: None,
1189+
approval_policy: Some(AskForApproval::Never),
1190+
approvals_reviewer: None,
1191+
sandbox_policy: None,
1192+
windows_sandbox_level: None,
1193+
model: None,
1194+
effort: None,
1195+
summary: None,
1196+
service_tier: None,
1197+
collaboration_mode: Some(collaboration_mode),
1198+
personality: None,
1199+
})
1200+
.await?;
11721201

1173-
let snapshot = format!(
1174-
"Scenario: Fork startup context injection followed by first-turn diff injection\n\n## Fork Startup Injection\n{}\n\n## First Turn Context Updates\n{}",
1175-
format_item_kinds(startup_injection),
1176-
format_item_kinds(first_turn_updates),
1202+
forked
1203+
.thread
1204+
.submit(Op::UserInput {
1205+
items: vec![UserInput::Text {
1206+
text: "after fork".into(),
1207+
text_elements: Vec::new(),
1208+
}],
1209+
final_output_json_schema: None,
1210+
})
1211+
.await?;
1212+
wait_for_event(&forked.thread, |ev| matches!(ev, EventMsg::TurnComplete(_))).await;
1213+
1214+
let request = first_forked_request.single_request();
1215+
let snapshot = context_snapshot::format_labeled_requests_snapshot(
1216+
"First request after fork when fork startup changes approval policy and the first forked turn changes approval policy again and enters plan mode.",
1217+
&[("First Forked Turn Request", &request)],
1218+
&ContextSnapshotOptions::default()
1219+
.render_mode(ContextSnapshotRenderMode::FullText)
1220+
.strip_capability_instructions()
1221+
.strip_agents_md_user_context(),
11771222
);
1223+
let snapshot = snapshot
1224+
.lines()
1225+
.map(|line| {
1226+
let mut line = line.to_string();
1227+
for (tag, replacement) in [("cwd", "<CWD>"), ("current_date", "<CURRENT_DATE>")] {
1228+
let open_tag = format!("<{tag}>");
1229+
let close_tag = format!("</{tag}>");
1230+
if let (Some(start), Some(end)) = (line.find(&open_tag), line.find(&close_tag)) {
1231+
let start = start + open_tag.len();
1232+
line = format!("{}{replacement}{}", &line[..start], &line[end..]);
1233+
}
1234+
}
1235+
line
1236+
})
1237+
.collect::<Vec<_>>()
1238+
.join("\n");
11781239

11791240
let mut settings = insta::Settings::clone_current();
11801241
settings.set_snapshot_path("snapshots");
@@ -1185,6 +1246,8 @@ async fn fork_startup_context_then_first_turn_diff_snapshot() {
11851246
snapshot
11861247
);
11871248
});
1249+
1250+
Ok(())
11881251
}
11891252

11901253
#[tokio::test]
Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
---
22
source: core/src/codex_tests.rs
3-
assertion_line: 1212
3+
assertion_line: 1262
44
expression: snapshot
55
---
6-
Scenario: Fork startup context injection followed by first-turn diff injection
6+
Scenario: First request after fork when fork startup changes approval policy and the first forked turn changes approval policy again and enters plan mode.
77

8-
## Fork Startup Injection
9-
00:message/developer
10-
01:message/user
11-
12-
## First Turn Context Updates
13-
00:message/developer
8+
## First Forked Turn Request
9+
00:message/developer:<permissions instructions>\nFilesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted.\n# Escalation Requests\n\nCommands are run outside the sandbox if they are approved by the user, or match an existing rule that allows it to run unrestricted. The command string is split into independent command segments at shell control operators, including but not limited to:\n\n- Pipes: |\n- Logical operators: &&, ||\n- Command separators: ;\n- Subshell boundaries: (...), $(...)\n\nEach resulting segment is evaluated independently for sandbox restrictions and approval requirements.\n\nExample:\n\ngit pull | tee output.txt\n\nThis is treated as two command segments:\n\n["git", "pull"]\n\n["tee", "output.txt"]\n\nCommands that use more advanced shell features like redirection (>, >>, <), substitutions ($(...), ...), environment variables (FOO=bar), or wildcard patterns (*, ?) will not be evaluated against rules, to limit the scope of what an approved rule allows.\n\n## How to request escalation\n\nIMPORTANT: To request approval to execute a command that will require escalated privileges:\n\n- Provide the `sandbox_permissions` parameter with the value `"require_escalated"`\n- Include a short question asking the user if they want to allow the action in `justification` parameter. e.g. "Do you want to download and install dependencies for this project?"\n- Optionally suggest a `prefix_rule` - this will be shown to the user with an option to persist the rule approval for future sessions.\n\nIf you run a command that is important to solving the user's query, but it fails because of sandboxing or with a likely sandbox-related network error (for example DNS/host resolution, registry/index access, or dependency download failure), rerun the command with "require_escalated". 
ALWAYS proceed to use the `justification` parameter - do not message the user before requesting approval for the command.\n\n## When to request escalation\n\nWhile commands are running inside the sandbox, here are some scenarios that will require escalation outside the sandbox:\n\n- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)\n- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.\n- If you run a command that is important to solving the user's query, but it fails because of sandboxing or with a likely sandbox-related network error (for example DNS/host resolution, registry/index access, or dependency download failure), rerun the command with `require_escalated`. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters. do not message the user before requesting approval for the command.\n- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for.\n- Be judicious with escalating, but if completing the user's request requires it, you should do so - don't try and circumvent approvals by using other tools.\n\n## prefix_rule guidance\n\nWhen choosing a `prefix_rule`, request one that will allow you to fulfill similar requests from the user in the future without re-requesting escalation. It should be categorical and reasonably scoped to similar capabilities. You should rarely pass the entire command into `prefix_rule`.\n\n### Banned prefix_rules \nAvoid requesting overly broad prefixes that the user would be ill-advised to approve. For example, do not request ["python3"], ["python", "-"], or other similar prefixes that would allow arbitrary scripting.\nNEVER provide a prefix_rule argument for destructive commands like rm.\nNEVER provide a prefix_rule if your command uses a heredoc or herestring. 
\n\n### Examples\nGood examples of prefixes:\n- ["npm", "run", "dev"]\n- ["gh", "pr", "check"]\n- ["cargo", "test"]\n</permissions instructions>
10+
01:message/user:<environment_context>\n <cwd><CWD></cwd>\n <shell>zsh</shell>\n <current_date><CURRENT_DATE></current_date>\n <timezone>America/Los_Angeles</timezone>\n</environment_context>
11+
02:message/user:fork seed
12+
03:message/developer:<permissions instructions>\nFilesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted.\n Approvals are your mechanism to get user consent to run shell commands without the sandbox. `approval_policy` is `unless-trusted`: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.\n</permissions instructions>
13+
04:message/user:<environment_context>\n <cwd><CWD></cwd>\n <shell>zsh</shell>\n <current_date><CURRENT_DATE></current_date>\n <timezone>America/Los_Angeles</timezone>\n</environment_context>
14+
05:message/developer[2]:
15+
[01] <permissions instructions>\nFilesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted.\nApproval policy is currently never. Do not provide the `sandbox_permissions` for any reason, commands will be rejected.\n</permissions instructions>
16+
[02] <collaboration_mode>Fork turn collaboration instructions.</collaboration_mode>
17+
06:message/user:after fork

0 commit comments

Comments
 (0)