openai · cassirer-openai · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml
@@ -51,6 +51,7 @@ members = [
     "protocol",
     "realtime-webrtc",
     "rollout",
+    "rollout-trace",
     "rmcp-client",
     "responses-api-proxy",
     "response-debug-context",
@@ -159,6 +160,7 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
 codex-response-debug-context = { path = "response-debug-context" }
 codex-rmcp-client = { path = "rmcp-client" }
 codex-rollout = { path = "rollout" }
+codex-rollout-trace = { path = "rollout-trace" }
 codex-sandboxing = { path = "sandboxing" }
 codex-secrets = { path = "secrets" }
 codex-shell-command = { path = "shell-command" }

diff --git a/codex-rs/cli/Cargo.toml b/codex-rs/cli/Cargo.toml
@@ -40,6 +40,7 @@ codex-mcp-server = { workspace = true }
 codex-protocol = { workspace = true }
 codex-responses-api-proxy = { workspace = true }
 codex-rmcp-client = { workspace = true }
+codex-rollout-trace = { workspace = true }
 codex-sandboxing = { workspace = true }
 codex-state = { workspace = true }
 codex-stdio-to-uds = { workspace = true }

diff --git a/codex-rs/cli/src/main.rs b/codex-rs/cli/src/main.rs
@@ -22,6 +22,8 @@ use codex_exec::Command as ExecCommand;
 use codex_exec::ReviewArgs;
 use codex_execpolicy::ExecPolicyCheckCommand;
 use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
+use codex_rollout_trace::REDUCED_STATE_FILE_NAME;
+use codex_rollout_trace::replay_bundle;
 use codex_state::StateRuntime;
 use codex_state::state_db_path;
 use codex_tui::AppExitInfo;
@@ -190,6 +192,9 @@ enum DebugSubcommand {
     /// Render the model-visible prompt input list as JSON.
     PromptInput(DebugPromptInputCommand),
 
+    /// Replay a rollout trace bundle and write reduced state JSON.
+    TraceReduce(DebugTraceReduceCommand),
+
     /// Internal: reset local memory state for a fresh start.
     #[clap(hide = true)]
     ClearMemories,
@@ -224,6 +229,17 @@ struct DebugPromptInputCommand {
     images: Vec<PathBuf>,
 }
 
+#[derive(Debug, Parser)]
+struct DebugTraceReduceCommand {
+    /// Trace bundle directory containing manifest.json and trace.jsonl.
+    #[arg(value_name = "TRACE_BUNDLE")]
+    trace_bundle: PathBuf,
+
+    /// Output path for reduced RolloutTrace JSON. Defaults to TRACE_BUNDLE/state.json.
+    #[arg(long = "output", short = 'o', value_name = "FILE")]
+    output: Option<PathBuf>,
+}
+
 #[derive(Debug, Parser)]
 struct ResumeCommand {
     /// Conversation/session id (UUID) or thread name. UUIDs take precedence if it parses.
@@ -991,6 +1007,14 @@ async fn cli_main(arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
                 )
                 .await?;
             }
+            DebugSubcommand::TraceReduce(cmd) => {
+                reject_remote_mode_for_subcommand(
+                    root_remote.as_deref(),
+                    root_remote_auth_token_env.as_deref(),
+                    "debug trace-reduce",
+                )?;
+                run_debug_trace_reduce_command(cmd).await?;
+            }
             DebugSubcommand::ClearMemories => {
                 reject_remote_mode_for_subcommand(
                     root_remote.as_deref(),
@@ -1192,6 +1216,19 @@ fn maybe_print_under_development_feature_warning(
     );
 }
 
+async fn run_debug_trace_reduce_command(cmd: DebugTraceReduceCommand) -> anyhow::Result<()> {
+    let output = cmd
+        .output
+        .unwrap_or_else(|| cmd.trace_bundle.join(REDUCED_STATE_FILE_NAME));
+
+    let trace = replay_bundle(&cmd.trace_bundle)?;
+    let reduced_json = serde_json::to_vec_pretty(&trace)?;
+    tokio::fs::write(&output, reduced_json).await?;
+    println!("{}", output.display());
+
+    Ok(())
+}
+
 async fn run_debug_prompt_input_command(
     cmd: DebugPromptInputCommand,
     root_config_overrides: CliConfigOverrides,

diff --git a/codex-rs/code-mode/src/lib.rs b/codex-rs/code-mode/src/lib.rs
@@ -23,7 +23,9 @@ pub use runtime::DEFAULT_WAIT_YIELD_TIME_MS;
 pub use runtime::ExecuteRequest;
 pub use runtime::RuntimeResponse;
 pub use runtime::WaitRequest;
+pub use runtime::WaitResponse;
 pub use service::CodeModeService;
+pub use service::CodeModeToolInvocation;
 pub use service::CodeModeTurnHost;
 pub use service::CodeModeTurnWorker;
 

diff --git a/codex-rs/code-mode/src/runtime/mod.rs b/codex-rs/code-mode/src/runtime/mod.rs
@@ -10,6 +10,7 @@ use std::sync::mpsc as std_mpsc;
 use std::thread;
 
 use codex_protocol::ToolName;
+use serde::Serialize;
 use serde_json::Value as JsonValue;
 use tokio::sync::mpsc;
 
@@ -25,6 +26,12 @@ const EXIT_SENTINEL: &str = "__codex_code_mode_exit__";
 
 #[derive(Clone, Debug)]
 pub struct ExecuteRequest {
+    /// Runtime cell id to use for this execution.
+    ///
+    /// Hosts that need to trace work before JavaScript starts can allocate an id
+    /// first and pass it here. `None` keeps the service-owned allocation path
+    /// for callers that only need the id once a runtime response is returned.
+    pub cell_id: Option<String>,
     pub tool_call_id: String,
     pub enabled_tools: Vec<ToolDefinition>,
     pub source: String,
@@ -41,6 +48,33 @@ pub struct WaitRequest {
 }
 
 #[derive(Debug, PartialEq)]
+pub enum WaitResponse {
+    /// The requested cell was live when the wait command was accepted.
+    ///
+    /// Non-yielding responses from this variant are terminal lifecycle points
+    /// for the matching code cell.
+    Cell(RuntimeResponse),
+    /// The requested cell was not live, so the response is only the result of
+    /// the `wait` tool call. It must not be treated as a code-cell lifecycle
+    /// event because there is no cell to complete.
+    MissingCell(RuntimeResponse),
+}
+
+impl WaitResponse {
+    pub fn into_runtime_response(self) -> RuntimeResponse {
+        match self {
+            WaitResponse::Cell(response) | WaitResponse::MissingCell(response) => response,
+        }
+    }
+
+    pub fn runtime_response(&self) -> &RuntimeResponse {
+        match self {
+            WaitResponse::Cell(response) | WaitResponse::MissingCell(response) => response,
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Serialize)]
 pub enum RuntimeResponse {
     Yielded {
         cell_id: String,
@@ -331,6 +365,7 @@ mod tests {
 
     fn execute_request(source: &str) -> ExecuteRequest {
         ExecuteRequest {
+            cell_id: None,
             tool_call_id: "call_1".to_string(),
             enabled_tools: Vec::new(),
             source: source.to_string(),

diff --git a/codex-rs/code-mode/src/service.rs b/codex-rs/code-mode/src/service.rs
@@ -21,14 +21,26 @@ use crate::runtime::RuntimeEvent;
 use crate::runtime::RuntimeResponse;
 use crate::runtime::TurnMessage;
 use crate::runtime::WaitRequest;
+use crate::runtime::WaitResponse;
 use crate::runtime::spawn_runtime;
 
+/// Nested tool request emitted by one code-mode cell.
+///
+/// Code mode owns the per-cell runtime id. Hosts should preserve it for
+/// provenance/debugging, but should still assign their own runtime tool call id
+/// if their tool-call graph requires globally unique ids.
+pub struct CodeModeToolInvocation {
+    pub cell_id: String,
+    pub runtime_tool_call_id: String,
+    pub tool_name: ToolName,
+    pub input: Option<JsonValue>,
+}
+
 #[async_trait]
 pub trait CodeModeTurnHost: Send + Sync {
     async fn invoke_tool(
         &self,
-        tool_name: ToolName,
-        input: Option<JsonValue>,
+        invocation: CodeModeToolInvocation,
         cancellation_token: CancellationToken,
     ) -> Result<JsonValue, String>;
 
@@ -76,24 +88,44 @@ impl CodeModeService {
         *self.inner.stored_values.lock().await = values;
     }
 
-    pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
-        let cell_id = self
-            .inner
+    /// Reserves the runtime cell id for a future `execute` request.
+    ///
+    /// The runtime can issue nested tool calls before the first `execute`
+    /// response is returned. Hosts that need a parent trace object for those
+    /// nested calls should allocate the cell id up front and pass it back on the
+    /// `ExecuteRequest`.
+    pub fn allocate_cell_id(&self) -> String {
+        self.inner
             .next_cell_id
             .fetch_add(1, Ordering::Relaxed)
-            .to_string();
+            .to_string()
+    }
+
+    pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
+        let cell_id = request
+            .cell_id
+            .clone()
+            .unwrap_or_else(|| self.allocate_cell_id());
+        let mut sessions = self.inner.sessions.lock().await;
+        if sessions.contains_key(&cell_id) {
+            return Err(format!("exec cell {cell_id} already exists"));
+        }
+
+        // Keep the session registry locked through insertion so a caller-owned
+        // cell id cannot race with another execute and replace a live runtime.
         let (event_tx, event_rx) = mpsc::unbounded_channel();
         let (runtime_tx, runtime_terminate_handle) = spawn_runtime(request.clone(), event_tx)?;
         let (control_tx, control_rx) = mpsc::unbounded_channel();
         let (response_tx, response_rx) = oneshot::channel();
 
-        self.inner.sessions.lock().await.insert(
+        sessions.insert(
             cell_id.clone(),
             SessionHandle {
                 control_tx: control_tx.clone(),
                 runtime_tx: runtime_tx.clone(),
             },
         );
+        drop(sessions);
 
         tokio::spawn(run_session_control(
             Arc::clone(&self.inner),
@@ -113,7 +145,7 @@ impl CodeModeService {
             .map_err(|_| "exec runtime ended unexpectedly".to_string())
     }
 
-    pub async fn wait(&self, request: WaitRequest) -> Result<RuntimeResponse, String> {
+    pub async fn wait(&self, request: WaitRequest) -> Result<WaitResponse, String> {
         let cell_id = request.cell_id.clone();
         let handle = self
             .inner
@@ -123,7 +155,7 @@ impl CodeModeService {
             .get(&request.cell_id)
             .cloned();
         let Some(handle) = handle else {
-            return Ok(missing_cell_response(cell_id));
+            return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
         };
         let (response_tx, response_rx) = oneshot::channel();
         let control_message = if request.terminate {
@@ -135,11 +167,13 @@ impl CodeModeService {
             }
         };
         if handle.control_tx.send(control_message).is_err() {
-            return Ok(missing_cell_response(cell_id));
+            return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
         }
         match response_rx.await {
-            Ok(response) => Ok(response),
-            Err(_) => Ok(missing_cell_response(request.cell_id)),
+            Ok(response) => Ok(WaitResponse::Cell(response)),
+            Err(_) => Ok(WaitResponse::MissingCell(missing_cell_response(
+                request.cell_id,
+            ))),
         }
     }
 
@@ -181,9 +215,14 @@ impl CodeModeService {
                         let host = Arc::clone(&host);
                         let inner = Arc::clone(&inner);
                         tokio::spawn(async move {
-                            let response = host
-                                .invoke_tool(name, input, CancellationToken::new())
-                                .await;
+                            let invocation = CodeModeToolInvocation {
+                                cell_id: cell_id.clone(),
+                                runtime_tool_call_id: id.clone(),
+                                tool_name: name,
+                                input,
+                            };
+                            let response =
+                                host.invoke_tool(invocation, CancellationToken::new()).await;
                             let runtime_tx = inner
                                 .sessions
                                 .lock()
@@ -482,6 +521,8 @@ mod tests {
     use super::RuntimeResponse;
     use super::SessionControlCommand;
     use super::SessionControlContext;
+    use super::WaitRequest;
+    use super::WaitResponse;
     use super::run_session_control;
     use crate::FunctionCallOutputContentItem;
     use crate::runtime::ExecuteRequest;
@@ -490,6 +531,7 @@ mod tests {
 
     fn execute_request(source: &str) -> ExecuteRequest {
         ExecuteRequest {
+            cell_id: None,
             tool_call_id: "call_1".to_string(),
             enabled_tools: Vec::new(),
             source: source.to_string(),
@@ -832,6 +874,30 @@ image({
         );
     }
 
+    #[tokio::test]
+    async fn wait_reports_missing_cell_separately_from_runtime_results() {
+        let service = CodeModeService::new();
+
+        let response = service
+            .wait(WaitRequest {
+                cell_id: "missing".to_string(),
+                yield_time_ms: 1,
+                terminate: false,
+            })
+            .await
+            .unwrap();
+
+        assert_eq!(
+            response,
+            WaitResponse::MissingCell(RuntimeResponse::Result {
+                cell_id: "missing".to_string(),
+                content_items: Vec::new(),
+                stored_values: HashMap::new(),
+                error_text: Some("exec cell missing not found".to_string()),
+            })
+        );
+    }
+
     #[tokio::test]
     async fn terminate_waits_for_runtime_shutdown_before_responding() {
         let inner = test_inner();

diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
@@ -53,6 +53,7 @@ codex-plugin = { workspace = true }
 codex-protocol = { workspace = true }
 codex-response-debug-context = { workspace = true }
 codex-rollout = { workspace = true }
+codex-rollout-trace = { workspace = true }
 codex-rmcp-client = { workspace = true }
 codex-sandboxing = { workspace = true }
 codex-state = { workspace = true }