Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions codex-rs/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions codex-rs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ members = [
"protocol",
"realtime-webrtc",
"rollout",
"rollout-trace",
"rmcp-client",
"responses-api-proxy",
"response-debug-context",
Expand Down Expand Up @@ -159,6 +160,7 @@ codex-responses-api-proxy = { path = "responses-api-proxy" }
codex-response-debug-context = { path = "response-debug-context" }
codex-rmcp-client = { path = "rmcp-client" }
codex-rollout = { path = "rollout" }
codex-rollout-trace = { path = "rollout-trace" }
codex-sandboxing = { path = "sandboxing" }
codex-secrets = { path = "secrets" }
codex-shell-command = { path = "shell-command" }
Expand Down
1 change: 1 addition & 0 deletions codex-rs/cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ codex-mcp-server = { workspace = true }
codex-protocol = { workspace = true }
codex-responses-api-proxy = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-rollout-trace = { workspace = true }
codex-sandboxing = { workspace = true }
codex-state = { workspace = true }
codex-stdio-to-uds = { workspace = true }
Expand Down
37 changes: 37 additions & 0 deletions codex-rs/cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use codex_exec::Command as ExecCommand;
use codex_exec::ReviewArgs;
use codex_execpolicy::ExecPolicyCheckCommand;
use codex_responses_api_proxy::Args as ResponsesApiProxyArgs;
use codex_rollout_trace::REDUCED_STATE_FILE_NAME;
use codex_rollout_trace::replay_bundle;
use codex_state::StateRuntime;
use codex_state::state_db_path;
use codex_tui::AppExitInfo;
Expand Down Expand Up @@ -190,6 +192,9 @@ enum DebugSubcommand {
/// Render the model-visible prompt input list as JSON.
PromptInput(DebugPromptInputCommand),

/// Replay a rollout trace bundle and write reduced state JSON.
TraceReduce(DebugTraceReduceCommand),

/// Internal: reset local memory state for a fresh start.
#[clap(hide = true)]
ClearMemories,
Expand Down Expand Up @@ -224,6 +229,17 @@ struct DebugPromptInputCommand {
images: Vec<PathBuf>,
}

#[derive(Debug, Parser)]
struct DebugTraceReduceCommand {
/// Trace bundle directory containing manifest.json and trace.jsonl.
#[arg(value_name = "TRACE_BUNDLE")]
trace_bundle: PathBuf,

/// Output path for reduced RolloutTrace JSON. Defaults to TRACE_BUNDLE/state.json.
#[arg(long = "output", short = 'o', value_name = "FILE")]
output: Option<PathBuf>,
}

#[derive(Debug, Parser)]
struct ResumeCommand {
/// Conversation/session id (UUID) or thread name. UUIDs take precedence if it parses.
Expand Down Expand Up @@ -991,6 +1007,14 @@ async fn cli_main(arg0_paths: Arg0DispatchPaths) -> anyhow::Result<()> {
)
.await?;
}
DebugSubcommand::TraceReduce(cmd) => {
reject_remote_mode_for_subcommand(
root_remote.as_deref(),
root_remote_auth_token_env.as_deref(),
"debug trace-reduce",
)?;
run_debug_trace_reduce_command(cmd).await?;
}
DebugSubcommand::ClearMemories => {
reject_remote_mode_for_subcommand(
root_remote.as_deref(),
Expand Down Expand Up @@ -1192,6 +1216,19 @@ fn maybe_print_under_development_feature_warning(
);
}

async fn run_debug_trace_reduce_command(cmd: DebugTraceReduceCommand) -> anyhow::Result<()> {
let output = cmd
.output
.unwrap_or_else(|| cmd.trace_bundle.join(REDUCED_STATE_FILE_NAME));

let trace = replay_bundle(&cmd.trace_bundle)?;
let reduced_json = serde_json::to_vec_pretty(&trace)?;
tokio::fs::write(&output, reduced_json).await?;
println!("{}", output.display());

Ok(())
}

async fn run_debug_prompt_input_command(
cmd: DebugPromptInputCommand,
root_config_overrides: CliConfigOverrides,
Expand Down
2 changes: 2 additions & 0 deletions codex-rs/code-mode/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ pub use runtime::DEFAULT_WAIT_YIELD_TIME_MS;
pub use runtime::ExecuteRequest;
pub use runtime::RuntimeResponse;
pub use runtime::WaitRequest;
pub use runtime::WaitResponse;
pub use service::CodeModeService;
pub use service::CodeModeToolInvocation;
pub use service::CodeModeTurnHost;
pub use service::CodeModeTurnWorker;

Expand Down
35 changes: 35 additions & 0 deletions codex-rs/code-mode/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::sync::mpsc as std_mpsc;
use std::thread;

use codex_protocol::ToolName;
use serde::Serialize;
use serde_json::Value as JsonValue;
use tokio::sync::mpsc;

Expand All @@ -25,6 +26,12 @@ const EXIT_SENTINEL: &str = "__codex_code_mode_exit__";

#[derive(Clone, Debug)]
pub struct ExecuteRequest {
/// Runtime cell id to use for this execution.
///
/// Hosts that need to trace work before JavaScript starts can allocate an id
/// first and pass it here. `None` keeps the service-owned allocation path
/// for callers that only need the id once a runtime response is returned.
pub cell_id: Option<String>,
pub tool_call_id: String,
pub enabled_tools: Vec<ToolDefinition>,
pub source: String,
Expand All @@ -41,6 +48,33 @@ pub struct WaitRequest {
}

#[derive(Debug, PartialEq)]
pub enum WaitResponse {
/// The requested cell was live when the wait command was accepted.
///
/// Non-yielding responses from this variant are terminal lifecycle points
/// for the matching code cell.
Cell(RuntimeResponse),
/// The requested cell was not live, so the response is only the result of
/// the `wait` tool call. It must not be treated as a code-cell lifecycle
/// event because there is no cell to complete.
MissingCell(RuntimeResponse),
}

impl WaitResponse {
pub fn into_runtime_response(self) -> RuntimeResponse {
match self {
WaitResponse::Cell(response) | WaitResponse::MissingCell(response) => response,
}
}

pub fn runtime_response(&self) -> &RuntimeResponse {
match self {
WaitResponse::Cell(response) | WaitResponse::MissingCell(response) => response,
}
}
}

#[derive(Debug, PartialEq, Serialize)]
pub enum RuntimeResponse {
Yielded {
cell_id: String,
Expand Down Expand Up @@ -331,6 +365,7 @@ mod tests {

fn execute_request(source: &str) -> ExecuteRequest {
ExecuteRequest {
cell_id: None,
tool_call_id: "call_1".to_string(),
enabled_tools: Vec::new(),
source: source.to_string(),
Expand Down
96 changes: 81 additions & 15 deletions codex-rs/code-mode/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,26 @@ use crate::runtime::RuntimeEvent;
use crate::runtime::RuntimeResponse;
use crate::runtime::TurnMessage;
use crate::runtime::WaitRequest;
use crate::runtime::WaitResponse;
use crate::runtime::spawn_runtime;

/// Nested tool request emitted by one code-mode cell.
///
/// Code mode owns the per-cell runtime id. Hosts should preserve it for
/// provenance/debugging, but should still assign their own runtime tool call id
/// if their tool-call graph requires globally unique ids.
pub struct CodeModeToolInvocation {
pub cell_id: String,
pub runtime_tool_call_id: String,
pub tool_name: ToolName,
pub input: Option<JsonValue>,
}

#[async_trait]
pub trait CodeModeTurnHost: Send + Sync {
async fn invoke_tool(
&self,
tool_name: ToolName,
input: Option<JsonValue>,
invocation: CodeModeToolInvocation,
cancellation_token: CancellationToken,
) -> Result<JsonValue, String>;

Expand Down Expand Up @@ -76,24 +88,44 @@ impl CodeModeService {
*self.inner.stored_values.lock().await = values;
}

pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
let cell_id = self
.inner
/// Reserves the runtime cell id for a future `execute` request.
///
/// The runtime can issue nested tool calls before the first `execute`
/// response is returned. Hosts that need a parent trace object for those
/// nested calls should allocate the cell id up front and pass it back on the
/// `ExecuteRequest`.
pub fn allocate_cell_id(&self) -> String {
self.inner
.next_cell_id
.fetch_add(1, Ordering::Relaxed)
.to_string();
.to_string()
}

pub async fn execute(&self, request: ExecuteRequest) -> Result<RuntimeResponse, String> {
let cell_id = request
.cell_id
.clone()
.unwrap_or_else(|| self.allocate_cell_id());
let mut sessions = self.inner.sessions.lock().await;
if sessions.contains_key(&cell_id) {
return Err(format!("exec cell {cell_id} already exists"));
}

// Keep the session registry locked through insertion so a caller-owned
// cell id cannot race with another execute and replace a live runtime.
let (event_tx, event_rx) = mpsc::unbounded_channel();
let (runtime_tx, runtime_terminate_handle) = spawn_runtime(request.clone(), event_tx)?;
let (control_tx, control_rx) = mpsc::unbounded_channel();
let (response_tx, response_rx) = oneshot::channel();

self.inner.sessions.lock().await.insert(
sessions.insert(
cell_id.clone(),
SessionHandle {
control_tx: control_tx.clone(),
runtime_tx: runtime_tx.clone(),
},
);
drop(sessions);

tokio::spawn(run_session_control(
Arc::clone(&self.inner),
Expand All @@ -113,7 +145,7 @@ impl CodeModeService {
.map_err(|_| "exec runtime ended unexpectedly".to_string())
}

pub async fn wait(&self, request: WaitRequest) -> Result<RuntimeResponse, String> {
pub async fn wait(&self, request: WaitRequest) -> Result<WaitResponse, String> {
let cell_id = request.cell_id.clone();
let handle = self
.inner
Expand All @@ -123,7 +155,7 @@ impl CodeModeService {
.get(&request.cell_id)
.cloned();
let Some(handle) = handle else {
return Ok(missing_cell_response(cell_id));
return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
};
let (response_tx, response_rx) = oneshot::channel();
let control_message = if request.terminate {
Expand All @@ -135,11 +167,13 @@ impl CodeModeService {
}
};
if handle.control_tx.send(control_message).is_err() {
return Ok(missing_cell_response(cell_id));
return Ok(WaitResponse::MissingCell(missing_cell_response(cell_id)));
}
match response_rx.await {
Ok(response) => Ok(response),
Err(_) => Ok(missing_cell_response(request.cell_id)),
Ok(response) => Ok(WaitResponse::Cell(response)),
Err(_) => Ok(WaitResponse::MissingCell(missing_cell_response(
request.cell_id,
))),
}
}

Expand Down Expand Up @@ -181,9 +215,14 @@ impl CodeModeService {
let host = Arc::clone(&host);
let inner = Arc::clone(&inner);
tokio::spawn(async move {
let response = host
.invoke_tool(name, input, CancellationToken::new())
.await;
let invocation = CodeModeToolInvocation {
cell_id: cell_id.clone(),
runtime_tool_call_id: id.clone(),
tool_name: name,
input,
};
let response =
host.invoke_tool(invocation, CancellationToken::new()).await;
let runtime_tx = inner
.sessions
.lock()
Expand Down Expand Up @@ -482,6 +521,8 @@ mod tests {
use super::RuntimeResponse;
use super::SessionControlCommand;
use super::SessionControlContext;
use super::WaitRequest;
use super::WaitResponse;
use super::run_session_control;
use crate::FunctionCallOutputContentItem;
use crate::runtime::ExecuteRequest;
Expand All @@ -490,6 +531,7 @@ mod tests {

fn execute_request(source: &str) -> ExecuteRequest {
ExecuteRequest {
cell_id: None,
tool_call_id: "call_1".to_string(),
enabled_tools: Vec::new(),
source: source.to_string(),
Expand Down Expand Up @@ -832,6 +874,30 @@ image({
);
}

#[tokio::test]
async fn wait_reports_missing_cell_separately_from_runtime_results() {
let service = CodeModeService::new();

let response = service
.wait(WaitRequest {
cell_id: "missing".to_string(),
yield_time_ms: 1,
terminate: false,
})
.await
.unwrap();

assert_eq!(
response,
WaitResponse::MissingCell(RuntimeResponse::Result {
cell_id: "missing".to_string(),
content_items: Vec::new(),
stored_values: HashMap::new(),
error_text: Some("exec cell missing not found".to_string()),
})
);
}

#[tokio::test]
async fn terminate_waits_for_runtime_shutdown_before_responding() {
let inner = test_inner();
Expand Down
1 change: 1 addition & 0 deletions codex-rs/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ codex-plugin = { workspace = true }
codex-protocol = { workspace = true }
codex-response-debug-context = { workspace = true }
codex-rollout = { workspace = true }
codex-rollout-trace = { workspace = true }
codex-rmcp-client = { workspace = true }
codex-sandboxing = { workspace = true }
codex-state = { workspace = true }
Expand Down
Loading
Loading