Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,15 @@ build:argument-comment-lint --@rules_rust//rust/toolchain/channel=nightly
common:ci-windows --config=ci-bazel
common:ci-windows --build_metadata=TAG_os=windows
common:ci-windows --repo_contents_cache=D:/a/.cache/bazel-repo-contents-cache
# Windows tests run locally, and several Rust integration test binaries spawn
# subprocesses/servers. Keep local test-process fanout lower than the overall
# Bazel job count so sharded tests do not contend as heavily on the runner.
common:ci-windows --local_test_jobs=2
# Also run each Rust test binary's harness single-threaded on Windows. The
# app-server shards spawn many child processes internally, and now that the old
# monolithic test is split into multiple Bazel targets, Bazel-level and
# harness-level concurrency would otherwise multiply on the constrained runner.
common:ci-windows --test_env=RUST_TEST_THREADS=1

# We prefer to run the build actions entirely remotely so we can dial up the concurrency.
# We have platform-specific tests, so we want to execute the tests on all platforms using the strongest sandboxing available on each platform.
Expand All @@ -143,6 +152,7 @@ common:ci-macos --build_metadata=TAG_os=macos
common:ci-macos --config=remote
common:ci-macos --strategy=remote
common:ci-macos --strategy=TestRunner=darwin-sandbox,local
common:ci-macos --local_test_jobs=2

# Linux-only V8 CI config.
common:ci-v8 --config=ci
Expand Down
11 changes: 11 additions & 0 deletions .github/scripts/run-bazel-ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ print_bazel_test_log_tails() {
local rel_path="${target#//}"
rel_path="${rel_path/://}"
local test_log="${testlogs_dir}/${rel_path}/test.log"
local printed_test_log

printed_test_log="$(
grep -F "FAIL: ${target} " "$console_log" \
| sed -nE 's#.*\(see ([^)]+/test\.log)\).*#\1#p' \
| tr -d '\r' \
| head -n 1
)"
if [[ -n "$printed_test_log" ]]; then
test_log="$printed_test_log"
fi

echo "::group::Bazel test log tail for ${target}"
if [[ -f "$test_log" ]]; then
Expand Down
3 changes: 0 additions & 3 deletions codex-rs/app-server/tests/all.rs

This file was deleted.

10 changes: 10 additions & 0 deletions codex-rs/app-server/tests/non_v2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Integration tests for legacy/non-v2 app-server coverage.
//
// Each file in `tests/` becomes its own Bazel integration-test target, so keep
// this split in sync with the generated target names expected by CI.
#[path = "suite/auth.rs"]
mod auth;
#[path = "suite/conversation_summary.rs"]
mod conversation_summary;
#[path = "suite/fuzzy_file_search.rs"]
mod fuzzy_file_search;
4 changes: 0 additions & 4 deletions codex-rs/app-server/tests/suite/mod.rs

This file was deleted.

58 changes: 0 additions & 58 deletions codex-rs/app-server/tests/suite/v2/mod.rs

This file was deleted.

122 changes: 24 additions & 98 deletions codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,9 @@
use anyhow::Context;
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_responses_server_repeating_assistant;
use app_test_support::create_mock_responses_server_sequence_unchecked;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::ItemStartedNotification;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadLoadedListParams;
use codex_app_server_protocol::ThreadLoadedListResponse;
use codex_app_server_protocol::ThreadReadParams;
Expand All @@ -26,57 +20,15 @@ use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::UserInput as V2UserInput;
use core_test_support::responses;
use core_test_support::streaming_sse::StreamingSseChunk;
use core_test_support::streaming_sse::start_streaming_sse_server;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
use tokio::sync::oneshot;
use tokio::time::timeout;

const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);

/// Polls `server` until the number of `POST .../responses` requests it has
/// received equals `expected_count` and has stayed there for at least
/// `settle_duration`.
///
/// # Errors
///
/// Fails if the received requests cannot be fetched, if more than
/// `expected_count` matching requests are observed, or if the count does not
/// settle within `DEFAULT_READ_TIMEOUT`.
async fn wait_for_responses_request_count_to_stabilize(
    server: &wiremock::MockServer,
    expected_count: usize,
    settle_duration: std::time::Duration,
) -> Result<()> {
    let poll_interval = std::time::Duration::from_millis(10);

    let poll_until_settled = async {
        // Moment at which the count was first seen sitting at `expected_count`;
        // cleared again whenever the count is observed below the target.
        let mut matched_at: Option<tokio::time::Instant> = None;
        loop {
            let received = server
                .received_requests()
                .await
                .context("failed to fetch received requests")?;
            let responses_request_count = received
                .iter()
                .filter(|req| req.method == "POST" && req.url.path().ends_with("/responses"))
                .count();

            // Overshooting can never recover, so fail immediately.
            if responses_request_count > expected_count {
                anyhow::bail!(
                    "expected exactly {expected_count} /responses requests, got {responses_request_count}"
                );
            }

            if responses_request_count < expected_count {
                matched_at = None;
            } else if let Some(since) = matched_at {
                if since.elapsed() >= settle_duration {
                    return Ok::<(), anyhow::Error>(());
                }
            } else {
                matched_at = Some(tokio::time::Instant::now());
            }

            tokio::time::sleep(poll_interval).await;
        }
    };

    // Outer `?` surfaces the timeout; the inner result is returned as-is.
    timeout(DEFAULT_READ_TIMEOUT, poll_until_settled).await?
}

#[tokio::test]
async fn thread_unsubscribe_keeps_thread_loaded_until_idle_timeout() -> Result<()> {
let server = create_mock_responses_server_repeating_assistant("Done").await;
Expand Down Expand Up @@ -128,32 +80,24 @@ async fn thread_unsubscribe_keeps_thread_loaded_until_idle_timeout() -> Result<(

#[tokio::test]
async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> {
#[cfg(target_os = "windows")]
let shell_command = vec![
"powershell".to_string(),
"-Command".to_string(),
"Start-Sleep -Seconds 1".to_string(),
];
#[cfg(not(target_os = "windows"))]
let shell_command = vec!["sleep".to_string(), "1".to_string()];

let tmp = TempDir::new()?;
let codex_home = tmp.path().join("codex_home");
std::fs::create_dir(&codex_home)?;
let working_directory = tmp.path().join("workdir");
std::fs::create_dir(&working_directory)?;

let server = create_mock_responses_server_sequence_unchecked(vec![
create_shell_command_sse_response(
shell_command.clone(),
Some(&working_directory),
Some(10_000),
"call_sleep",
)?,
create_final_assistant_message_sse_response("Done")?,
])
let (release_response_tx, release_response_rx) = oneshot::channel();
let (server, mut completions) = start_streaming_sse_server(vec![vec![StreamingSseChunk {
gate: Some(release_response_rx),
body: responses::sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_assistant_message("msg-1", "Done"),
responses::ev_completed("resp-1"),
]),
}]])
.await;
create_config_toml(&codex_home, &server.uri())?;
let response_completed = completions.remove(0);
create_config_toml(&codex_home, server.uri())?;

let mut mcp = McpProcess::new(&codex_home).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
Expand All @@ -180,9 +124,9 @@ async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> {

timeout(
DEFAULT_READ_TIMEOUT,
wait_for_command_execution_item_started(&mut mcp),
server.wait_for_request_count(/*count*/ 1),
)
.await??;
.await?;

let unsubscribe_id = mcp
.send_thread_unsubscribe_request(ThreadUnsubscribeParams {
Expand All @@ -197,21 +141,16 @@ async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> {
let unsubscribe = to_response::<ThreadUnsubscribeResponse>(unsubscribe_resp)?;
assert_eq!(unsubscribe.status, ThreadUnsubscribeStatus::Unsubscribed);

assert!(
timeout(
std::time::Duration::from_millis(250),
mcp.read_stream_until_notification_message("thread/closed"),
)
.await
.is_err()
let closed_while_command_running = timeout(
std::time::Duration::from_millis(250),
mcp.read_stream_until_notification_message("thread/closed"),
);
let closed_while_command_running = closed_while_command_running.await;
let _ = release_response_tx.send(());
assert!(closed_while_command_running.is_err());

wait_for_responses_request_count_to_stabilize(
&server,
/*expected_count*/ 2,
std::time::Duration::from_millis(200),
)
.await?;
timeout(DEFAULT_READ_TIMEOUT, response_completed).await??;
server.shutdown().await;

Ok(())
}
Expand Down Expand Up @@ -350,19 +289,6 @@ async fn thread_unsubscribe_reports_not_subscribed_before_idle_unload() -> Resul
Ok(())
}

/// Reads `item/started` notifications from `mcp` until one whose item is a
/// `ThreadItem::CommandExecution` arrives.
///
/// # Errors
///
/// Fails if reading the stream fails, if a notification carries no params, or
/// if the params do not deserialize into an `ItemStartedNotification`.
async fn wait_for_command_execution_item_started(mcp: &mut McpProcess) -> Result<()> {
    loop {
        let notification = mcp
            .read_stream_until_notification_message("item/started")
            .await?;
        let params = notification.params.context("item/started params")?;
        let started: ItemStartedNotification = serde_json::from_value(params)?;
        // Other item kinds are skipped; keep reading until a command execution starts.
        if matches!(started.item, ThreadItem::CommandExecution { .. }) {
            return Ok(());
        }
    }
}

fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
std::fs::write(
Expand Down
46 changes: 46 additions & 0 deletions codex-rs/app-server/tests/v2_config_and_core.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Core v2 app-server integration tests that do not depend on the thread/turn
// analytics or websocket helper modules.
#[path = "suite/v2/account.rs"]
mod account;
#[path = "suite/v2/app_list.rs"]
mod app_list;
#[path = "suite/v2/client_metadata.rs"]
mod client_metadata;
#[path = "suite/v2/collaboration_mode_list.rs"]
mod collaboration_mode_list;
#[path = "suite/v2/compaction.rs"]
mod compaction;
#[path = "suite/v2/config_rpc.rs"]
mod config_rpc;
#[path = "suite/v2/dynamic_tools.rs"]
mod dynamic_tools;
#[path = "suite/v2/experimental_api.rs"]
mod experimental_api;
#[path = "suite/v2/experimental_feature_list.rs"]
mod experimental_feature_list;
#[path = "suite/v2/fs.rs"]
mod fs;
#[path = "suite/v2/initialize.rs"]
mod initialize;
#[path = "suite/v2/memory_reset.rs"]
mod memory_reset;
#[path = "suite/v2/model_list.rs"]
mod model_list;
#[path = "suite/v2/output_schema.rs"]
mod output_schema;
#[path = "suite/v2/plan_item.rs"]
mod plan_item;
#[path = "suite/v2/rate_limits.rs"]
mod rate_limits;
#[path = "suite/v2/request_permissions.rs"]
mod request_permissions;
#[path = "suite/v2/request_user_input.rs"]
mod request_user_input;
#[path = "suite/v2/review.rs"]
mod review;
#[path = "suite/v2/safety_check_downgrade.rs"]
mod safety_check_downgrade;
#[path = "suite/v2/skills_list.rs"]
mod skills_list;
#[path = "suite/v2/windows_sandbox_setup.rs"]
mod windows_sandbox_setup;
19 changes: 19 additions & 0 deletions codex-rs/app-server/tests/v2_plugins_mcp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// v2 app-server plugin and MCP integration tests.
#[path = "suite/v2/marketplace_add.rs"]
mod marketplace_add;
#[path = "suite/v2/mcp_resource.rs"]
mod mcp_resource;
#[path = "suite/v2/mcp_server_elicitation.rs"]
mod mcp_server_elicitation;
#[path = "suite/v2/mcp_server_status.rs"]
mod mcp_server_status;
#[path = "suite/v2/mcp_tool.rs"]
mod mcp_tool;
#[path = "suite/v2/plugin_install.rs"]
mod plugin_install;
#[path = "suite/v2/plugin_list.rs"]
mod plugin_list;
#[path = "suite/v2/plugin_read.rs"]
mod plugin_read;
#[path = "suite/v2/plugin_uninstall.rs"]
mod plugin_uninstall;
4 changes: 4 additions & 0 deletions codex-rs/app-server/tests/v2_realtime.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// v2 realtime integration tests, split out because they are comparatively
// large and expensive.
#[path = "suite/v2/realtime_conversation.rs"]
mod realtime_conversation;
Loading
Loading