openai · starr-openai · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/.bazelrc b/.bazelrc
@@ -125,6 +125,15 @@ build:argument-comment-lint --@rules_rust//rust/toolchain/channel=nightly
 common:ci-windows --config=ci-bazel
 common:ci-windows --build_metadata=TAG_os=windows
 common:ci-windows --repo_contents_cache=D:/a/.cache/bazel-repo-contents-cache
+# Windows tests run locally, and several Rust integration test binaries spawn
+# subprocesses/servers. Keep local test-process fanout lower than the overall
+# Bazel job count so sharded tests do not contend as heavily on the runner.
+common:ci-windows --local_test_jobs=2
+# Also keep Rust's per-test-binary harness serial on Windows. The app-server
+# shards spawn many child processes internally; without this, splitting the old
+# giant test into multiple Bazel targets multiplies both Bazel-level and
+# harness-level concurrency on the constrained Windows runner.
+common:ci-windows --test_env=RUST_TEST_THREADS=1
 
 # We prefer to run the build actions entirely remotely so we can dial up the concurrency.
 # We have platform-specific tests, so we want to execute the tests on all platforms using the strongest sandboxing available on each platform.
@@ -143,6 +152,7 @@ common:ci-macos --build_metadata=TAG_os=macos
 common:ci-macos --config=remote
 common:ci-macos --strategy=remote
 common:ci-macos --strategy=TestRunner=darwin-sandbox,local
+common:ci-macos --local_test_jobs=2
 
 # Linux-only V8 CI config.
 common:ci-v8 --config=ci

diff --git a/.github/scripts/run-bazel-ci.sh b/.github/scripts/run-bazel-ci.sh
@@ -94,6 +94,17 @@ print_bazel_test_log_tails() {
     local rel_path="${target#//}"
     rel_path="${rel_path/://}"
     local test_log="${testlogs_dir}/${rel_path}/test.log"
+    local printed_test_log
+
+    printed_test_log="$(
+      grep -F "FAIL: ${target} " "$console_log" \
+        | sed -nE 's#.*\(see ([^)]+/test\.log)\).*#\1#p' \
+        | tr -d '\r' \
+        | head -n 1
+    )"
+    if [[ -n "$printed_test_log" ]]; then
+      test_log="$printed_test_log"
+    fi
 
     echo "::group::Bazel test log tail for ${target}"
     if [[ -f "$test_log" ]]; then

diff --git a/codex-rs/app-server/tests/all.rs b/codex-rs/app-server/tests/all.rs
diff --git a/codex-rs/app-server/tests/non_v2.rs b/codex-rs/app-server/tests/non_v2.rs
@@ -0,0 +1,10 @@
+// Integration tests for legacy/non-v2 app-server coverage.
+//
+// Each file in `tests/` becomes its own Bazel integration-test target, so keep
+// this split in sync with the generated target names expected by CI.
+#[path = "suite/auth.rs"]
+mod auth;
+#[path = "suite/conversation_summary.rs"]
+mod conversation_summary;
+#[path = "suite/fuzzy_file_search.rs"]
+mod fuzzy_file_search;
diff --git a/codex-rs/app-server/tests/suite/mod.rs b/codex-rs/app-server/tests/suite/mod.rs
diff --git a/codex-rs/app-server/tests/suite/v2/mod.rs b/codex-rs/app-server/tests/suite/v2/mod.rs
diff --git a/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs b/codex-rs/app-server/tests/suite/v2/thread_unsubscribe.rs
@@ -1,15 +1,9 @@
-use anyhow::Context;
 use anyhow::Result;
 use app_test_support::McpProcess;
-use app_test_support::create_final_assistant_message_sse_response;
 use app_test_support::create_mock_responses_server_repeating_assistant;
-use app_test_support::create_mock_responses_server_sequence_unchecked;
-use app_test_support::create_shell_command_sse_response;
 use app_test_support::to_response;
-use codex_app_server_protocol::ItemStartedNotification;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::RequestId;
-use codex_app_server_protocol::ThreadItem;
 use codex_app_server_protocol::ThreadLoadedListParams;
 use codex_app_server_protocol::ThreadLoadedListResponse;
 use codex_app_server_protocol::ThreadReadParams;
@@ -26,57 +20,15 @@ use codex_app_server_protocol::TurnStartParams;
 use codex_app_server_protocol::TurnStartResponse;
 use codex_app_server_protocol::UserInput as V2UserInput;
 use core_test_support::responses;
+use core_test_support::streaming_sse::StreamingSseChunk;
+use core_test_support::streaming_sse::start_streaming_sse_server;
 use pretty_assertions::assert_eq;
 use tempfile::TempDir;
+use tokio::sync::oneshot;
 use tokio::time::timeout;
 
 const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
 
-async fn wait_for_responses_request_count_to_stabilize(
-    server: &wiremock::MockServer,
-    expected_count: usize,
-    settle_duration: std::time::Duration,
-) -> Result<()> {
-    timeout(DEFAULT_READ_TIMEOUT, async {
-        let mut stable_since: Option<tokio::time::Instant> = None;
-        loop {
-            let requests = server
-                .received_requests()
-                .await
-                .context("failed to fetch received requests")?;
-            let responses_request_count = requests
-                .iter()
-                .filter(|request| {
-                    request.method == "POST" && request.url.path().ends_with("/responses")
-                })
-                .count();
-
-            if responses_request_count > expected_count {
-                anyhow::bail!(
-                    "expected exactly {expected_count} /responses requests, got {responses_request_count}"
-                );
-            }
-
-            if responses_request_count == expected_count {
-                match stable_since {
-                    Some(stable_since) if stable_since.elapsed() >= settle_duration => {
-                        return Ok::<(), anyhow::Error>(());
-                    }
-                    None => stable_since = Some(tokio::time::Instant::now()),
-                    Some(_) => {}
-                }
-            } else {
-                stable_since = None;
-            }
-
-            tokio::time::sleep(std::time::Duration::from_millis(10)).await;
-        }
-    })
-    .await??;
-
-    Ok(())
-}
-
 #[tokio::test]
 async fn thread_unsubscribe_keeps_thread_loaded_until_idle_timeout() -> Result<()> {
     let server = create_mock_responses_server_repeating_assistant("Done").await;
@@ -128,32 +80,24 @@ async fn thread_unsubscribe_keeps_thread_loaded_until_idle_timeout() -> Result<(
 
 #[tokio::test]
 async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> {
-    #[cfg(target_os = "windows")]
-    let shell_command = vec![
-        "powershell".to_string(),
-        "-Command".to_string(),
-        "Start-Sleep -Seconds 1".to_string(),
-    ];
-    #[cfg(not(target_os = "windows"))]
-    let shell_command = vec!["sleep".to_string(), "1".to_string()];
-
     let tmp = TempDir::new()?;
     let codex_home = tmp.path().join("codex_home");
     std::fs::create_dir(&codex_home)?;
     let working_directory = tmp.path().join("workdir");
     std::fs::create_dir(&working_directory)?;
 
-    let server = create_mock_responses_server_sequence_unchecked(vec![
-        create_shell_command_sse_response(
-            shell_command.clone(),
-            Some(&working_directory),
-            Some(10_000),
-            "call_sleep",
-        )?,
-        create_final_assistant_message_sse_response("Done")?,
-    ])
+    let (release_response_tx, release_response_rx) = oneshot::channel();
+    let (server, mut completions) = start_streaming_sse_server(vec![vec![StreamingSseChunk {
+        gate: Some(release_response_rx),
+        body: responses::sse(vec![
+            responses::ev_response_created("resp-1"),
+            responses::ev_assistant_message("msg-1", "Done"),
+            responses::ev_completed("resp-1"),
+        ]),
+    }]])
     .await;
-    create_config_toml(&codex_home, &server.uri())?;
+    let response_completed = completions.remove(0);
+    create_config_toml(&codex_home, server.uri())?;
 
     let mut mcp = McpProcess::new(&codex_home).await?;
     timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
@@ -180,9 +124,9 @@ async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> {
 
     timeout(
         DEFAULT_READ_TIMEOUT,
-        wait_for_command_execution_item_started(&mut mcp),
+        server.wait_for_request_count(/*count*/ 1),
     )
-    .await??;
+    .await?;
 
     let unsubscribe_id = mcp
         .send_thread_unsubscribe_request(ThreadUnsubscribeParams {
@@ -197,21 +141,16 @@ async fn thread_unsubscribe_during_turn_keeps_turn_running() -> Result<()> {
     let unsubscribe = to_response::<ThreadUnsubscribeResponse>(unsubscribe_resp)?;
     assert_eq!(unsubscribe.status, ThreadUnsubscribeStatus::Unsubscribed);
 
-    assert!(
-        timeout(
-            std::time::Duration::from_millis(250),
-            mcp.read_stream_until_notification_message("thread/closed"),
-        )
-        .await
-        .is_err()
+    let closed_while_command_running = timeout(
+        std::time::Duration::from_millis(250),
+        mcp.read_stream_until_notification_message("thread/closed"),
     );
+    let closed_while_command_running = closed_while_command_running.await;
+    let _ = release_response_tx.send(());
+    assert!(closed_while_command_running.is_err());
 
-    wait_for_responses_request_count_to_stabilize(
-        &server,
-        /*expected_count*/ 2,
-        std::time::Duration::from_millis(200),
-    )
-    .await?;
+    timeout(DEFAULT_READ_TIMEOUT, response_completed).await??;
+    server.shutdown().await;
 
     Ok(())
 }
@@ -350,19 +289,6 @@ async fn thread_unsubscribe_reports_not_subscribed_before_idle_unload() -> Resul
     Ok(())
 }
 
-async fn wait_for_command_execution_item_started(mcp: &mut McpProcess) -> Result<()> {
-    loop {
-        let started_notif = mcp
-            .read_stream_until_notification_message("item/started")
-            .await?;
-        let started_params = started_notif.params.context("item/started params")?;
-        let started: ItemStartedNotification = serde_json::from_value(started_params)?;
-        if let ThreadItem::CommandExecution { .. } = started.item {
-            return Ok(());
-        }
-    }
-}
-
 fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> {
     let config_toml = codex_home.join("config.toml");
     std::fs::write(

diff --git a/codex-rs/app-server/tests/v2_config_and_core.rs b/codex-rs/app-server/tests/v2_config_and_core.rs
@@ -0,0 +1,46 @@
+// Core v2 app-server integration tests that do not depend on the thread/turn
+// analytics or websocket helper modules.
+#[path = "suite/v2/account.rs"]
+mod account;
+#[path = "suite/v2/app_list.rs"]
+mod app_list;
+#[path = "suite/v2/client_metadata.rs"]
+mod client_metadata;
+#[path = "suite/v2/collaboration_mode_list.rs"]
+mod collaboration_mode_list;
+#[path = "suite/v2/compaction.rs"]
+mod compaction;
+#[path = "suite/v2/config_rpc.rs"]
+mod config_rpc;
+#[path = "suite/v2/dynamic_tools.rs"]
+mod dynamic_tools;
+#[path = "suite/v2/experimental_api.rs"]
+mod experimental_api;
+#[path = "suite/v2/experimental_feature_list.rs"]
+mod experimental_feature_list;
+#[path = "suite/v2/fs.rs"]
+mod fs;
+#[path = "suite/v2/initialize.rs"]
+mod initialize;
+#[path = "suite/v2/memory_reset.rs"]
+mod memory_reset;
+#[path = "suite/v2/model_list.rs"]
+mod model_list;
+#[path = "suite/v2/output_schema.rs"]
+mod output_schema;
+#[path = "suite/v2/plan_item.rs"]
+mod plan_item;
+#[path = "suite/v2/rate_limits.rs"]
+mod rate_limits;
+#[path = "suite/v2/request_permissions.rs"]
+mod request_permissions;
+#[path = "suite/v2/request_user_input.rs"]
+mod request_user_input;
+#[path = "suite/v2/review.rs"]
+mod review;
+#[path = "suite/v2/safety_check_downgrade.rs"]
+mod safety_check_downgrade;
+#[path = "suite/v2/skills_list.rs"]
+mod skills_list;
+#[path = "suite/v2/windows_sandbox_setup.rs"]
+mod windows_sandbox_setup;
diff --git a/codex-rs/app-server/tests/v2_plugins_mcp.rs b/codex-rs/app-server/tests/v2_plugins_mcp.rs
@@ -0,0 +1,19 @@
+// v2 app-server plugin and MCP integration tests.
+#[path = "suite/v2/marketplace_add.rs"]
+mod marketplace_add;
+#[path = "suite/v2/mcp_resource.rs"]
+mod mcp_resource;
+#[path = "suite/v2/mcp_server_elicitation.rs"]
+mod mcp_server_elicitation;
+#[path = "suite/v2/mcp_server_status.rs"]
+mod mcp_server_status;
+#[path = "suite/v2/mcp_tool.rs"]
+mod mcp_tool;
+#[path = "suite/v2/plugin_install.rs"]
+mod plugin_install;
+#[path = "suite/v2/plugin_list.rs"]
+mod plugin_list;
+#[path = "suite/v2/plugin_read.rs"]
+mod plugin_read;
+#[path = "suite/v2/plugin_uninstall.rs"]
+mod plugin_uninstall;
diff --git a/codex-rs/app-server/tests/v2_realtime.rs b/codex-rs/app-server/tests/v2_realtime.rs
@@ -0,0 +1,4 @@
+// v2 realtime integration tests, split out because they are comparatively
+// large and expensive.
+#[path = "suite/v2/realtime_conversation.rs"]
+mod realtime_conversation;