From 1c30a6071596b86fcadb2bb7b083490959635760 Mon Sep 17 00:00:00 2001 From: Test Date: Mon, 18 May 2026 14:04:28 -0500 Subject: [PATCH] fix(cognition): max_concurrency: usize::MAX panics at runtime registration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CognitionModule::config() set `max_concurrency: usize::MAX` as a mistaken way to spell "no cap" (PR #1306). The Runtime's `register()` calls `tokio::sync::Semaphore::new(max_concurrency)` whenever `max_concurrency > 0`; tokio's MAX_PERMITS is `usize::MAX >> 3` (~2^61), so `Semaphore::new(usize::MAX)` panics immediately at registration with: thread 'X' panicked at tokio-1.50.0/src/sync/batch_semaphore.rs:141:9: a semaphore may not have more than MAX_PERMITS permits (2305843009213693951) The Runtime's `register()` only instantiates a Semaphore when `max_concurrency > 0` — `0` is the canonical spelling of "unbounded, no semaphore" used by other modules (`inference-llm`, `RecordingModule` test fixtures, etc.). Switching to `0` preserves the original intent ("no cap on cognition; downstream ai_provider enforces the inference serialization") without panicking. This was a latent bug surfaced while building integration tests for the Lane D `persona/turn-execute` command (#1409) that go through `runtime.route_command`. Existing cognition tests bypass the panic by calling `module.handle_command` directly without going through `runtime.register`. * Add `registration_safety_tests::cognition_module_registers_without_semaphore_panic` as a regression guard: this test calls `Runtime::register` with CognitionModule, which panics if `max_concurrency > MAX_PERMITS`. Asserts `config.max_concurrency == 0` to make the dependency explicit for future readers. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../continuum-core/src/modules/cognition.rs | 57 ++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/src/workers/continuum-core/src/modules/cognition.rs b/src/workers/continuum-core/src/modules/cognition.rs index b46c37009..e5a51857a 100644 --- a/src/workers/continuum-core/src/modules/cognition.rs +++ b/src/workers/continuum-core/src/modules/cognition.rs @@ -152,7 +152,15 @@ impl ServiceModule for CognitionModule { // PressureBroker singleton (#1299) make event fanout the // intended invariant. Inference is still gated downstream by // ai_provider::max_concurrency. No hardcoded fixed cap here. - max_concurrency: usize::MAX, + // + // Use 0 ("no semaphore = unbounded"), NOT usize::MAX. The + // runtime's `register()` only creates a Semaphore when + // `max_concurrency > 0`; setting usize::MAX makes it call + // `Semaphore::new(usize::MAX)`, which exceeds tokio's + // MAX_PERMITS (`usize::MAX >> 3`) and panics at registration + // ("a semaphore may not have more than MAX_PERMITS permits"). + // 0 expresses "no cap" without instantiating a semaphore. + max_concurrency: 0, tick_interval: None, } } @@ -1919,6 +1927,53 @@ mod inline_admission_tests { } } +#[cfg(test)] +mod registration_safety_tests { + //! Regression: `runtime::Runtime::register` calls + //! `Semaphore::new(config.max_concurrency)` whenever + //! `max_concurrency > 0`. tokio's MAX_PERMITS is `usize::MAX >> 3` + //! (~2^61), so a `max_concurrency: usize::MAX` config panics at + //! registration. CognitionModule shipped with `usize::MAX` as a + //! mistaken way to spell "no cap" (PR #1306). The fix is to use + //! `0` instead, which the runtime treats as "do not create a + //! semaphore" (truly unbounded). + //! + //! This test pins the fix in place: any future change that + //! reintroduces `usize::MAX` (or any other value above + //! MAX_PERMITS) blows up here before it reaches production. + use super::*; + use crate::rag::RagEngine; + use crate::runtime::runtime::Runtime; + use std::sync::Arc; + + #[test] + fn cognition_module_registers_without_semaphore_panic() { + // If max_concurrency is ever raised above tokio's + // MAX_PERMITS this `register()` call panics. The test then + // fails with the original "may not have more than + // MAX_PERMITS" message, which is the easiest possible + // breadcrumb back to this regression. + let runtime = Runtime::new(); + let rag_engine = Arc::new(RagEngine::new()); + let state = Arc::new(CognitionState::new(rag_engine)); + let module = Arc::new(CognitionModule::new(state)); + runtime.register(module); + + // No assertion needed — surviving registration IS the + // assertion. Sanity-touch config to make the dependency + // explicit for future readers. + let cfg = CognitionModule::new(Arc::new(CognitionState::new(Arc::new( + RagEngine::new(), + )))) + .config(); + assert_eq!( + cfg.max_concurrency, 0, + "max_concurrency must be 0 (= unbounded via no-semaphore branch); \ + any positive value above tokio's MAX_PERMITS panics at registration" + ); + } +} + /// Parse an InboxMessage from JSON value. fn parse_inbox_message(value: &Value) -> Result { let p = Params::new(value);