From ac0dd7743c460b8d23ad0462fe1e03d78910ceda Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 28 May 2026 22:35:41 -0400
Subject: [PATCH 01/14] WIP

---
 Cargo.lock                                    | 10 ++++
 Cargo.toml                                    |  1 +
 components/spider-scheduler/Cargo.toml        | 14 +++++
 components/spider-scheduler/src/core.rs       | 56 ++++++++++++++++++
 components/spider-scheduler/src/dispatch.rs   | 28 +++++++++
 components/spider-scheduler/src/error.rs      | 25 ++++++++
 components/spider-scheduler/src/lib.rs        | 21 +++++++
 .../spider-scheduler/src/storage_client.rs    | 58 +++++++++++++++++++
 components/spider-scheduler/src/types.rs      | 35 +++++++++++
 9 files changed, 248 insertions(+)
 create mode 100644 components/spider-scheduler/Cargo.toml
 create mode 100644 components/spider-scheduler/src/core.rs
 create mode 100644 components/spider-scheduler/src/dispatch.rs
 create mode 100644 components/spider-scheduler/src/error.rs
 create mode 100644 components/spider-scheduler/src/lib.rs
 create mode 100644 components/spider-scheduler/src/storage_client.rs
 create mode 100644 components/spider-scheduler/src/types.rs

diff --git a/Cargo.lock b/Cargo.lock
index 2888d5e8..0262e8d3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1499,6 +1499,16 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "spider-scheduler"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "spider-core",
+ "thiserror",
+ "tokio-util",
+]
+
 [[package]]
 name = "spider-storage"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index ea9992cf..4d8d20e8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,6 +4,7 @@ members = [
   "components/spider-core",
   "components/spider-derive",
   "components/spider-execution-manager",
+  "components/spider-scheduler",
   "components/spider-storage",
   "components/spider-task-executor",
   "components/spider-tdl",
diff --git a/components/spider-scheduler/Cargo.toml b/components/spider-scheduler/Cargo.toml
new file mode 100644
index 00000000..59e9b8f7
--- /dev/null
+++ b/components/spider-scheduler/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "spider-scheduler"
+version = "0.1.0"
+edition = "2024"
+
+[lib]
+name = "spider_scheduler"
+path = "src/lib.rs"
+
+[dependencies]
+async-trait = "0.1.89"
+spider-core = { path = "../spider-core" }
+thiserror = "2.0.18"
+tokio-util = "0.7.18"
diff --git a/components/spider-scheduler/src/core.rs b/components/spider-scheduler/src/core.rs
new file mode 100644
index 00000000..95392f49
--- /dev/null
+++ b/components/spider-scheduler/src/core.rs
@@ -0,0 +1,56 @@
+use std::sync::Arc;
+
+use async_trait::async_trait;
+
+use crate::{
+    dispatch::DispatchSink,
+    error::SchedulerError,
+    storage_client::SchedulerStorageClient,
+};
+
+/// A cancellation handle used to signal a running [`SchedulerCore`] to stop.
+///
+/// Cancelling the token causes [`SchedulerCore::run`] to break out of its scheduling loop and
+/// return.
+pub type ShutdownToken = tokio_util::sync::CancellationToken;
+
+/// A pluggable scheduling algorithm.
+///
+/// A core owns its decision loop: it polls the inbound queue through a [`SchedulerStorageClient`],
+/// applies its algorithm (reading storage as needed for placement), and writes assignments to a
+/// [`DispatchSink`]. Modeling the algorithm as a trait lets different scheduling strategies share
+/// the same runtime entry point.
+#[async_trait]
+pub trait SchedulerCore: Send {
+    /// The storage client the core polls and reads for placement decisions.
+    type Storage: SchedulerStorageClient;
+
+    /// The dispatch sink the core writes assignments to.
+    type Sink: DispatchSink;
+
+    /// Runs the scheduling loop until `shutdown` is triggered.
+    ///
+    /// The core polls the inbound queue through `storage`, applies its scheduling algorithm, and
+    /// writes assignments to `sink`, repeating until `shutdown` is cancelled, at which point it
+    /// returns.
+    ///
+    /// # Parameters
+    ///
+    /// * `storage` - The storage client used to poll the inbound queue and read state for
+    ///   placement.
+    /// * `sink` - The dispatch sink that assignments are written to.
+    /// * `shutdown` - The token that, once cancelled, signals the loop to stop and return.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError`] if the scheduling loop fails irrecoverably, e.g. the storage client or
+    ///   dispatch sink fails.
+    async fn run(
+        &mut self,
+        storage: Arc<Self::Storage>,
+        sink: Arc<Self::Sink>,
+        shutdown: ShutdownToken,
+    ) -> Result<(), SchedulerError>;
+}
diff --git a/components/spider-scheduler/src/dispatch.rs b/components/spider-scheduler/src/dispatch.rs
new file mode 100644
index 00000000..c432d73a
--- /dev/null
+++ b/components/spider-scheduler/src/dispatch.rs
@@ -0,0 +1,28 @@
+use async_trait::async_trait;
+
+use crate::{error::SchedulerError, types::TaskAssignment};
+
+/// The write side of the dispatching queue used by the scheduler core.
+///
+/// Modeled as a trait so the scheduler core can be unit-tested against a recording sink without
+/// standing up the execution-manager-facing service. The production implementation is backed by a
+/// bounded single-producer/multi-consumer queue.
+#[async_trait]
+pub trait DispatchSink: Send + Sync {
+    /// Enqueues a task assignment for execution managers to consume.
+    ///
+    /// Implementations backed by a bounded queue await while the queue is full, applying
+    /// back-pressure to the scheduler core.
+    ///
+    /// # Parameters
+    ///
+    /// * `assignment` - The task assignment to enqueue.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::DispatchClosed`] if the dispatching queue is closed and can no longer
+    ///   accept assignments.
+    async fn dispatch(&self, assignment: TaskAssignment) -> Result<(), SchedulerError>;
+}
diff --git a/components/spider-scheduler/src/error.rs b/components/spider-scheduler/src/error.rs
new file mode 100644
index 00000000..8e7983d5
--- /dev/null
+++ b/components/spider-scheduler/src/error.rs
@@ -0,0 +1,25 @@
+use spider_core::types::id::JobId;
+
+/// Errors returned by [`crate::storage_client::SchedulerStorageClient`] operations.
+#[derive(Debug, thiserror::Error)]
+pub enum StorageClientError {
+    /// The inbound queue is closed and can no longer yield ready entries.
+    #[error("inbound queue is closed")]
+    InboundClosed,
+
+    /// No job with the requested identifier exists.
+    #[error("job not found: {0:?}")]
+    JobNotFound(JobId),
+}
+
+/// Errors returned by the scheduler runtime and its components.
+#[derive(Debug, thiserror::Error)]
+pub enum SchedulerError {
+    /// Forwarded from the storage client.
+    #[error(transparent)]
+    Storage(#[from] StorageClientError),
+
+    /// The dispatching queue is closed and can no longer accept assignments.
+    #[error("dispatching queue is closed")]
+    DispatchClosed,
+}
diff --git a/components/spider-scheduler/src/lib.rs b/components/spider-scheduler/src/lib.rs
new file mode 100644
index 00000000..411b26f0
--- /dev/null
+++ b/components/spider-scheduler/src/lib.rs
@@ -0,0 +1,21 @@
+//! Scheduler skeleton for the Spider task-execution framework.
+//!
+//! This crate defines the core type and trait abstractions of the scheduler: the data types
+//! exchanged with storage and execution managers ([`InboundEntry`], [`TaskAssignment`]), the
+//! storage and dispatch seams ([`SchedulerStorageClient`], [`DispatchSink`]), and the pluggable
+//! scheduling algorithm ([`SchedulerCore`]). Concrete implementations (the dispatch queue, the
+//! runtime, and scheduling algorithms) build on top of these abstractions.
+
+pub mod core;
+pub mod dispatch;
+pub mod error;
+pub mod storage_client;
+pub mod types;
+
+pub use crate::{
+    core::{SchedulerCore, ShutdownToken},
+    dispatch::DispatchSink,
+    error::{SchedulerError, StorageClientError},
+    storage_client::SchedulerStorageClient,
+    types::{InboundEntry, TaskAssignment},
+};
diff --git a/components/spider-scheduler/src/storage_client.rs b/components/spider-scheduler/src/storage_client.rs
new file mode 100644
index 00000000..5b847c63
--- /dev/null
+++ b/components/spider-scheduler/src/storage_client.rs
@@ -0,0 +1,58 @@
+use std::time::Duration;
+
+use async_trait::async_trait;
+use spider_core::{job::JobState, types::id::JobId};
+
+use crate::{error::StorageClientError, types::InboundEntry};
+
+/// The scheduler's view of the storage layer.
+///
+/// Abstracts the storage-owned inbound queue and the read-only queries a scheduling algorithm
+/// needs to make placement decisions. Modeled as a trait so the scheduler runtime can be driven by
+/// a real storage client in production or a mock in tests.
+#[async_trait]
+pub trait SchedulerStorageClient: Send + Sync {
+    /// Polls the storage-owned inbound (ready) queue for newly-ready tasks.
+    ///
+    /// Drains up to `max_items` ready entries across all storage lanes (regular, commit, and
+    /// cleanup tasks), blocking for at most `wait`. Returns an empty vector if no entry becomes
+    /// ready within `wait`.
+    ///
+    /// # Parameters
+    ///
+    /// * `max_items` - The maximum number of entries to return from a single poll.
+    /// * `wait` - The maximum duration to block waiting for ready entries.
+    ///
+    /// # Returns
+    ///
+    /// The ready entries drained from the inbound queue on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`StorageClientError::InboundClosed`] if the inbound queue is closed and can no longer
+    ///   yield entries.
+    async fn poll_ready(
+        &self,
+        max_items: usize,
+        wait: Duration,
+    ) -> Result<Vec<InboundEntry>, StorageClientError>;
+
+    /// Reads the current state of a job.
+    ///
+    /// # Parameters
+    ///
+    /// * `job_id` - The identifier of the job to query.
+    ///
+    /// # Returns
+    ///
+    /// The job's current [`JobState`] on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`StorageClientError::JobNotFound`] if no job with the given identifier exists.
+    async fn job_state(&self, job_id: JobId) -> Result<JobState, StorageClientError>;
+}
diff --git a/components/spider-scheduler/src/types.rs b/components/spider-scheduler/src/types.rs
new file mode 100644
index 00000000..448a398b
--- /dev/null
+++ b/components/spider-scheduler/src/types.rs
@@ -0,0 +1,35 @@
+use spider_core::types::id::{JobId, ResourceGroupId, TaskId};
+
+/// A ready task drained from the storage-owned inbound queue.
+///
+/// The storage client flattens storage's three ready lanes (regular, commit, and cleanup tasks)
+/// into this uniform entry, resolving each to its [`TaskId`] so the scheduler core can treat every
+/// ready task identically.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct InboundEntry {
+    /// The resource group that owns the job.
+    pub resource_group_id: ResourceGroupId,
+
+    /// The job the task belongs to.
+    pub job_id: JobId,
+
+    /// The ready task.
+    pub task_id: TaskId,
+}
+
+/// A task placement decision written by the scheduler core to the dispatching queue.
+///
+/// Assignments are intentionally lightweight: they identify the task but carry no inputs. The
+/// consuming execution manager registers the task instance against storage on pull to obtain the
+/// execution context (inputs, timeouts, and the TDL context).
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct TaskAssignment {
+    /// The resource group that owns the job.
+    pub resource_group_id: ResourceGroupId,
+
+    /// The job the task belongs to.
+    pub job_id: JobId,
+
+    /// The task to dispatch.
+    pub task_id: TaskId,
+}

From dced92eace056244a39c7dad83deefe263e8d392 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Fri, 29 May 2026 18:43:45 -0400
Subject: [PATCH 02/14] Done.

---
 components/spider-scheduler/src/core.rs       | 45 +++++------
 components/spider-scheduler/src/dispatch.rs   | 28 -------
 .../spider-scheduler/src/dispatch_queue.rs    | 59 ++++++++++++++
 components/spider-scheduler/src/error.rs      | 10 ++-
 components/spider-scheduler/src/lib.rs        | 43 +++++++---
 .../spider-scheduler/src/storage_client.rs    | 78 ++++++++++++++++---
 components/spider-scheduler/src/types.rs      |  6 +-
 7 files changed, 188 insertions(+), 81 deletions(-)
 delete mode 100644 components/spider-scheduler/src/dispatch.rs
 create mode 100644 components/spider-scheduler/src/dispatch_queue.rs

diff --git a/components/spider-scheduler/src/core.rs b/components/spider-scheduler/src/core.rs
index 95392f49..c6bb661c 100644
--- a/components/spider-scheduler/src/core.rs
+++ b/components/spider-scheduler/src/core.rs
@@ -1,56 +1,47 @@
-use std::sync::Arc;
+//! The abstract core of a Spider scheduler.
 
 use async_trait::async_trait;
 
 use crate::{
-    dispatch::DispatchSink,
+    dispatch_queue::DispatchQueueSink,
     error::SchedulerError,
     storage_client::SchedulerStorageClient,
 };
 
-/// A cancellation handle used to signal a running [`SchedulerCore`] to stop.
-///
-/// Cancelling the token causes [`SchedulerCore::run`] to break out of its scheduling loop and
-/// return.
-pub type ShutdownToken = tokio_util::sync::CancellationToken;
-
-/// A pluggable scheduling algorithm.
+/// An abstracted core for a scheduling algorithm.
 ///
 /// A core owns its decision loop: it polls the inbound queue through a [`SchedulerStorageClient`],
 /// applies its algorithm (reading storage as needed for placement), and writes assignments to a
-/// [`DispatchSink`]. Modeling the algorithm as a trait lets different scheduling strategies share
-/// the same runtime entry point.
+/// [`DispatchQueueSink`]. Modeling the algorithm as a trait lets different scheduling strategies
+/// share the same runtime entry point.
 #[async_trait]
 pub trait SchedulerCore: Send {
-    /// The storage client the core polls and reads for placement decisions.
-    type Storage: SchedulerStorageClient;
+    /// The storage client used by the core to poll and read for placement decisions.
+    type StorageClient: SchedulerStorageClient;
 
     /// The dispatch sink the core writes assignments to.
-    type Sink: DispatchSink;
+    type Sink: DispatchQueueSink;
 
-    /// Runs the scheduling loop until `shutdown` is triggered.
+    /// Runs the scheduling loop until `cancellation_token` is triggered.
     ///
-    /// The core polls the inbound queue through `storage`, applies its scheduling algorithm, and
-    /// writes assignments to `sink`, repeating until `shutdown` is cancelled, at which point it
-    /// returns.
+    /// The core polls the inbound queue through `storage_client`, applies its scheduling algorithm,
+    /// and writes assignments to `sink`, repeating until `cancellation_token` is fired, at which
+    /// point it returns.
     ///
     /// # Parameters
     ///
-    /// * `storage` - The storage client used to poll the inbound queue and read state for
+    /// * `storage_client` - The storage client used to poll the inbound queue and read state for
     ///   placement.
     /// * `sink` - The dispatch sink that assignments are written to.
-    /// * `shutdown` - The token that, once cancelled, signals the loop to stop and return.
+    /// * `cancellation_token` - The token to signal the scheduling loop to stop.
     ///
     /// # Errors
     ///
-    /// Returns an error if:
-    ///
-    /// * [`SchedulerError`] if the scheduling loop fails irrecoverably, e.g. the storage client or
-    ///   dispatch sink fails.
+    /// Returns a [`SchedulerError`] instance indicating an irrecoverable error.
     async fn run(
         &mut self,
-        storage: Arc<Self::Storage>,
-        sink: Arc<Self::Sink>,
-        shutdown: ShutdownToken,
+        storage_client: Self::StorageClient,
+        sink: Self::Sink,
+        cancellation_token: tokio_util::sync::CancellationToken,
     ) -> Result<(), SchedulerError>;
 }
diff --git a/components/spider-scheduler/src/dispatch.rs b/components/spider-scheduler/src/dispatch.rs
deleted file mode 100644
index c432d73a..00000000
--- a/components/spider-scheduler/src/dispatch.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use async_trait::async_trait;
-
-use crate::{error::SchedulerError, types::TaskAssignment};
-
-/// The write side of the dispatching queue used by the scheduler core.
-///
-/// Modeled as a trait so the scheduler core can be unit-tested against a recording sink without
-/// standing up the execution-manager-facing service. The production implementation is backed by a
-/// bounded single-producer/multi-consumer queue.
-#[async_trait]
-pub trait DispatchSink: Send + Sync {
-    /// Enqueues a task assignment for execution managers to consume.
-    ///
-    /// Implementations backed by a bounded queue await while the queue is full, applying
-    /// back-pressure to the scheduler core.
-    ///
-    /// # Parameters
-    ///
-    /// * `assignment` - The task assignment to enqueue.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    ///
-    /// * [`SchedulerError::DispatchClosed`] if the dispatching queue is closed and can no longer
-    ///   accept assignments.
-    async fn dispatch(&self, assignment: TaskAssignment) -> Result<(), SchedulerError>;
-}
diff --git a/components/spider-scheduler/src/dispatch_queue.rs b/components/spider-scheduler/src/dispatch_queue.rs
new file mode 100644
index 00000000..7ef57fdc
--- /dev/null
+++ b/components/spider-scheduler/src/dispatch_queue.rs
@@ -0,0 +1,59 @@
+//! The dispatching queue that decouples the scheduler core's placement decisions from the
+//! execution-manager-facing service.
+
+use async_trait::async_trait;
+use spider_core::types::id::SessionId;
+
+use crate::{error::SchedulerError, types::TaskAssignment};
+
+/// The writer side of the dispatching queue used by the scheduler core.
+#[async_trait]
+pub trait DispatchQueueSink: Send + Sync + Clone {
+    /// Enqueues a task assignment for execution managers to consume.
+    ///
+    /// # Parameters
+    ///
+    /// * `assignment` - The task assignment to enqueue.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::DispatchQueueClosed`] if the dispatching queue is closed.
+    async fn enqueue(&self, assignment: TaskAssignment) -> Result<(), SchedulerError>;
+
+    /// Bumps the session ID and invalidates all queued task assignments.
+    ///
+    /// # Parameters
+    ///
+    /// * `new_session_id` - The new session ID. Must be greater than the current session ID.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::DispatchQueueClosed`] if the dispatching queue is closed.
+    /// * [`SchedulerError::InvalidSessionId`] if the new session ID is not greater than the current
+    ///   session ID.
+    async fn bump_session_id(&self, new_session_id: SessionId) -> Result<(), SchedulerError>;
+}
+
+/// The reader side of the dispatching queue, drained by the execution-manager-facing service.
+#[async_trait]
+pub trait DispatchQueueSource: Send + Sync + Clone {
+    /// Dequeues the next task assignment for an execution manager to execute.
+    ///
+    /// # Returns
+    ///
+    /// A tuple on success, containing:
+    ///
+    /// * The storage session associated with the assignment.
+    /// * The next task assignment ready to execute.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::DispatchQueueClosed`] if the dispatching queue is closed.
+    async fn dequeue(&self) -> Result<(SessionId, TaskAssignment), SchedulerError>;
+}
diff --git a/components/spider-scheduler/src/error.rs b/components/spider-scheduler/src/error.rs
index 8e7983d5..6a852c46 100644
--- a/components/spider-scheduler/src/error.rs
+++ b/components/spider-scheduler/src/error.rs
@@ -1,4 +1,6 @@
-use spider_core::types::id::JobId;
+//! The error types used in this crate.
+
+use spider_core::types::id::{JobId, SessionId};
 
 /// Errors returned by [`crate::storage_client::SchedulerStorageClient`] operations.
 #[derive(Debug, thiserror::Error)]
@@ -21,5 +23,9 @@ pub enum SchedulerError {
 
     /// The dispatching queue is closed and can no longer accept assignments.
     #[error("dispatching queue is closed")]
-    DispatchClosed,
+    DispatchQueueClosed,
+
+    /// The session ID is invalid.
+    #[error("invalid session ID: {0:?}")]
+    InvalidSessionId(SessionId),
 }
diff --git a/components/spider-scheduler/src/lib.rs b/components/spider-scheduler/src/lib.rs
index 411b26f0..bddd0750 100644
--- a/components/spider-scheduler/src/lib.rs
+++ b/components/spider-scheduler/src/lib.rs
@@ -1,20 +1,45 @@
-//! Scheduler skeleton for the Spider task-execution framework.
+//! Trait and type abstractions for the Spider scheduler.
 //!
-//! This crate defines the core type and trait abstractions of the scheduler: the data types
-//! exchanged with storage and execution managers ([`InboundEntry`], [`TaskAssignment`]), the
-//! storage and dispatch seams ([`SchedulerStorageClient`], [`DispatchSink`]), and the pluggable
-//! scheduling algorithm ([`SchedulerCore`]). Concrete implementations (the dispatch queue, the
-//! runtime, and scheduling algorithms) build on top of these abstractions.
+//! The scheduler is the serial decision maker that turns ready tasks discovered by the storage
+//! layer into assignments for execution managers. It owns placement and ordering policy, not
+//! dependency resolution: storage decides *what* is ready, and the scheduler decides *in what
+//! order* and *with what throttling* ready tasks are offered to the fleet.
+//!
+//! The crate defines three trait seams wired into a single pipeline — a storage client that polls
+//! the ready queue, a core that makes serial decisions, and a dispatching queue that fans those
+//! decisions out to execution managers:
+//!
+//! ```text
+//!   storage  ── authoritative ready queue (owned by the storage layer, not this crate)
+//!         │
+//!         │  poll_ready / poll_commit_ready / poll_cleanup_ready  (SchedulerStorageClient)
+//!         ▼
+//!   ┌───────────────────┐
+//!   │   SchedulerCore   │  serial loop: poll → decide → enqueue
+//!   └───────────────────┘
+//!         │
+//!         │  enqueue             (DispatchQueueSink — writer side)
+//!         ▼
+//!   ┌───────────────────┐
+//!   │  dispatch queue   │  bounded SPMC; a full queue back-pressures the core
+//!   └───────────────────┘
+//!         │
+//!         │  dequeue             (DispatchQueueSource — reader side)
+//!         ▼
+//!   ┌───────────────────┐
+//!   │ scheduler service │ ──▶ execution managers (concurrent fan-out)
+//!   └───────────────────┘
+//! ```
 
 pub mod core;
-pub mod dispatch;
+pub mod dispatch_queue;
 pub mod error;
 pub mod storage_client;
 pub mod types;
 
 pub use crate::{
-    core::{SchedulerCore, ShutdownToken},
-    dispatch::DispatchSink,
+    core::SchedulerCore,
+    dispatch_queue::{DispatchQueueSink, DispatchQueueSource},
     error::{SchedulerError, StorageClientError},
     storage_client::SchedulerStorageClient,
     types::{InboundEntry, TaskAssignment},
diff --git a/components/spider-scheduler/src/storage_client.rs b/components/spider-scheduler/src/storage_client.rs
index 5b847c63..9f7adaf4 100644
--- a/components/spider-scheduler/src/storage_client.rs
+++ b/components/spider-scheduler/src/storage_client.rs
@@ -1,7 +1,12 @@
+//! The scheduler's view of the storage layer, abstracting inbound polling and placement-time reads.
+
 use std::time::Duration;
 
 use async_trait::async_trait;
-use spider_core::{job::JobState, types::id::JobId};
+use spider_core::{
+    job::JobState,
+    types::id::{JobId, SessionId},
+};
 
 use crate::{error::StorageClientError, types::InboundEntry};
 
@@ -11,33 +16,84 @@ use crate::{error::StorageClientError, types::InboundEntry};
 /// needs to make placement decisions. Modeled as a trait so the scheduler runtime can be driven by
 /// a real storage client in production or a mock in tests.
 #[async_trait]
-pub trait SchedulerStorageClient: Send + Sync {
-    /// Polls the storage-owned inbound (ready) queue for newly-ready tasks.
-    ///
-    /// Drains up to `max_items` ready entries across all storage lanes (regular, commit, and
-    /// cleanup tasks), blocking for at most `wait`. Returns an empty vector if no entry becomes
-    /// ready within `wait`.
+pub trait SchedulerStorageClient: Send + Sync + Clone {
+    /// Polls the regular-task lane of the storage-owned inbound queue for ready tasks.
     ///
     /// # Parameters
     ///
     /// * `max_items` - The maximum number of entries to return from a single poll.
-    /// * `wait` - The maximum duration to block waiting for ready entries.
+    /// * `wait` - The maximum duration to block waiting for ready entries on the storage side.
     ///
     /// # Returns
     ///
-    /// The ready entries drained from the inbound queue on success.
+    /// A tuple on success, containing:
+    ///
+    /// * The storage session the poll was served under.
+    /// * The ready regular tasks drained from the lane.
     ///
     /// # Errors
     ///
     /// Returns an error if:
     ///
-    /// * [`StorageClientError::InboundClosed`] if the inbound queue is closed and can no longer
+    /// * [`StorageClientError::InboundClosed`] if the regular-task lane is closed and can no longer
     ///   yield entries.
     async fn poll_ready(
         &self,
         max_items: usize,
         wait: Duration,
-    ) -> Result<Vec<InboundEntry>, StorageClientError>;
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError>;
+
+    /// Polls the commit-task lane of the storage-owned inbound queue for ready tasks.
+    ///
+    /// # Parameters
+    ///
+    /// * `max_items` - The maximum number of entries to return from a single poll.
+    /// * `wait` - The maximum duration to block waiting for ready entries on the storage side.
+    ///
+    /// # Returns
+    ///
+    /// A tuple on success, containing:
+    ///
+    /// * The storage session the poll was served under.
+    /// * The ready commit tasks drained from the lane.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`StorageClientError::InboundClosed`] if the commit-task lane is closed and can no longer
+    ///   yield entries.
+    async fn poll_commit_ready(
+        &self,
+        max_items: usize,
+        wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError>;
+
+    /// Polls the cleanup-task lane of the storage-owned inbound queue for ready tasks.
+    ///
+    /// # Parameters
+    ///
+    /// * `max_items` - The maximum number of entries to return from a single poll.
+    /// * `wait` - The maximum duration to block waiting for ready entries on the storage side.
+    ///
+    /// # Returns
+    ///
+    /// A tuple on success, containing:
+    ///
+    /// * The storage session the poll was served under.
+    /// * The ready cleanup tasks drained from the lane.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`StorageClientError::InboundClosed`] if the cleanup-task lane is closed and can no longer
+    ///   yield entries.
+    async fn poll_cleanup_ready(
+        &self,
+        max_items: usize,
+        wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError>;
 
     /// Reads the current state of a job.
     ///
diff --git a/components/spider-scheduler/src/types.rs b/components/spider-scheduler/src/types.rs
index 448a398b..70267b15 100644
--- a/components/spider-scheduler/src/types.rs
+++ b/components/spider-scheduler/src/types.rs
@@ -1,3 +1,5 @@
+//! The data types the scheduler exchanges with the storage layer and execution managers.
+
 use spider_core::types::id::{JobId, ResourceGroupId, TaskId};
 
 /// A ready task drained from the storage-owned inbound queue.
@@ -18,10 +20,6 @@ pub struct InboundEntry {
 }
 
 /// A task placement decision written by the scheduler core to the dispatching queue.
-///
-/// Assignments are intentionally lightweight: they identify the task but carry no inputs. The
-/// consuming execution manager registers the task instance against storage on pull to obtain the
-/// execution context (inputs, timeouts, and the TDL context).
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct TaskAssignment {
     /// The resource group that owns the job.

From 9c436bd9d4efbd90ddec53860fbcb58342ed6a73 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Sat, 30 May 2026 14:45:35 -0400
Subject: [PATCH 03/14] Update dispatch queue's trait.

---
 .../spider-scheduler/src/dispatch_queue.rs    | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/components/spider-scheduler/src/dispatch_queue.rs b/components/spider-scheduler/src/dispatch_queue.rs
index 7ef57fdc..2305a64d 100644
--- a/components/spider-scheduler/src/dispatch_queue.rs
+++ b/components/spider-scheduler/src/dispatch_queue.rs
@@ -1,6 +1,8 @@
 //! The dispatching queue that decouples the scheduler core's placement decisions from the
 //! execution-manager-facing service.
 
+use std::time::Duration;
+
 use async_trait::async_trait;
 use spider_core::types::id::SessionId;
 
@@ -36,6 +38,11 @@ pub trait DispatchQueueSink: Send + Sync + Clone {
     /// * [`SchedulerError::InvalidSessionId`] if the new session ID is not greater than the current
     ///   session ID.
     async fn bump_session_id(&self, new_session_id: SessionId) -> Result<(), SchedulerError>;
+
+    /// # Returns
+    ///
+    /// The current size of the dispatch queue.
+    fn size(&self) -> usize;
 }
 
 /// The reader side of the dispatching queue, drained by the execution-manager-facing service.
@@ -43,9 +50,14 @@ pub trait DispatchQueueSink: Send + Sync + Clone {
 pub trait DispatchQueueSource: Send + Sync + Clone {
     /// Dequeues the next task assignment for an execution manager to execute.
     ///
+    /// # Parameters
+    ///
+    /// * `wait_time` - The maximum amount of time to wait for a task assignment.
+    ///
     /// # Returns
     ///
-    /// A tuple on success, containing:
+    /// `None` if no task assignment is available within the specified wait time, or a tuple
+    /// containing:
     ///
     /// * The storage session associated with the assignment.
     /// * The next task assignment ready to execute.
@@ -55,5 +67,8 @@ pub trait DispatchQueueSource: Send + Sync + Clone {
     /// Returns an error if:
     ///
     /// * [`SchedulerError::DispatchQueueClosed`] if the dispatching queue is closed.
-    async fn dequeue(&self) -> Result<(SessionId, TaskAssignment), SchedulerError>;
+    async fn dequeue(
+        &self,
+        wait_time: Duration,
+    ) -> Result<Option<(SessionId, TaskAssignment)>, SchedulerError>;
 }

From 5650d5a832b3cee4a00d51b40fc40638213c14b9 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Sun, 31 May 2026 17:28:27 -0400
Subject: [PATCH 04/14] Fix done.

---
 components/spider-core/src/types/id.rs        | 47 ++++++-------------
 components/spider-storage/src/cache.rs        | 15 ------
 components/spider-storage/src/cache/job.rs    |  3 +-
 .../spider-storage/src/task_instance_pool.rs  |  3 +-
 .../spider-storage/tests/scheduling_infra.rs  |  3 +-
 components/spider-tdl/src/task.rs             |  2 +-
 components/spider-tdl/src/task_context.rs     |  2 +-
 .../spider-tdl/tests/test_task_macro.rs       |  4 +-
 tests/huntsman/task-executor/src/lib.rs       |  2 +-
 .../task-executor/tests/test_process_pool.rs  |  2 +-
 .../huntsman/tdl-integration/tests/complex.rs |  2 +-
 11 files changed, 24 insertions(+), 61 deletions(-)

diff --git a/components/spider-core/src/types/id.rs b/components/spider-core/src/types/id.rs
index 21821e7e..4735f798 100644
--- a/components/spider-core/src/types/id.rs
+++ b/components/spider-core/src/types/id.rs
@@ -4,6 +4,8 @@ use serde::{Deserialize, Serialize};
 use sqlx::{Database, encode::IsNull};
 use uuid::Uuid;
 
+use crate::task::TaskIndex;
+
 /// A generic identifier type that wraps a UUID and a type marker.
 ///
 /// # Type Parameters:
@@ -96,9 +98,18 @@ pub type UuidBytes = uuid::Bytes;
 pub enum ResourceGroupIdMarker {}
 pub type ResourceGroupId = Id<ResourceGroupIdMarker>;
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum TaskIdMarker {}
-pub type TaskId = Id<TaskIdMarker>;
+/// Identifier of a task inside a job.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub enum TaskId {
+    /// The index of the task in the job's task graph.
+    Index(TaskIndex),
+
+    /// The commit task.
+    Commit,
+
+    /// The cleanup task.
+    Cleanup,
+}
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum JobIdMarker {}
@@ -169,33 +180,3 @@ where
 }
 
 pub type SignedJobId = SignedId<JobIdMarker>;
-
-pub type SignedTaskId = SignedId<TaskIdMarker>;
-
-#[cfg(test)]
-mod tests {
-    use std::any::TypeId;
-
-    use super::*;
-
-    #[test]
-    fn test_id_basic() {
-        let id = TaskId::new();
-        let underlying_uuid = id.as_uuid_ref().to_owned();
-        assert_eq!(id, TaskId::from(underlying_uuid));
-
-        assert_ne!(TypeId::of::<TaskId>(), TypeId::of::<JobId>());
-    }
-
-    #[test]
-    fn task_id_json_roundtrip() {
-        let id = TaskId::new();
-        let deserialized_id: TaskId = serde_json::from_str(
-            serde_json::to_string(&id)
-                .expect("JSON serialization failure")
-                .as_str(),
-        )
-        .expect("JSON deserialization failure");
-        assert_eq!(id, deserialized_id);
-    }
-}
diff --git a/components/spider-storage/src/cache.rs b/components/spider-storage/src/cache.rs
index d520f519..89a5e13d 100644
--- a/components/spider-storage/src/cache.rs
+++ b/components/spider-storage/src/cache.rs
@@ -1,21 +1,6 @@
-use spider_core::task::TaskIndex;
-
 pub mod error;
 pub mod io;
 pub mod job;
 pub mod job_submission;
 mod sync;
 pub mod task;
-
-/// Identifier of a task inside a job.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
-pub enum TaskId {
-    /// The index of the task in the job's task graph.
-    Index(TaskIndex),
-
-    /// The commit task.
-    Commit,
-
-    /// The cleanup task.
-    Cleanup,
-}
diff --git a/components/spider-storage/src/cache/job.rs b/components/spider-storage/src/cache/job.rs
index 5c575e8e..c5a06ccb 100644
--- a/components/spider-storage/src/cache/job.rs
+++ b/components/spider-storage/src/cache/job.rs
@@ -10,7 +10,7 @@ use spider_core::{
     job::JobState,
     task::{TaskIndex, TaskState},
     types::{
-        id::{ExecutionManagerId, JobId, ResourceGroupId, TaskInstanceId},
+        id::{ExecutionManagerId, JobId, ResourceGroupId, TaskId, TaskInstanceId},
         io::{ExecutionContext, TaskOutput},
     },
 };
@@ -18,7 +18,6 @@ use tokio::sync::{RwLockReadGuard, RwLockWriteGuard};
 
 use crate::{
     cache::{
-        TaskId,
         error::{CacheError, InternalError, InternalError::UnexpectedJobState, StaleStateError},
         job_submission::ValidatedJobSubmission,
         task::TaskGraph,
diff --git a/components/spider-storage/src/task_instance_pool.rs b/components/spider-storage/src/task_instance_pool.rs
index ace45ce6..bba0cf77 100644
--- a/components/spider-storage/src/task_instance_pool.rs
+++ b/components/spider-storage/src/task_instance_pool.rs
@@ -23,12 +23,11 @@ use std::{
 };
 
 use async_trait::async_trait;
-use spider_core::types::id::{ExecutionManagerId, JobId, ResourceGroupId, TaskInstanceId};
+use spider_core::types::id::{ExecutionManagerId, JobId, ResourceGroupId, TaskId, TaskInstanceId};
 use tokio::sync::mpsc;
 
 use crate::{
     cache::{
-        TaskId,
         error::InternalError,
         task::{SharedTaskControlBlock, SharedTerminationTaskControlBlock},
     },
diff --git a/components/spider-storage/tests/scheduling_infra.rs b/components/spider-storage/tests/scheduling_infra.rs
index d3e5eb98..046a35eb 100644
--- a/components/spider-storage/tests/scheduling_infra.rs
+++ b/components/spider-storage/tests/scheduling_infra.rs
@@ -87,13 +87,12 @@ use spider_core::{
     job::JobState,
     task::TaskIndex,
     types::{
-        id::{ExecutionManagerId, JobId, ResourceGroupId, TaskInstanceId},
+        id::{ExecutionManagerId, JobId, ResourceGroupId, TaskId, TaskInstanceId},
         io::{ExecutionContext, TaskOutput},
     },
 };
 use spider_storage::{
     cache::{
-        TaskId,
         error::{CacheError, InternalError},
         job::SharedJobControlBlock,
         job_submission::ValidatedJobSubmission,
diff --git a/components/spider-tdl/src/task.rs b/components/spider-tdl/src/task.rs
index 99ca904d..d4015e0c 100644
--- a/components/spider-tdl/src/task.rs
+++ b/components/spider-tdl/src/task.rs
@@ -254,7 +254,7 @@ mod tests {
     fn make_encoded_ctx() -> Vec<u8> {
         let ctx = TaskContext {
             job_id: JobId::new(),
-            task_id: TaskId::new(),
+            task_id: TaskId::Index(0),
             task_instance_id: 1,
             resource_group_id: ResourceGroupId::new(),
         };
diff --git a/components/spider-tdl/src/task_context.rs b/components/spider-tdl/src/task_context.rs
index 60348315..d412bdb4 100644
--- a/components/spider-tdl/src/task_context.rs
+++ b/components/spider-tdl/src/task_context.rs
@@ -31,7 +31,7 @@ mod tests {
     fn round_trip_msgpack() -> anyhow::Result<()> {
         let ctx = TaskContext {
             job_id: JobId::new(),
-            task_id: TaskId::new(),
+            task_id: TaskId::Index(0),
             task_instance_id: 13,
             resource_group_id: ResourceGroupId::new(),
         };
diff --git a/components/spider-tdl/tests/test_task_macro.rs b/components/spider-tdl/tests/test_task_macro.rs
index e2a070fe..9a891f19 100644
--- a/components/spider-tdl/tests/test_task_macro.rs
+++ b/components/spider-tdl/tests/test_task_macro.rs
@@ -81,7 +81,7 @@ fn translate(_ctx: TaskContext, p: Point, dx: int32, dy: int32) -> Result<(Point
 fn make_encoded_ctx() -> Vec<u8> {
     let ctx = TaskContext {
         job_id: JobId::new(),
-        task_id: TaskId::new(),
+        task_id: TaskId::Index(0),
         task_instance_id: 1,
         resource_group_id: ResourceGroupId::new(),
     };
@@ -303,7 +303,7 @@ fn direct_execute_call_round_trips() -> anyhow::Result<()> {
 
     let ctx = TaskContext {
         job_id: JobId::new(),
-        task_id: TaskId::new(),
+        task_id: TaskId::Index(0),
         task_instance_id: 1,
         resource_group_id: ResourceGroupId::new(),
     };
diff --git a/tests/huntsman/task-executor/src/lib.rs b/tests/huntsman/task-executor/src/lib.rs
index c42a20f4..2a7e5ca1 100644
--- a/tests/huntsman/task-executor/src/lib.rs
+++ b/tests/huntsman/task-executor/src/lib.rs
@@ -194,7 +194,7 @@ pub fn tdl_package_dir() -> PathBuf {
 pub fn build_ctx() -> Vec<u8> {
     let ctx = TaskContext {
         job_id: JobId::new(),
-        task_id: TaskId::new(),
+        task_id: TaskId::Index(0),
         task_instance_id: 1,
         resource_group_id: ResourceGroupId::new(),
     };
diff --git a/tests/huntsman/task-executor/tests/test_process_pool.rs b/tests/huntsman/task-executor/tests/test_process_pool.rs
index 7bc5d332..9e762af1 100644
--- a/tests/huntsman/task-executor/tests/test_process_pool.rs
+++ b/tests/huntsman/task-executor/tests/test_process_pool.rs
@@ -72,7 +72,7 @@ fn build_pool() -> ProcessPool {
 fn make_request(task_func: &str, inputs: Vec<TaskInput>) -> ExecuteRequest {
     ExecuteRequest {
         job_id: JobId::new(),
-        task_id: TaskId::new(),
+        task_id: TaskId::Index(0),
         resource_group_id: ResourceGroupId::new(),
         ctx: ExecutionContext {
             task_instance_id: 1,
diff --git a/tests/huntsman/tdl-integration/tests/complex.rs b/tests/huntsman/tdl-integration/tests/complex.rs
index 513e7d75..0e2bc7d5 100644
--- a/tests/huntsman/tdl-integration/tests/complex.rs
+++ b/tests/huntsman/tdl-integration/tests/complex.rs
@@ -33,7 +33,7 @@ fn lib_path() -> std::path::PathBuf {
 fn encode_ctx() -> Vec<u8> {
     let ctx = TaskContext {
         job_id: JobId::new(),
-        task_id: TaskId::new(),
+        task_id: TaskId::Index(0),
         task_instance_id: 1,
         resource_group_id: ResourceGroupId::new(),
     };

From 10cb6ad186c3dd2fd3cc135c5e24cdc939b10a90 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Sun, 31 May 2026 20:50:56 -0400
Subject: [PATCH 05/14] Add channel-based dispatch queue implementation.

---
 Cargo.lock                                    |   8 +-
 components/spider-scheduler/Cargo.toml        |   8 +
 .../spider-scheduler/src/dispatch_queue.rs    | 596 +++++++++++++++++-
 3 files changed, 609 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6f729b53..1742569a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1504,9 +1504,13 @@ dependencies = [
 name = "spider-scheduler"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
+ "async-channel",
  "async-trait",
+ "dashmap",
  "spider-core",
  "thiserror",
+ "tokio",
  "tokio-util",
 ]
 
@@ -1985,9 +1989,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.52.2"
+version = "1.52.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "110a78583f19d5cdb2c5ccf321d1290344e71313c6c37d43520d386027d18386"
+checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe"
 dependencies = [
  "bytes",
  "libc",
diff --git a/components/spider-scheduler/Cargo.toml b/components/spider-scheduler/Cargo.toml
index 59e9b8f7..ee803e17 100644
--- a/components/spider-scheduler/Cargo.toml
+++ b/components/spider-scheduler/Cargo.toml
@@ -8,7 +8,15 @@ name = "spider_scheduler"
 path = "src/lib.rs"
 
 [dependencies]
+async-channel = "2.3.1"
 async-trait = "0.1.89"
 spider-core = { path = "../spider-core" }
 thiserror = "2.0.18"
+tokio = { version = "1.52.3", features = ["sync", "time"] }
 tokio-util = "0.7.18"
+
+[dev-dependencies]
+anyhow = "1.0.102"
+dashmap = "6.1.0"
+tokio = { version = "1.52.3", features = ["macros", "rt-multi-thread"] }
+tokio-util = { version = "0.7.18", features = ["rt"] }
diff --git a/components/spider-scheduler/src/dispatch_queue.rs b/components/spider-scheduler/src/dispatch_queue.rs
index 2305a64d..18435b17 100644
--- a/components/spider-scheduler/src/dispatch_queue.rs
+++ b/components/spider-scheduler/src/dispatch_queue.rs
@@ -1,10 +1,11 @@
 //! The dispatching queue that decouples the scheduler core's placement decisions from the
 //! execution-manager-facing service.
 
-use std::time::Duration;
+use std::{sync::Arc, time::Duration};
 
 use async_trait::async_trait;
 use spider_core::types::id::SessionId;
+use tokio::sync::RwLock;
 
 use crate::{error::SchedulerError, types::TaskAssignment};
 
@@ -72,3 +73,596 @@ pub trait DispatchQueueSource: Send + Sync + Clone {
         wait_time: Duration,
     ) -> Result<Option<(SessionId, TaskAssignment)>, SchedulerError>;
 }
+
+/// A cloneable writer handle for the dispatching queue, implementing [`DispatchQueueSink`] using
+/// an async channel.
+///
+/// # NOTE
+///
+/// The current implementation assumes that `enqueue` and `bump_session_id` will not be called
+/// concurrently: `bump_session_id` must be called before subsequent `enqueue` calls to keep the
+/// session ID consistent with the enqueued assignments.
+#[derive(Clone)]
+pub struct DispatchQueueWriter {
+    inner: Arc<DispatchQueueWriterInner>,
+}
+
+#[async_trait]
+impl DispatchQueueSink for DispatchQueueWriter {
+    async fn enqueue(&self, assignment: TaskAssignment) -> Result<(), SchedulerError> {
+        self.inner
+            .assignment_sender
+            .send(assignment)
+            .await
+            .map_err(|_| SchedulerError::DispatchQueueClosed)
+    }
+
+    async fn bump_session_id(&self, new_session_id: SessionId) -> Result<(), SchedulerError> {
+        // Hold the write lock until the drain below finishes so that no reader can dequeue.
+        let mut session_id_guard = self.inner.session_id.write().await;
+        if new_session_id <= *session_id_guard {
+            return Err(SchedulerError::InvalidSessionId(new_session_id));
+        }
+        *session_id_guard = new_session_id;
+        while self.inner.assignment_receiver.try_recv().is_ok() {
+            // Discard every assignment that is currently queued.
+        }
+        // Release the lock only after the queue has been drained.
+        drop(session_id_guard);
+        Ok(())
+    }
+
+    fn size(&self) -> usize {
+        self.inner.assignment_sender.len()
+    }
+}
+
+/// A cloneable reader handle for the dispatching queue, implementing [`DispatchQueueSource`] using
+/// an async channel.
+#[derive(Clone)]
+pub struct DispatchQueueReader {
+    inner: Arc<DispatchQueueReaderInner>,
+}
+
+#[async_trait]
+impl DispatchQueueSource for DispatchQueueReader {
+    async fn dequeue(
+        &self,
+        wait_time: Duration,
+    ) -> Result<Option<(SessionId, TaskAssignment)>, SchedulerError> {
+        // Hold the session ID read lock for the whole dequeue so that no `bump_session_id` can
+        // change the session ID between reading it and pairing it with an assignment.
+        let session_id_guard = self.inner.session_id.read().await;
+
+        match self.inner.assignment_receiver.try_recv() {
+            Ok(assignment) => return Ok(Some((*session_id_guard, assignment))),
+            // A closed and fully drained queue must be reported even when `wait_time` is zero.
+            Err(e) if e.is_closed() => return Err(SchedulerError::DispatchQueueClosed),
+            Err(_) if wait_time.is_zero() => return Ok(None),
+            Err(_) => {}
+        }
+
+        match tokio::time::timeout(wait_time, self.inner.assignment_receiver.recv()).await {
+            Ok(Ok(assignment)) => Ok(Some((*session_id_guard, assignment))),
+            Ok(Err(_)) => Err(SchedulerError::DispatchQueueClosed),
+            Err(_) => Ok(None),
+        }
+    }
+}
+
+/// Dispatch queue factory.
+///
+/// # Returns
+///
+/// A tuple containing:
+///
+/// * The writer for the scheduler core to enqueue task assignments.
+/// * The reader for the execution-manager-facing service to dequeue task assignments.
+#[must_use]
+pub fn create_dispatch_queue(
+    capacity: usize,
+    init_session_id: SessionId,
+) -> (DispatchQueueWriter, DispatchQueueReader) {
+    let (assignment_sender, assignment_receiver) = async_channel::bounded(capacity);
+    let session_id = Arc::new(RwLock::new(init_session_id));
+    let writer_inner = Arc::new(DispatchQueueWriterInner {
+        session_id: session_id.clone(),
+        assignment_sender,
+        assignment_receiver: assignment_receiver.clone(),
+    });
+    let reader_inner = Arc::new(DispatchQueueReaderInner {
+        session_id,
+        assignment_receiver,
+    });
+    (
+        DispatchQueueWriter {
+            inner: writer_inner,
+        },
+        DispatchQueueReader {
+            inner: reader_inner,
+        },
+    )
+}
+
+struct DispatchQueueWriterInner {
+    session_id: Arc<RwLock<SessionId>>,
+    assignment_sender: async_channel::Sender<TaskAssignment>,
+    assignment_receiver: async_channel::Receiver<TaskAssignment>,
+}
+
+struct DispatchQueueReaderInner {
+    session_id: Arc<RwLock<SessionId>>,
+    assignment_receiver: async_channel::Receiver<TaskAssignment>,
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{
+        collections::HashMap,
+        sync::{
+            Arc,
+            atomic::{AtomicUsize, Ordering},
+        },
+        time::Duration,
+    };
+
+    use anyhow::Result;
+    use dashmap::{DashMap, DashSet};
+    use spider_core::types::id::{JobId, ResourceGroupId, SessionId, TaskId};
+    use tokio_util::task::TaskTracker;
+
+    use super::*;
+    use crate::{error::SchedulerError, types::TaskAssignment};
+
+    /// Generates a [`TaskId`] backed by a module-local monotonic counter.
+    ///
+    /// # Returns
+    ///
+    /// A new [`TaskId::Index`] whose inner value is unique within the test binary.
+    fn next_task_id() -> TaskId {
+        static COUNTER: AtomicUsize = AtomicUsize::new(0);
+        TaskId::Index(COUNTER.fetch_add(1, Ordering::Relaxed))
+    }
+
+    /// # Returns
+    ///
+    /// Forwards [`make_assignment_with_task_id`]'s return values with `task_id` set to
+    /// [`next_task_id`]'s return value.
+    fn make_assignment() -> TaskAssignment {
+        make_assignment_with_task_id(next_task_id())
+    }
+
+    /// # Returns
+    ///
+    /// A new [`TaskAssignment`] with the given `task_id`; all other ID fields are auto-generated.
+    fn make_assignment_with_task_id(task_id: TaskId) -> TaskAssignment {
+        TaskAssignment {
+            resource_group_id: ResourceGroupId::new(),
+            job_id: JobId::new(),
+            task_id,
+        }
+    }
+
+    /// Spawns `reader_count` reader tasks that each drain the queue with `wait_time` and count the
+    /// assignments they receive, looping until the queue is closed.
+    ///
+    /// # Returns
+    ///
+    /// A vector of join handles, one per spawned task; each handle yields the number of assignments
+    /// that the reader pulled from the queue.
+    fn spawn_counting_readers(
+        reader: &DispatchQueueReader,
+        reader_count: usize,
+        wait_time: Duration,
+    ) -> Vec<tokio::task::JoinHandle<usize>> {
+        (0..reader_count)
+            .map(|_| {
+                let r = reader.clone();
+                tokio::spawn(async move {
+                    let mut count = 0usize;
+                    loop {
+                        match r.dequeue(wait_time).await {
+                            Ok(Some(_)) => count += 1,
+                            Ok(None) => (),
+                            Err(_) => break,
+                        }
+                    }
+                    count
+                })
+            })
+            .collect()
+    }
+
+    /// Drives the pair-consistency stress scenario for one or more concurrent readers.
+    ///
+    /// A single producer issues `ROUNDS` rounds of `[enqueue × k_i; bump_session_id(+1)]` with
+    /// batch sizes drawn from a 64-bit LCG seeded by `rng_seed`, finishes with a final batch under
+    /// the latest session, and drops the writer. `reader_count` reader tasks drain the queue
+    /// concurrently, each delivered assignment is tagged at enqueue time, and pair consistency is
+    /// verified across the collected results once all readers are closed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`tokio::task::JoinHandle`]'s return values on failure (indicating a task panic).
+    async fn run_pair_consistency_stress(reader_count: usize, rng_seed: u64) -> Result<()> {
+        const INIT_SESSION: SessionId = 1;
+        const ROUNDS: usize = 20;
+        const CAPACITY: usize = 16;
+        const FINAL_BATCH: usize = 5;
+
+        assert!(reader_count > 0, "`reader_count` must be positive");
+
+        let (writer, reader) = create_dispatch_queue(CAPACITY, INIT_SESSION);
+        let tagged: Arc<DashMap<TaskId, SessionId>> = Arc::new(DashMap::new());
+
+        let tagged_for_writer = tagged.clone();
+        let writer_handle = tokio::spawn(async move {
+            let mut current_session = INIT_SESSION;
+            let mut rng = rng_seed;
+            for _ in 0..ROUNDS {
+                // 64-bit LCG parameters
+                const LCG_MULTIPLIER: u64 = 6_364_136_223_846_793_005;
+                const LCG_INCREMENT: u64 = 1_442_695_040_888_963_407;
+                rng = rng.wrapping_mul(LCG_MULTIPLIER).wrapping_add(LCG_INCREMENT);
+                let k = usize::try_from(rng % (CAPACITY as u64 + 1))
+                    .expect("modulo result fits in usize");
+                for _ in 0..k {
+                    let id = next_task_id();
+                    tagged_for_writer.insert(id, current_session);
+                    writer
+                        .enqueue(make_assignment_with_task_id(id))
+                        .await
+                        .expect("enqueue failed");
+                }
+                current_session += 1;
+                writer
+                    .bump_session_id(current_session)
+                    .await
+                    .expect("bump failed");
+            }
+            // Final batch under the latest session, which guarantees the readers have something to
+            // drain post-bump.
+            for _ in 0..FINAL_BATCH {
+                let id = next_task_id();
+                tagged_for_writer.insert(id, current_session);
+                writer
+                    .enqueue(make_assignment_with_task_id(id))
+                    .await
+                    .expect("enqueue failed");
+            }
+            drop(writer);
+        });
+
+        let all_delivered: Arc<DashMap<TaskId, SessionId>> = Arc::new(DashMap::new());
+        let duplicates: Arc<DashSet<TaskId>> = Arc::new(DashSet::new());
+        let tracker = TaskTracker::new();
+        for _ in 0..reader_count {
+            let r = reader.clone();
+            let delivered_for_reader = all_delivered.clone();
+            let duplicates_for_reader = duplicates.clone();
+            tracker.spawn(async move {
+                loop {
+                    match r.dequeue(Duration::from_millis(500)).await {
+                        Ok(Some((session, assignment))) => {
+                            if delivered_for_reader
+                                .insert(assignment.task_id, session)
+                                .is_some()
+                            {
+                                duplicates_for_reader.insert(assignment.task_id);
+                            }
+                        }
+                        Ok(None) => (),
+                        Err(_) => break,
+                    }
+                }
+            });
+        }
+        tracker.close();
+        drop(reader);
+
+        writer_handle.await?;
+        tracker.wait().await;
+
+        assert!(
+            duplicates.is_empty(),
+            "duplicate deliveries: {:?}",
+            duplicates.iter().map(|e| *e.key()).collect::<Vec<_>>(),
+        );
+        for entry in all_delivered.iter() {
+            let task_id = *entry.key();
+            let delivered_session = *entry.value();
+            let expected = tagged.get(&task_id).map(|e| *e.value());
+            assert_eq!(
+                Some(delivered_session),
+                expected,
+                "pair stamp mismatch: task_id={task_id:?}, delivered={delivered_session}, \
+                 expected={expected:?}",
+            );
+        }
+
+        let delivered_count = all_delivered.len();
+        assert!(
+            delivered_count >= FINAL_BATCH,
+            "expected at least the final batch ({FINAL_BATCH}) to be delivered, got \
+             {delivered_count}",
+        );
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn sanity_round_trip_and_initial_session() -> Result<()> {
+        const SESSION_ID: SessionId = 1;
+        let (writer, reader) = create_dispatch_queue(8, SESSION_ID);
+        let assignment = make_assignment();
+
+        writer.enqueue(assignment).await?;
+
+        let (session, received) = reader
+            .dequeue(Duration::from_millis(1))
+            .await?
+            .expect("expected an assignment");
+        assert_eq!(session, SESSION_ID);
+        assert_eq!(received, assignment);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn load_balancing_across_consumers() -> Result<()> {
+        const N: usize = 100;
+        const M: usize = 4;
+        let (writer, reader) = create_dispatch_queue(32, 1);
+
+        let reader_handles = spawn_counting_readers(&reader, M, Duration::from_millis(500));
+        drop(reader);
+
+        for _ in 0..N {
+            writer
+                .enqueue(make_assignment())
+                .await
+                .expect("enqueue failed");
+        }
+        drop(writer);
+
+        let mut total = 0usize;
+        for handle in reader_handles {
+            total += handle.await?;
+        }
+        assert_eq!(total, N);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn many_readers_with_slow_producer() -> Result<()> {
+        const N: usize = 10;
+        const M: usize = 16;
+        let (writer, reader) = create_dispatch_queue(8, 1);
+
+        let reader_handles = spawn_counting_readers(&reader, M, Duration::from_millis(500));
+        drop(reader);
+
+        for _ in 0..N {
+            writer
+                .enqueue(make_assignment())
+                .await
+                .expect("enqueue failed");
+            tokio::time::sleep(Duration::from_millis(10)).await;
+        }
+        drop(writer);
+
+        let mut total = 0usize;
+        for handle in reader_handles {
+            total += handle.await?;
+        }
+        assert_eq!(total, N);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn bump_same_session_id_returns_invalid() -> Result<()> {
+        const SESSION_ID: SessionId = 5;
+        let (writer, _reader) = create_dispatch_queue(8, SESSION_ID);
+        let result = writer.bump_session_id(SESSION_ID).await;
+        assert!(
+            matches!(result, Err(SchedulerError::InvalidSessionId(5))),
+            "expected InvalidSessionId(5), got {result:?}",
+        );
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn bump_smaller_session_id_returns_invalid() -> Result<()> {
+        const SESSION_ID: SessionId = 5;
+        const SMALLER_SESSION_ID: SessionId = SESSION_ID - 1;
+
+        let (writer, _reader) = create_dispatch_queue(8, SESSION_ID);
+        let result = writer.bump_session_id(SMALLER_SESSION_ID).await;
+        assert!(matches!(
+            result,
+            Err(SchedulerError::InvalidSessionId(SMALLER_SESSION_ID))
+        ));
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn bump_higher_succeeds() -> Result<()> {
+        const SESSION_ID: SessionId = 5;
+        const NEW_SESSION_ID: SessionId = SESSION_ID + 1;
+
+        let (writer, reader) = create_dispatch_queue(8, SESSION_ID);
+        writer.bump_session_id(NEW_SESSION_ID).await?;
+        writer.enqueue(make_assignment()).await?;
+
+        let (session, _) = reader
+            .dequeue(Duration::from_secs(1))
+            .await?
+            .expect("expected an assignment");
+        assert_eq!(session, NEW_SESSION_ID);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn pre_bump_items_not_delivered() -> Result<()> {
+        let (writer, reader) = create_dispatch_queue(8, 1);
+        writer.enqueue(make_assignment()).await?;
+        writer.enqueue(make_assignment()).await?;
+        writer.bump_session_id(2).await?;
+
+        let result = reader.dequeue(Duration::from_millis(100)).await?;
+        assert_eq!(result, None);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn post_bump_items_paired_with_new_session() -> Result<()> {
+        let (writer, reader) = create_dispatch_queue(8, 1);
+        writer.bump_session_id(2).await?;
+        let assignment = make_assignment();
+        writer.enqueue(assignment).await?;
+
+        let (session, received) = reader
+            .dequeue(Duration::from_secs(1))
+            .await?
+            .expect("expected an assignment");
+        assert_eq!(session, 2);
+        assert_eq!(received, assignment);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn successive_bumps() -> Result<()> {
+        let (writer, reader) = create_dispatch_queue(8, 1);
+        writer.bump_session_id(2).await?;
+        writer.bump_session_id(3).await?;
+
+        let equal = writer.bump_session_id(3).await;
+        assert!(
+            matches!(equal, Err(SchedulerError::InvalidSessionId(3))),
+            "expected InvalidSessionId(3), got {equal:?}",
+        );
+        let smaller = writer.bump_session_id(2).await;
+        assert!(
+            matches!(smaller, Err(SchedulerError::InvalidSessionId(2))),
+            "expected InvalidSessionId(2), got {smaller:?}",
+        );
+
+        writer.enqueue(make_assignment()).await?;
+        let (session, _) = reader
+            .dequeue(Duration::from_secs(1))
+            .await?
+            .expect("expected an assignment");
+        assert_eq!(session, 3);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn size_zero_after_bump() -> Result<()> {
+        let (writer, _reader) = create_dispatch_queue(8, 1);
+        writer.enqueue(make_assignment()).await?;
+        writer.enqueue(make_assignment()).await?;
+        writer.enqueue(make_assignment()).await?;
+        assert_eq!(writer.size(), 3);
+
+        writer.bump_session_id(2).await?;
+        assert_eq!(writer.size(), 0);
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn single_bump_pair_consistency() -> Result<()> {
+        const INIT_SESSION: SessionId = 10;
+        const MID_SESSION: SessionId = 20;
+        const FINAL_SESSION: SessionId = 30;
+
+        let (writer, reader) = create_dispatch_queue(8, INIT_SESSION);
+
+        let pre_bump_ids: Vec<TaskId> = (0..3).map(|_| next_task_id()).collect();
+        let post_bump_ids: Vec<TaskId> = (0..2).map(|_| next_task_id()).collect();
+        let final_id = next_task_id();
+
+        let pre_bump_for_writer = pre_bump_ids.clone();
+        let post_bump_for_writer = post_bump_ids.clone();
+        let writer_handle = tokio::spawn(async move {
+            for &id in &pre_bump_for_writer {
+                writer
+                    .enqueue(make_assignment_with_task_id(id))
+                    .await
+                    .expect("enqueue failed");
+            }
+            // Wait for the reader to consume the batch before bumping, so the items survive into
+            // the delivered set instead of being drained.
+            while writer.size() > 0 {
+                tokio::time::sleep(Duration::from_millis(50)).await;
+            }
+            writer
+                .bump_session_id(MID_SESSION)
+                .await
+                .expect("bump to mid session failed");
+
+            for &id in &post_bump_for_writer {
+                writer
+                    .enqueue(make_assignment_with_task_id(id))
+                    .await
+                    .expect("enqueue failed");
+            }
+            while writer.size() > 0 {
+                tokio::time::sleep(Duration::from_millis(50)).await;
+            }
+            writer
+                .bump_session_id(FINAL_SESSION)
+                .await
+                .expect("bump to final session failed");
+
+            writer
+                .enqueue(make_assignment_with_task_id(final_id))
+                .await
+                .expect("enqueue failed");
+            drop(writer);
+        });
+
+        let mut delivered: HashMap<TaskId, SessionId> = HashMap::new();
+        loop {
+            match reader.dequeue(Duration::from_millis(100)).await {
+                Ok(Some((session, assignment))) => {
+                    let prior = delivered.insert(assignment.task_id, session);
+                    assert_eq!(
+                        prior, None,
+                        "duplicate delivery for {:?}",
+                        assignment.task_id
+                    );
+                }
+                Ok(None) => (),
+                Err(_) => break,
+            }
+        }
+        writer_handle.await?;
+
+        for &id in &pre_bump_ids {
+            assert_eq!(
+                delivered.get(&id).copied(),
+                Some(INIT_SESSION),
+                "pre-bump item not paired with initial session: {id:?}",
+            );
+        }
+        for &id in &post_bump_ids {
+            assert_eq!(
+                delivered.get(&id).copied(),
+                Some(MID_SESSION),
+                "post-bump item not paired with mid session: {id:?}",
+            );
+        }
+        assert_eq!(delivered.get(&final_id).copied(), Some(FINAL_SESSION));
+        Ok(())
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn pair_consistency_stress_with_one_reader() -> Result<()> {
+        run_pair_consistency_stress(1, 1_234_567).await
+    }
+
+    #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+    async fn pair_consistency_stress_with_many_readers() -> Result<()> {
+        run_pair_consistency_stress(4, 7_654_321).await
+    }
+}

From dda7770592f4555d35532d1b011f2e6307810d52 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Wed, 3 Jun 2026 11:18:35 -0400
Subject: [PATCH 06/14] Done.

---
 Cargo.lock                                    |   1 +
 components/spider-scheduler/Cargo.toml        |   3 +-
 .../examples/round_robin_load.rs              | 392 +++++++++
 components/spider-scheduler/src/core.rs       |   2 +-
 components/spider-scheduler/src/core_impl.rs  |   3 +
 .../src/core_impl/round_robin.rs              | 776 ++++++++++++++++++
 components/spider-scheduler/src/error.rs      |   6 +
 components/spider-scheduler/src/lib.rs        |   1 +
 8 files changed, 1182 insertions(+), 2 deletions(-)
 create mode 100644 components/spider-scheduler/examples/round_robin_load.rs
 create mode 100644 components/spider-scheduler/src/core_impl.rs
 create mode 100644 components/spider-scheduler/src/core_impl/round_robin.rs

diff --git a/Cargo.lock b/Cargo.lock
index 1742569a..6d96c4b7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1508,6 +1508,7 @@ dependencies = [
  "async-channel",
  "async-trait",
  "dashmap",
+ "serde",
  "spider-core",
  "thiserror",
  "tokio",
diff --git a/components/spider-scheduler/Cargo.toml b/components/spider-scheduler/Cargo.toml
index ee803e17..a928fddf 100644
--- a/components/spider-scheduler/Cargo.toml
+++ b/components/spider-scheduler/Cargo.toml
@@ -12,8 +12,9 @@ async-channel = "2.3.1"
 async-trait = "0.1.89"
 spider-core = { path = "../spider-core" }
 thiserror = "2.0.18"
-tokio = { version = "1.52.3", features = ["sync", "time"] }
+tokio = { version = "1.52.3", features = ["macros", "rt", "sync", "time"] }
 tokio-util = "0.7.18"
+serde = { version = "1.0.228", features = ["derive"] }
 
 [dev-dependencies]
 anyhow = "1.0.102"
diff --git a/components/spider-scheduler/examples/round_robin_load.rs b/components/spider-scheduler/examples/round_robin_load.rs
new file mode 100644
index 00000000..9d2d9385
--- /dev/null
+++ b/components/spider-scheduler/examples/round_robin_load.rs
@@ -0,0 +1,392 @@
+//! Load-test and instrumentation harness for the round-robin scheduler core.
+//!
+//! Topology:
+//!
+//! ```text
+//!   submitter ──▶ MockStorage (ready lane) ──poll──▶ RoundRobinCore ──enqueue──▶ dispatch queue ──▶ 64 workers
+//! ```
+//!
+//! * A mock storage holds 128 jobs of 1000 tasks each, released gradually (one job at a time) to
+//!   simulate a job-submission cycle rather than making everything ready at `t=0`.
+//! * 1% of the tasks are submitted twice (back-to-back) so the scheduler's deduplication can be
+//!   exercised; workers must still observe every task exactly once.
+//! * 64 workers drain the dispatch queue, sleeping 5ms per task to model execution latency.
+//!
+//! Run with (release recommended so the timings are meaningful):
+//!
+//! ```bash
+//! cargo run -p spider-scheduler --example round_robin_load --release
+//! ```
+
+use std::{
+    sync::{
+        Arc,
+        atomic::{AtomicBool, AtomicUsize, Ordering},
+    },
+    time::{Duration, Instant},
+};
+
+use async_trait::async_trait;
+use dashmap::DashSet;
+use spider_core::{
+    job::JobState,
+    types::id::{JobId, ResourceGroupId, SessionId, TaskId},
+};
+use spider_scheduler::{
+    DispatchQueueSource,
+    SchedulerCore,
+    SchedulerStorageClient,
+    StorageClientError,
+    core_impl::RoundRobinConfig,
+    dispatch_queue::{DispatchQueueReader, DispatchQueueWriter, create_dispatch_queue},
+    types::InboundEntry,
+};
+use tokio_util::sync::CancellationToken;
+
+// ---------------------------------------------------------------------------------------------
+// Workload parameters
+// ---------------------------------------------------------------------------------------------
+
+const NUM_JOBS: usize = 128;
+const TASKS_PER_JOB: usize = 1000;
+const TOTAL_UNIQUE_TASKS: usize = NUM_JOBS * TASKS_PER_JOB;
+
+/// Every `DUP_EVERY`-th task within a job is submitted twice, yielding exactly 1% duplicates.
+const DUP_EVERY: usize = 100;
+const EXPECTED_DUPLICATES_SUBMITTED: usize = NUM_JOBS * TASKS_PER_JOB / DUP_EVERY;
+
+const NUM_WORKERS: usize = 64;
+const WORKER_SLEEP: Duration = Duration::from_millis(5);
+const WORKER_POLL_WAIT: Duration = Duration::from_millis(10);
+
+/// Delay between releasing successive jobs into storage (the "submission cycle").
+const JOB_SUBMIT_INTERVAL: Duration = Duration::from_millis(10);
+
+/// A fixed session: this harness never bumps the session, so storage and the dispatch queue both
+/// start (and stay) here.
+const SESSION_ID: SessionId = 0;
+
+// Round-robin scheduler configuration used by this harness.
+const ACTIVE_JOB_POOL_CAPACITY: usize = 8;
+const DISPATCH_QUEUE_CAPACITY: usize = NUM_WORKERS * 4; // 256
+const STORAGE_POLLING_WAIT_TIME_MS: u64 = 10; // dispatch/poll interval
+const READY_TASK_CAPACITY: usize = TASKS_PER_JOB * NUM_WORKERS; // 64_000
+const COMMIT_READY_TASK_CAPACITY: usize = 10;
+const CLEANUP_READY_TASK_CAPACITY: usize = 10;
+
+/// Safety net so a scheduling bug that drops a task cannot hang the harness forever.
+const OVERALL_TIMEOUT: Duration = Duration::from_mins(2);
+
+// ---------------------------------------------------------------------------------------------
+// Mock storage
+// ---------------------------------------------------------------------------------------------
+
+/// A mock [`SchedulerStorageClient`] whose regular lane is backed by an unbounded channel that the
+/// submitter feeds. Commit and cleanup lanes are always empty.
+#[derive(Clone)]
+struct MockStorage {
+    inner: Arc<MockStorageInner>,
+}
+
+struct MockStorageInner {
+    ready_tx: async_channel::Sender<InboundEntry>,
+    ready_rx: async_channel::Receiver<InboundEntry>,
+}
+
+impl MockStorage {
+    fn new() -> Self {
+        let (ready_tx, ready_rx) = async_channel::unbounded();
+        Self {
+            inner: Arc::new(MockStorageInner { ready_tx, ready_rx }),
+        }
+    }
+
+    /// # Returns
+    ///
+    /// A cloned sender for the regular ready lane, used by the submitter task.
+    fn sender(&self) -> async_channel::Sender<InboundEntry> {
+        self.inner.ready_tx.clone()
+    }
+}
+
+#[async_trait]
+impl SchedulerStorageClient for MockStorage {
+    async fn poll_ready(
+        &self,
+        max_items: usize,
+        wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        if max_items == 0 {
+            // The scheduler has no buffer headroom; emulate a real blocking poll that yields
+            // nothing rather than pulling past the requested cap.
+            tokio::time::sleep(wait).await;
+            return Ok((SESSION_ID, Vec::new()));
+        }
+
+        let mut out = Vec::new();
+        // Block up to `wait` for the first entry, mirroring a real long-poll.
+        match tokio::time::timeout(wait, self.inner.ready_rx.recv()).await {
+            Ok(Ok(entry)) => out.push(entry),
+            // Channel closed (never happens here, sender is held by storage) or timed out: return
+            // whatever we have (possibly nothing).
+            Ok(Err(_)) | Err(_) => return Ok((SESSION_ID, out)),
+        }
+        // Drain the rest without blocking, up to `max_items`.
+        while out.len() < max_items {
+            match self.inner.ready_rx.try_recv() {
+                Ok(entry) => out.push(entry),
+                Err(_) => break,
+            }
+        }
+        Ok((SESSION_ID, out))
+    }
+
+    async fn poll_commit_ready(
+        &self,
+        _max_items: usize,
+        wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        tokio::time::sleep(wait).await;
+        Ok((SESSION_ID, Vec::new()))
+    }
+
+    async fn poll_cleanup_ready(
+        &self,
+        _max_items: usize,
+        wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        tokio::time::sleep(wait).await;
+        Ok((SESSION_ID, Vec::new()))
+    }
+
+    async fn job_state(&self, _job_id: JobId) -> Result<JobState, StorageClientError> {
+        Ok(JobState::Running)
+    }
+}
+
+// ---------------------------------------------------------------------------------------------
+// Submitter & workers
+// ---------------------------------------------------------------------------------------------
+
+/// Releases each job's tasks into storage one job at a time, duplicating every `DUP_EVERY`-th task
+/// back-to-back so the duplicate lands in the same poll batch as its original.
+async fn submit_jobs(jobs: Vec<(JobId, ResourceGroupId)>, tx: async_channel::Sender<InboundEntry>) {
+    for (job_id, resource_group_id) in jobs {
+        for i in 0..TASKS_PER_JOB {
+            let entry = InboundEntry {
+                resource_group_id,
+                job_id,
+                task_id: TaskId::Index(i),
+            };
+            tx.send(entry).await.expect("ready lane closed");
+            if i % DUP_EVERY == 0 {
+                tx.send(entry).await.expect("ready lane closed");
+            }
+        }
+        tokio::time::sleep(JOB_SUBMIT_INTERVAL).await;
+    }
+}
+
+/// Shared bookkeeping for the "each task is polled exactly once" check.
+struct WorkerStats {
+    seen: DashSet<(JobId, TaskId)>,
+    total_received: AtomicUsize,
+    duplicate_received: AtomicUsize,
+}
+
+/// A single worker: drain the dispatch queue, record each assignment, then sleep to model work.
+async fn worker(reader: DispatchQueueReader, stats: Arc<WorkerStats>, done: Arc<AtomicBool>) {
+    loop {
+        if done.load(Ordering::Relaxed) {
+            break;
+        }
+        match reader.dequeue(WORKER_POLL_WAIT).await {
+            Ok(Some((_session, assignment))) => {
+                stats.total_received.fetch_add(1, Ordering::Relaxed);
+                if !stats.seen.insert((assignment.job_id, assignment.task_id)) {
+                    stats.duplicate_received.fetch_add(1, Ordering::Relaxed);
+                }
+                tokio::time::sleep(WORKER_SLEEP).await;
+            }
+            Ok(None) => {}
+            // Dispatch queue closed (scheduler dropped its writer): nothing more will arrive.
+            Err(_) => break,
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------------------------
+// Harness
+// ---------------------------------------------------------------------------------------------
+
+#[tokio::main(flavor = "multi_thread")]
+async fn main() {
+    let storage = MockStorage::new();
+    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, SESSION_ID);
+
+    let config = RoundRobinConfig::<MockStorage, DispatchQueueWriter>::new(
+        ACTIVE_JOB_POOL_CAPACITY,
+        DISPATCH_QUEUE_CAPACITY,
+        READY_TASK_CAPACITY,
+        COMMIT_READY_TASK_CAPACITY,
+        CLEANUP_READY_TASK_CAPACITY,
+        STORAGE_POLLING_WAIT_TIME_MS,
+    );
+    let metrics = config.metrics();
+
+    let jobs: Vec<(JobId, ResourceGroupId)> = (0..NUM_JOBS)
+        .map(|_| (JobId::new(), ResourceGroupId::new()))
+        .collect();
+
+    // Scheduler.
+    let scheduler_token = CancellationToken::new();
+    let scheduler_handle = {
+        let token = scheduler_token.clone();
+        let storage = storage.clone();
+        tokio::spawn(async move { config.run(storage, writer, token).await })
+    };
+
+    // Workers.
+    let stats = Arc::new(WorkerStats {
+        seen: DashSet::with_capacity(TOTAL_UNIQUE_TASKS),
+        total_received: AtomicUsize::new(0),
+        duplicate_received: AtomicUsize::new(0),
+    });
+    let done = Arc::new(AtomicBool::new(false));
+    let worker_handles: Vec<_> = (0..NUM_WORKERS)
+        .map(|_| tokio::spawn(worker(reader.clone(), stats.clone(), done.clone())))
+        .collect();
+    drop(reader);
+
+    // Submitter.
+    let submit_handle = tokio::spawn(submit_jobs(jobs, storage.sender()));
+
+    // Drive to completion: every unique task delivered, or the safety timeout. Poll tightly so the
+    // metrics are frozen as soon as the last task arrives, keeping the idle tail out of the averages.
+    let start = Instant::now();
+    let mut timed_out = false;
+    loop {
+        if stats.seen.len() >= TOTAL_UNIQUE_TASKS {
+            break;
+        }
+        if start.elapsed() > OVERALL_TIMEOUT {
+            timed_out = true;
+            break;
+        }
+        tokio::time::sleep(Duration::from_millis(1)).await;
+    }
+    // Stop timing now that every task has arrived; the scheduler only spins on empty polls past
+    // this point and must not pollute the stage 1 & 2 averages.
+    metrics.stop();
+    let wall = start.elapsed();
+
+    // Tear down.
+    done.store(true, Ordering::Relaxed);
+    scheduler_token.cancel();
+    submit_handle.abort();
+    for handle in worker_handles {
+        let _ = handle.await;
+    }
+    match scheduler_handle.await {
+        Ok(Ok(())) => {}
+        Ok(Err(e)) => eprintln!("scheduler returned an error: {e:?}"),
+        Err(e) => eprintln!("scheduler task panicked: {e:?}"),
+    }
+    let _ = submit_handle.await;
+
+    report(&metrics, &stats, wall, timed_out);
+}
+
+/// Prints the collected timing and correctness results.
+fn report(
+    metrics: &spider_scheduler::core_impl::RoundRobinMetrics,
+    stats: &WorkerStats,
+    wall: Duration,
+    timed_out: bool,
+) {
+    let loop_count = metrics.loop_count.load(Ordering::Relaxed);
+    let total_loop_ns = metrics.total_loop_ns.load(Ordering::Relaxed);
+    let buffer_count = metrics.buffer_enrich_count.load(Ordering::Relaxed);
+    let buffer_ns = metrics.buffer_enrich_ns.load(Ordering::Relaxed);
+    let dispatch_count = metrics.dispatch_enrich_count.load(Ordering::Relaxed);
+    let dispatch_ns = metrics.dispatch_enrich_ns.load(Ordering::Relaxed);
+
+    let total = stats.total_received.load(Ordering::Relaxed);
+    let duplicates = stats.duplicate_received.load(Ordering::Relaxed);
+    let unique = stats.seen.len();
+
+    println!("\n================ Round-robin scheduler load test ================");
+    println!(
+        "Wall-clock runtime:                 {:.3} s",
+        wall.as_secs_f64()
+    );
+    if timed_out {
+        println!("!! TIMED OUT before all unique tasks were delivered !!");
+    }
+
+    println!("\n---- Workload ----");
+    println!("Jobs:                               {NUM_JOBS}");
+    println!("Tasks per job:                      {TASKS_PER_JOB}");
+    println!("Unique tasks (expected):            {TOTAL_UNIQUE_TASKS}");
+    println!("Duplicate task entries submitted:   {EXPECTED_DUPLICATES_SUBMITTED}");
+    println!("Workers:                            {NUM_WORKERS}");
+
+    println!("\n---- Scheduling-loop timing ----");
+    println!("Scheduling-loop iterations:         {loop_count}");
+    println!(
+        "Avg time per scheduling loop:       {:>9.3} us",
+        avg_us(total_loop_ns, loop_count)
+    );
+    println!(
+        "Avg buffer-enrich time (stage 1):   {:>9.3} us   (over {buffer_count} iterations that \
+         polled a non-empty result)",
+        avg_us(buffer_ns, buffer_count)
+    );
+    println!(
+        "Avg dispatch-enrich time (stage 2): {:>9.3} us   (over {dispatch_count} iterations that \
+         dispatched >=1 task)",
+        avg_us(dispatch_ns, dispatch_count)
+    );
+    let idle_loops = loop_count.saturating_sub(dispatch_count);
+    println!(
+        "No-dispatch loop iterations:        {idle_loops}   ({:.1}% of all iterations)",
+        percent(idle_loops, loop_count)
+    );
+
+    println!("\n---- Correctness: each task polled exactly once ----");
+    println!("Total assignments received:         {total}");
+    println!("Unique (job, task) pairs received:  {unique}");
+    println!("Duplicate deliveries observed:      {duplicates}");
+
+    let exactly_once = !timed_out
+        && duplicates == 0
+        && unique == TOTAL_UNIQUE_TASKS
+        && total == TOTAL_UNIQUE_TASKS;
+    println!(
+        "\nRESULT: each task polled exactly once -> {}",
+        if exactly_once { "PASS" } else { "FAIL" }
+    );
+    println!("=================================================================\n");
+}
+
+/// Mean duration per sample, in microseconds.
+///
+/// Divides the nanosecond total `ns` by `count`; yields `0.0` when `count` is zero.
+fn avg_us(ns: u64, count: u64) -> f64 {
+    if count == 0 {
+        return 0.0;
+    }
+    let mean_ns = ns as f64 / count as f64;
+    mean_ns / 1_000.0
+}
+
+/// Share of `whole` taken up by `part`, as a percentage.
+///
+/// Yields `0.0` when `whole` is zero, so an empty run never divides by zero.
+fn percent(part: u64, whole: u64) -> f64 {
+    if whole == 0 {
+        return 0.0;
+    }
+    let ratio = part as f64 / whole as f64;
+    ratio * 100.0
+}
diff --git a/components/spider-scheduler/src/core.rs b/components/spider-scheduler/src/core.rs
index c6bb661c..f6715341 100644
--- a/components/spider-scheduler/src/core.rs
+++ b/components/spider-scheduler/src/core.rs
@@ -39,7 +39,7 @@ pub trait SchedulerCore: Send {
     ///
     /// Returns a [`SchedulerError`] instance indicating an irrecoverable error.
     async fn run(
-        &mut self,
+        self,
         storage_client: Self::StorageClient,
         sink: Self::Sink,
         cancellation_token: tokio_util::sync::CancellationToken,
diff --git a/components/spider-scheduler/src/core_impl.rs b/components/spider-scheduler/src/core_impl.rs
new file mode 100644
index 00000000..2e27d2e9
--- /dev/null
+++ b/components/spider-scheduler/src/core_impl.rs
@@ -0,0 +1,3 @@
+mod round_robin;
+
+pub use round_robin::*;
diff --git a/components/spider-scheduler/src/core_impl/round_robin.rs b/components/spider-scheduler/src/core_impl/round_robin.rs
new file mode 100644
index 00000000..f32c36ca
--- /dev/null
+++ b/components/spider-scheduler/src/core_impl/round_robin.rs
@@ -0,0 +1,776 @@
+use std::{
+    collections::{HashMap, HashSet, VecDeque},
+    sync::{
+        Arc,
+        atomic::{AtomicBool, AtomicU64, Ordering},
+    },
+    time::{Duration, Instant},
+};
+
+use async_trait::async_trait;
+use spider_core::types::id::{JobId, ResourceGroupId, SessionId, TaskId};
+use tokio::select;
+use tokio_util::sync::CancellationToken;
+use serde::Deserialize;
+use crate::{
+    DispatchQueueSink,
+    InboundEntry,
+    SchedulerCore,
+    SchedulerError,
+    SchedulerStorageClient,
+    StorageClientError,
+    TaskAssignment,
+};
+
+#[derive(Deserialize)]
+pub struct RoundRobinConfig<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> {
+    /// The capacity of the active jobs pool. The scheduler will make task assignments from these
+    /// jobs in a round-robin manner.
+    pub active_job_pool_capacity: usize,
+
+    /// The capacity of the dispatch queue.
+    pub dispatch_queue_capacity: usize,
+
+    /// The capacity of the total pending ready tasks buffered in the scheduler.
+    pub ready_task_capacity: usize,
+
+    /// The capacity of the total pending commit-ready tasks buffered in the scheduler.
+    pub commit_ready_task_capacity: usize,
+
+    /// The capacity of the total pending cleanup-ready tasks buffered in the scheduler.
+    pub cleanup_ready_task_capacity: usize,
+
+    pub storage_polling_wait_time_ms: u64,
+
+    #[serde(skip)]
+    metrics: Arc<RoundRobinMetrics>,
+
+    #[serde(skip)]
+    _marker: std::marker::PhantomData<(SchedulerStorageClientType, DispatchQueueSinkType)>,
+}
+
+/// Instrumentation counters for the round-robin scheduling loop.
+///
+/// Durations are accumulated in nanoseconds; an average is a `*_ns` total divided by its matching
+/// `*_count`. All counters use [`Ordering::Relaxed`] and are meant for coarse profiling only, not
+/// for establishing happens-before relationships.
+#[derive(Debug, Default)]
+pub struct RoundRobinMetrics {
+    /// Number of completed scheduling-loop iterations (`loop_once` calls).
+    pub loop_count: AtomicU64,
+
+    /// Total wall-clock time spent across all scheduling-loop iterations.
+    pub total_loop_ns: AtomicU64,
+
+    /// Number of iterations that processed a fresh inbound polling result.
+    pub buffer_enrich_count: AtomicU64,
+
+    /// Total time spent draining inbound polling results into the scheduler's buffers ("enrich the
+    /// buffer", stage 1).
+    pub buffer_enrich_ns: AtomicU64,
+
+    /// Number of iterations that dispatched at least one assignment.
+    pub dispatch_enrich_count: AtomicU64,
+
+    /// Total time spent making scheduling decisions and filling the dispatch queue ("enrich the
+    /// dispatch queue", stage 2).
+    pub dispatch_enrich_ns: AtomicU64,
+
+    /// When set, the scheduling loop stops accumulating any of the counters above. Used to exclude
+    /// the idle tail (after all work has drained) from the averages.
+    stopped: AtomicBool,
+}
+
+impl RoundRobinMetrics {
+    /// Freezes all counters: subsequent scheduling-loop iterations are not recorded.
+    pub fn stop(&self) {
+        self.stopped.store(true, Ordering::Relaxed);
+    }
+
+    /// # Returns
+    ///
+    /// Whether the counters are still being recorded.
+    fn is_recording(&self) -> bool {
+        !self.stopped.load(Ordering::Relaxed)
+    }
+}
+
+impl<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
+{
+    /// Creates a new round-robin configuration with a fresh, empty set of metrics.
+    #[must_use]
+    pub fn new(
+        active_job_pool_capacity: usize,
+        dispatch_queue_capacity: usize,
+        ready_task_capacity: usize,
+        commit_ready_task_capacity: usize,
+        cleanup_ready_task_capacity: usize,
+        storage_polling_wait_time_ms: u64,
+    ) -> Self {
+        Self {
+            active_job_pool_capacity,
+            dispatch_queue_capacity,
+            ready_task_capacity,
+            commit_ready_task_capacity,
+            cleanup_ready_task_capacity,
+            storage_polling_wait_time_ms,
+            metrics: Arc::new(RoundRobinMetrics::default()),
+            _marker: std::marker::PhantomData,
+        }
+    }
+
+    /// # Returns
+    ///
+    /// A shared handle to the loop instrumentation counters, so callers can read them while (or
+    /// after) the scheduler runs.
+    #[must_use]
+    pub fn metrics(&self) -> Arc<RoundRobinMetrics> {
+        Arc::clone(&self.metrics)
+    }
+}
+
+/// # Returns
+///
+/// The time elapsed since `start` in nanoseconds, saturating at [`u64::MAX`].
+fn elapsed_nanos(start: Instant) -> u64 {
+    u64::try_from(start.elapsed().as_nanos()).unwrap_or(u64::MAX)
+}
+
+#[async_trait]
+impl<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> SchedulerCore for RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
+{
+    type StorageClient = SchedulerStorageClientType;
+    type Sink = DispatchQueueSinkType;
+
+    async fn run(
+        self,
+        storage_client: Self::StorageClient,
+        sink: Self::Sink,
+        cancellation_token: CancellationToken,
+    ) -> Result<(), SchedulerError> {
+        RoundRobin::new(
+            SessionId::default(),
+            storage_client,
+            sink,
+            cancellation_token,
+            self,
+        )
+        .run()
+        .await
+    }
+}
+
+struct JobEntry {
+    job_id: JobId,
+    resource_group_id: ResourceGroupId,
+    task_ids: VecDeque<TaskId>,
+}
+
+impl JobEntry {
+    fn new(job_id: JobId, resource_group_id: ResourceGroupId, init_task_id: TaskId) -> Self {
+        Self {
+            job_id,
+            resource_group_id,
+            task_ids: VecDeque::from([init_task_id]),
+        }
+    }
+
+    fn enqueue(&mut self, task_id: TaskId) {
+        self.task_ids.push_back(task_id);
+    }
+
+    fn dequeue(&mut self) -> Option<TaskId> {
+        self.task_ids.pop_front()
+    }
+}
+
+#[derive(Clone)]
+enum ActiveJobQueueEntry {
+    Ready(JobId),
+    CommitReady,
+    CleanupReady,
+}
+
+struct RoundRobin<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> {
+    storage_client: SchedulerStorageClientType,
+    sink: DispatchQueueSinkType,
+    cancellation_token: CancellationToken,
+    config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
+    storage_session_id: SessionId,
+    ready_set: HashSet<(JobId, TaskId)>,
+
+    active_jobs: HashMap<JobId, JobEntry>,
+    active_job_queue: Vec<ActiveJobQueueEntry>,
+    active_job_queue_cursor: usize,
+
+    pending_jobs: HashMap<JobId, JobEntry>,
+    pending_job_queue: VecDeque<JobId>,
+
+    commit_ready_queue: VecDeque<(JobId, ResourceGroupId)>,
+    cleanup_ready_queue: VecDeque<(JobId, ResourceGroupId)>,
+
+    commit_ready_or_cleanup_ready_tasks: HashSet<JobId>,
+
+    inbound_queue_reader: AsyncInboundQueueReader<SchedulerStorageClientType>,
+}
+
+impl<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> RoundRobin<SchedulerStorageClientType, DispatchQueueSinkType>
+{
+    fn new(
+        storage_session_id: SessionId,
+        storage_client: SchedulerStorageClientType,
+        sink: DispatchQueueSinkType,
+        cancellation_token: CancellationToken,
+        config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
+    ) -> Self {
+        let ready_set = HashSet::with_capacity(config.ready_task_capacity);
+        let active_jobs = HashMap::with_capacity(config.active_job_pool_capacity);
+        let active_job_queue = Self::new_active_job_queue(config.active_job_pool_capacity);
+        let active_job_queue_cursor = 0;
+        let pending_jobs = HashMap::with_capacity(config.active_job_pool_capacity);
+        let pending_job_queue = VecDeque::with_capacity(config.active_job_pool_capacity);
+        let commit_ready_queue = VecDeque::with_capacity(config.commit_ready_task_capacity);
+        let cleanup_ready_queue = VecDeque::with_capacity(config.cleanup_ready_task_capacity);
+        let commit_ready_or_cleanup_ready_tasks = HashSet::with_capacity(
+            config.commit_ready_task_capacity + config.cleanup_ready_task_capacity,
+        );
+        let inbound_queue_reader = AsyncInboundQueueReader::new(storage_client.clone());
+        Self {
+            storage_client,
+            sink,
+            cancellation_token,
+            config,
+            storage_session_id,
+            ready_set,
+            active_jobs,
+            active_job_queue,
+            active_job_queue_cursor,
+            pending_jobs,
+            pending_job_queue,
+            commit_ready_queue,
+            cleanup_ready_queue,
+            commit_ready_or_cleanup_ready_tasks,
+            inbound_queue_reader,
+        }
+    }
+
+    fn new_active_job_queue(active_job_pool_capacity: usize) -> Vec<ActiveJobQueueEntry> {
+        let mut active_job_queue = Vec::with_capacity(active_job_pool_capacity + 2);
+        active_job_queue.push(ActiveJobQueueEntry::CommitReady);
+        active_job_queue.push(ActiveJobQueueEntry::CleanupReady);
+        active_job_queue
+    }
+
+    async fn run(mut self) -> Result<(), SchedulerError> {
+        loop {
+            let cancellation_token = self.cancellation_token.clone();
+            select! {
+                () = cancellation_token.cancelled() => {
+                    return Ok(());
+                }
+                result = self.loop_once() => {
+                    let () = result?;
+                }
+            }
+        }
+    }
+
+    fn clear_all_placement(&mut self) {
+        self.ready_set.clear();
+        self.active_jobs.clear();
+        self.pending_jobs.clear();
+        self.pending_job_queue.clear();
+        self.commit_ready_queue.clear();
+        self.cleanup_ready_queue.clear();
+        self.commit_ready_or_cleanup_ready_tasks.clear();
+
+        self.active_job_queue = Self::new_active_job_queue(self.config.active_job_pool_capacity);
+        self.active_job_queue_cursor = 0;
+    }
+
+    /// Evicts `job_id` from the active pool and promotes the next pending job, if any.
+    ///
+    /// Returns [`SchedulerError::Internal`] if `job_id` is absent from the active queue or map.
+    fn remove_active_job_and_dequeue_next_pending_job(
+        &mut self,
+        job_id: JobId,
+    ) -> Result<(), SchedulerError> {
+        let Some(index) = self.active_job_queue.iter().position(
+            |entry| matches!(entry, ActiveJobQueueEntry::Ready(id) if *id == job_id),
+        ) else {
+            return Err(SchedulerError::Internal(format!(
+                "attempt to remove a non-existing active job: {job_id:?}"
+            )));
+        };
+        self.active_job_queue.swap_remove(index);
+
+        let Some(entry_to_remove) = self.active_jobs.remove(&job_id) else {
+            return Err(SchedulerError::Internal(format!(
+                "attempt to destroy a non-existing active job: {job_id:?}"
+            )));
+        };
+        self.destroy_job_entry(entry_to_remove);
+
+        if let Some(next_pending_job) = self.next_pending_job() {
+            self.active_job_queue
+                .push(ActiveJobQueueEntry::Ready(next_pending_job.job_id));
+            self.active_jobs
+                .insert(next_pending_job.job_id, next_pending_job);
+        }
+        Ok(())
+    }
+
+    /// Pops the next queued job still present in `pending_jobs`; `None` if the queue runs dry.
+    fn next_pending_job(&mut self) -> Option<JobEntry> {
+        // NOTE: An ID can be stale, since a job may be removed from `pending_jobs` while queued.
+        while let Some(candidate_id) = self.pending_job_queue.pop_front() {
+            if let Some(entry) = self.pending_jobs.remove(&candidate_id) {
+                return Some(entry);
+            }
+        }
+        None
+    }
+
+    fn destroy_job_entry(&mut self, job_entry: JobEntry) {
+        for task_id in job_entry.task_ids {
+            self.ready_set.remove(&(job_entry.job_id, task_id));
+        }
+    }
+
+    async fn loop_once(&mut self) -> Result<(), SchedulerError> {
+        let loop_start = Instant::now();
+        let recording = self.config.metrics.is_recording();
+
+        // Stage 1: Retrieve inbound queue results
+        let curr_session_id = self.storage_session_id;
+        let inbound_queue_result = self
+            .inbound_queue_reader
+            .poll_ready(curr_session_id)
+            .await?;
+        match inbound_queue_result {
+            InboundQueueResult::Result {
+                session_id,
+                ready_entries,
+                commit_ready_entries,
+                cleanup_ready_entries,
+            } => {
+                let buffer_start = Instant::now();
+                let inbound_entry_count =
+                    ready_entries.len() + commit_ready_entries.len() + cleanup_ready_entries.len();
+                if session_id < curr_session_id {
+                    return Err(SchedulerError::InvalidSessionId(session_id));
+                }
+                if session_id > curr_session_id {
+                    self.storage_session_id = session_id;
+                    self.clear_all_placement();
+                    self.sink.bump_session_id(session_id).await?;
+                }
+
+                // Load commit ready tasks and cleanup ready tasks first to avoid loading a job that
+                // is already cancelled or commit-ready.
+                for inbound_entry in commit_ready_entries {
+                    if !self
+                        .ready_set
+                        .insert((inbound_entry.job_id, inbound_entry.task_id))
+                    {
+                        continue;
+                    }
+                    self.commit_ready_or_cleanup_ready_tasks
+                        .insert(inbound_entry.job_id);
+                    self.commit_ready_queue
+                        .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
+
+                    if self.active_jobs.contains_key(&inbound_entry.job_id) {
+                        self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
+                        continue;
+                    }
+
+                    if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
+                        self.destroy_job_entry(job_entry);
+                    }
+                }
+
+                for inbound_entry in cleanup_ready_entries {
+                    if !self
+                        .ready_set
+                        .insert((inbound_entry.job_id, inbound_entry.task_id))
+                    {
+                        continue;
+                    }
+                    self.commit_ready_or_cleanup_ready_tasks
+                        .insert(inbound_entry.job_id);
+                    self.cleanup_ready_queue
+                        .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
+
+                    if self.active_jobs.contains_key(&inbound_entry.job_id) {
+                        self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
+                        continue;
+                    }
+
+                    if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
+                        self.destroy_job_entry(job_entry);
+                    }
+                }
+
+                for inbound_entry in ready_entries {
+                    if self
+                        .commit_ready_or_cleanup_ready_tasks
+                        .contains(&inbound_entry.job_id)
+                    {
+                        continue;
+                    }
+                    if !self
+                        .ready_set
+                        .insert((inbound_entry.job_id, inbound_entry.task_id))
+                    {
+                        continue;
+                    }
+                    if let Some(active_job) = self.active_jobs.get_mut(&inbound_entry.job_id) {
+                        active_job.enqueue(inbound_entry.task_id);
+                        continue;
+                    }
+                    if let Some(pending_job) = self.pending_jobs.get_mut(&inbound_entry.job_id) {
+                        pending_job.enqueue(inbound_entry.task_id);
+                        continue;
+                    }
+                    if self.active_jobs.len() < self.config.active_job_pool_capacity {
+                        self.active_jobs.insert(
+                            inbound_entry.job_id,
+                            JobEntry::new(
+                                inbound_entry.job_id,
+                                inbound_entry.resource_group_id,
+                                inbound_entry.task_id,
+                            ),
+                        );
+                        self.active_job_queue
+                            .push(ActiveJobQueueEntry::Ready(inbound_entry.job_id));
+                        continue;
+                    }
+                    self.pending_jobs.insert(
+                        inbound_entry.job_id,
+                        JobEntry::new(
+                            inbound_entry.job_id,
+                            inbound_entry.resource_group_id,
+                            inbound_entry.task_id,
+                        ),
+                    );
+                    self.pending_job_queue.push_back(inbound_entry.job_id);
+                }
+
+                // Only record iterations that actually had entries to enrich, so the average
+                // reflects real work rather than empty polls when the scheduler is idle.
+                if recording && inbound_entry_count > 0 {
+                    self.config
+                        .metrics
+                        .buffer_enrich_ns
+                        .fetch_add(elapsed_nanos(buffer_start), Ordering::Relaxed);
+                    self.config
+                        .metrics
+                        .buffer_enrich_count
+                        .fetch_add(1, Ordering::Relaxed);
+                }
+
+                self.spawn_inbound_queue_reader();
+            }
+            InboundQueueResult::ResultNotReady => {}
+            InboundQueueResult::HandleNotSpawned => {
+                self.spawn_inbound_queue_reader();
+            }
+        }
+
+        // Stage 2: Make scheduling decisions to fill the dispatch queue
+        let dispatch_start = Instant::now();
+        let mut dispatch_queue_slots = self
+            .config
+            .dispatch_queue_capacity
+            .saturating_sub(self.sink.size());
+        let initial_dispatch_queue_slots = dispatch_queue_slots;
+        loop {
+            if dispatch_queue_slots == 0 || self.ready_set.is_empty() {
+                break;
+            }
+            if self.active_job_queue_cursor >= self.active_job_queue.len() {
+                self.active_job_queue_cursor = 0;
+            }
+            let active_job_queue_entry =
+                match self.active_job_queue.get(self.active_job_queue_cursor) {
+                    Some(entry) => entry.clone(),
+                    None => {
+                        return Err(SchedulerError::Internal(
+                            "active job queue cursor is corrupted".to_string(),
+                        ));
+                    }
+                };
+            self.active_job_queue_cursor += 1;
+            match active_job_queue_entry {
+                ActiveJobQueueEntry::CleanupReady => {
+                    let Some((job_id, resource_group_id)) = self.cleanup_ready_queue.pop_front()
+                    else {
+                        continue;
+                    };
+                    self.sink
+                        .enqueue(TaskAssignment {
+                            job_id,
+                            resource_group_id,
+                            task_id: TaskId::Cleanup,
+                        })
+                        .await?;
+                    self.ready_set.remove(&(job_id, TaskId::Cleanup));
+                    self.commit_ready_or_cleanup_ready_tasks.remove(&job_id);
+                    dispatch_queue_slots -= 1;
+                }
+                ActiveJobQueueEntry::CommitReady => {
+                    let Some((job_id, resource_group_id)) = self.commit_ready_queue.pop_front()
+                    else {
+                        continue;
+                    };
+                    self.sink
+                        .enqueue(TaskAssignment {
+                            job_id,
+                            resource_group_id,
+                            task_id: TaskId::Commit,
+                        })
+                        .await?;
+                    self.ready_set.remove(&(job_id, TaskId::Commit));
+                    self.commit_ready_or_cleanup_ready_tasks.remove(&job_id);
+                    dispatch_queue_slots -= 1;
+                }
+                ActiveJobQueueEntry::Ready(job_id) => {
+                    let Some(job_entry) = self.active_jobs.get_mut(&job_id) else {
+                        return Err(SchedulerError::Internal(
+                            "attempt to remove a non-existing active job: {job_id:?}".to_string(),
+                        ));
+                    };
+                    if let Some(task_id) = job_entry.dequeue() {
+                        self.sink
+                            .enqueue(TaskAssignment {
+                                job_id,
+                                resource_group_id: job_entry.resource_group_id,
+                                task_id,
+                            })
+                            .await?;
+                        self.ready_set.remove(&(job_id, task_id));
+                        dispatch_queue_slots -= 1;
+                    } else {
+                        self.remove_active_job_and_dequeue_next_pending_job(job_id)?;
+                    }
+                }
+            }
+        }
+
+        let dispatched = initial_dispatch_queue_slots - dispatch_queue_slots;
+        if recording && dispatched > 0 {
+            self.config
+                .metrics
+                .dispatch_enrich_ns
+                .fetch_add(elapsed_nanos(dispatch_start), Ordering::Relaxed);
+            self.config
+                .metrics
+                .dispatch_enrich_count
+                .fetch_add(1, Ordering::Relaxed);
+        }
+
+        if recording {
+            self.config
+                .metrics
+                .total_loop_ns
+                .fetch_add(elapsed_nanos(loop_start), Ordering::Relaxed);
+            self.config
+                .metrics
+                .loop_count
+                .fetch_add(1, Ordering::Relaxed);
+        }
+
+        // When the iteration dispatched nothing, the loop is either waiting on an in-flight poll or
+        // back-pressured by a full dispatch queue. In both cases it would otherwise spin without an
+        // await point; because the inbound polls run on tasks this same runtime must schedule, a
+        // non-yielding spin livelocks them and the scheduler never makes progress. Yield to let the
+        // poll tasks and dispatch-queue readers run.
+        if dispatched == 0 {
+            tokio::task::yield_now().await;
+        }
+
+        Ok(())
+    }
+
+    /// Starts a new round of inbound-queue polls, each capped by its buffer's free capacity.
+    fn spawn_inbound_queue_reader(&mut self) {
+        let num_commit_ready_tasks = self.commit_ready_queue.len();
+        let num_cleanup_ready_tasks = self.cleanup_ready_queue.len();
+        let commit_capacity = self.config.commit_ready_task_capacity;
+        let cleanup_capacity = self.config.cleanup_ready_task_capacity;
+        let max_commit_ready_to_poll = commit_capacity.saturating_sub(num_commit_ready_tasks);
+        let max_cleanup_ready_to_poll = cleanup_capacity.saturating_sub(num_cleanup_ready_tasks);
+        // `ready_set` also holds the queued commit/cleanup entries, so exclude them to count the
+        // regular ready tasks. Saturate so a broken invariant cannot underflow `usize` (a panic
+        // in debug builds, a wrapped count that zeroes the poll size in release builds).
+        let num_special_tasks = num_commit_ready_tasks.saturating_add(num_cleanup_ready_tasks);
+        let num_ready_tasks = self.ready_set.len().saturating_sub(num_special_tasks);
+        let max_ready_to_poll = self.config.ready_task_capacity.saturating_sub(num_ready_tasks);
+        self.inbound_queue_reader.spawn(
+            Duration::from_millis(self.config.storage_polling_wait_time_ms),
+            max_ready_to_poll,
+            max_commit_ready_to_poll,
+            max_cleanup_ready_to_poll,
+        );
+    }
+}
+
+enum InboundQueueResult {
+    Result {
+        session_id: SessionId, // greatest session ID among the caller's and the three polls'
+        ready_entries: Vec<InboundEntry>, // left empty if this poll saw an older session
+        commit_ready_entries: Vec<InboundEntry>, // left empty if this poll saw an older session
+        cleanup_ready_entries: Vec<InboundEntry>, // left empty if this poll saw an older session
+    },
+    ResultNotReady, // at least one of the three poll tasks has not finished yet
+    HandleNotSpawned, // no polling handle is currently held, so there is nothing to collect
+}
+
+type PollJoinHandle =
+    tokio::task::JoinHandle<Result<(SessionId, Vec<InboundEntry>), StorageClientError>>;
+/// Join handles of the ready, commit-ready, and cleanup-ready storage polls spawned together.
+struct InboundQueuePollingHandle {
+    ready_handle: PollJoinHandle,
+    commit_ready_handle: PollJoinHandle,
+    cleanup_ready_handle: PollJoinHandle,
+}
+
+impl InboundQueuePollingHandle {
+    /// Collects the outcome of the three polls once every one of them has finished.
+    ///
+    /// # Returns
+    ///
+    /// * [`InboundQueueResult::ResultNotReady`] while any poll task is still running.
+    /// * [`InboundQueueResult::Result`] otherwise. Its session ID is the greatest among
+    ///   `curr_session_id` and the polled ones; entries polled under an older one are dropped.
+    ///
+    /// # Errors
+    ///
+    /// * [`SchedulerError::Internal`] if a poll task cannot be joined.
+    /// * Forwards the error of a failed poll, converted into a [`SchedulerError`].
+    async fn poll_ready(
+        &mut self,
+        curr_session_id: SessionId,
+    ) -> Result<InboundQueueResult, SchedulerError> {
+        let all_finished = self.ready_handle.is_finished()
+            && self.commit_ready_handle.is_finished()
+            && self.cleanup_ready_handle.is_finished();
+        if !all_finished {
+            return Ok(InboundQueueResult::ResultNotReady);
+        }
+
+        let join_err = |e: tokio::task::JoinError| SchedulerError::Internal(e.to_string());
+        let (ready_session_id, ready_entries) = (&mut self.ready_handle).await.map_err(join_err)??;
+        let (commit_session_id, commit_ready_entries) =
+            (&mut self.commit_ready_handle).await.map_err(join_err)??;
+        let (cleanup_session_id, cleanup_ready_entries) =
+            (&mut self.cleanup_ready_handle).await.map_err(join_err)??;
+
+        let latest_session_id = curr_session_id
+            .max(ready_session_id)
+            .max(commit_session_id)
+            .max(cleanup_session_id);
+        let keep = |id, entries| Self::drop_if_stale(id, latest_session_id, entries);
+        Ok(InboundQueueResult::Result {
+            session_id: latest_session_id,
+            ready_entries: keep(ready_session_id, ready_entries),
+            commit_ready_entries: keep(commit_session_id, commit_ready_entries),
+            cleanup_ready_entries: keep(cleanup_session_id, cleanup_ready_entries),
+        })
+    }
+
+    /// Keeps `entries` only when `session_id` equals `latest_session_id`.
+    fn drop_if_stale(
+        session_id: SessionId,
+        latest_session_id: SessionId,
+        entries: Vec<InboundEntry>,
+    ) -> Vec<InboundEntry> {
+        if session_id != latest_session_id {
+            return Vec::new();
+        }
+        entries
+    }
+}
+
+struct AsyncInboundQueueReader<StorageClientType: SchedulerStorageClient + 'static> {
+    storage_client: StorageClientType, // cloned into every spawned poll task
+    handle: Option<InboundQueuePollingHandle>, // `Some` from `spawn` until `poll_ready` clears it
+}
+
+impl<StorageClientType: SchedulerStorageClient + 'static>
+    AsyncInboundQueueReader<StorageClientType>
+{
+    /// Creates a reader with no poll in flight.
+    const fn new(storage_client: StorageClientType) -> Self {
+        Self {
+            storage_client,
+            handle: None,
+        }
+    }
+
+    /// Collects the in-flight polls; returns `HandleNotSpawned` when none has been spawned.
+    async fn poll_ready(
+        &mut self,
+        curr_session_id: SessionId,
+    ) -> Result<InboundQueueResult, SchedulerError> {
+        let Some(handle) = &mut self.handle else {
+            return Ok(InboundQueueResult::HandleNotSpawned);
+        };
+        let inbound_queue_result = handle.poll_ready(curr_session_id).await;
+        // Drop the handle on error too: its completed `JoinHandle`s must not be polled again.
+        if !matches!(inbound_queue_result, Ok(InboundQueueResult::ResultNotReady)) {
+            self.handle = None;
+        }
+        inbound_queue_result
+    }
+
+    /// Spawns the ready, commit-ready, and cleanup-ready polls, replacing any held handle.
+    fn spawn(
+        &mut self,
+        storage_polling_wait_time: Duration,
+        max_ready_entries: usize,
+        max_commit_ready_entries: usize,
+        max_cleanup_ready_entries: usize,
+    ) {
+        let ready_storage_client = self.storage_client.clone();
+        let ready_handle = tokio::task::spawn(async move {
+            ready_storage_client
+                .poll_ready(max_ready_entries, storage_polling_wait_time)
+                .await
+        });
+        let commit_ready_storage_client = self.storage_client.clone();
+        let commit_ready_handle = tokio::task::spawn(async move {
+            commit_ready_storage_client
+                .poll_commit_ready(max_commit_ready_entries, storage_polling_wait_time)
+                .await
+        });
+        let cleanup_ready_storage_client = self.storage_client.clone();
+        let cleanup_ready_handle = tokio::task::spawn(async move {
+            cleanup_ready_storage_client
+                .poll_cleanup_ready(max_cleanup_ready_entries, storage_polling_wait_time)
+                .await
+        });
+
+        self.handle = Some(InboundQueuePollingHandle {
+            ready_handle,
+            commit_ready_handle,
+            cleanup_ready_handle,
+        });
+    }
+}
diff --git a/components/spider-scheduler/src/error.rs b/components/spider-scheduler/src/error.rs
index 6a852c46..bff7571d 100644
--- a/components/spider-scheduler/src/error.rs
+++ b/components/spider-scheduler/src/error.rs
@@ -28,4 +28,10 @@ pub enum SchedulerError {
     /// The session ID is invalid.
     #[error("invalid session ID: {0:?}")]
     InvalidSessionId(SessionId),
+
+    #[error("internal error: {0}")]
+    Internal(String),
+
+    #[error("async result not ready")]
+    ResultNotReady,
 }
diff --git a/components/spider-scheduler/src/lib.rs b/components/spider-scheduler/src/lib.rs
index bddd0750..a97580d6 100644
--- a/components/spider-scheduler/src/lib.rs
+++ b/components/spider-scheduler/src/lib.rs
@@ -32,6 +32,7 @@
 //! ```
 
 pub mod core;
+pub mod core_impl;
 pub mod dispatch_queue;
 pub mod error;
 pub mod storage_client;

From 25045e2cfa062ce6dc4a3a65b0d4c490387818c5 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Wed, 3 Jun 2026 13:16:24 -0400
Subject: [PATCH 07/14] Polish.

---
 .../examples/round_robin_load.rs              | 392 ----------------
 .../src/core_impl/round_robin.rs              | 442 ++++++++----------
 2 files changed, 182 insertions(+), 652 deletions(-)
 delete mode 100644 components/spider-scheduler/examples/round_robin_load.rs

diff --git a/components/spider-scheduler/examples/round_robin_load.rs b/components/spider-scheduler/examples/round_robin_load.rs
deleted file mode 100644
index 9d2d9385..00000000
--- a/components/spider-scheduler/examples/round_robin_load.rs
+++ /dev/null
@@ -1,392 +0,0 @@
-//! Load-test and instrumentation harness for the round-robin scheduler core.
-//!
-//! Topology:
-//!
-//! ```text
-//!   submitter ──▶ MockStorage (ready lane) ──poll──▶ RoundRobinCore ──enqueue──▶ dispatch queue ──▶ 64 workers
-//! ```
-//!
-//! * A mock storage holds 128 jobs of 1000 tasks each, released gradually (one job at a time) to
-//!   simulate a job-submission cycle rather than making everything ready at `t=0`.
-//! * 1% of the tasks are submitted twice (back-to-back) so the scheduler's deduplication can be
-//!   exercised; workers must still observe every task exactly once.
-//! * 64 workers drain the dispatch queue, sleeping 5ms per task to model execution latency.
-//!
-//! Run with (release recommended so the timings are meaningful):
-//!
-//! ```bash
-//! cargo run -p spider-scheduler --example round_robin_load --release
-//! ```
-
-use std::{
-    sync::{
-        Arc,
-        atomic::{AtomicBool, AtomicUsize, Ordering},
-    },
-    time::{Duration, Instant},
-};
-
-use async_trait::async_trait;
-use dashmap::DashSet;
-use spider_core::{
-    job::JobState,
-    types::id::{JobId, ResourceGroupId, SessionId, TaskId},
-};
-use spider_scheduler::{
-    DispatchQueueSource,
-    SchedulerCore,
-    SchedulerStorageClient,
-    StorageClientError,
-    core_impl::RoundRobinConfig,
-    dispatch_queue::{DispatchQueueReader, DispatchQueueWriter, create_dispatch_queue},
-    types::InboundEntry,
-};
-use tokio_util::sync::CancellationToken;
-
-// ---------------------------------------------------------------------------------------------
-// Workload parameters
-// ---------------------------------------------------------------------------------------------
-
-const NUM_JOBS: usize = 128;
-const TASKS_PER_JOB: usize = 1000;
-const TOTAL_UNIQUE_TASKS: usize = NUM_JOBS * TASKS_PER_JOB;
-
-/// Every `DUP_EVERY`-th task within a job is submitted twice, yielding exactly 1% duplicates.
-const DUP_EVERY: usize = 100;
-const EXPECTED_DUPLICATES_SUBMITTED: usize = NUM_JOBS * TASKS_PER_JOB / DUP_EVERY;
-
-const NUM_WORKERS: usize = 64;
-const WORKER_SLEEP: Duration = Duration::from_millis(5);
-const WORKER_POLL_WAIT: Duration = Duration::from_millis(10);
-
-/// Delay between releasing successive jobs into storage (the "submission cycle").
-const JOB_SUBMIT_INTERVAL: Duration = Duration::from_millis(10);
-
-/// A fixed session: this harness never bumps the session, so storage and the dispatch queue both
-/// start (and stay) here.
-const SESSION_ID: SessionId = 0;
-
-// Round-robin scheduler configuration (as requested).
-const ACTIVE_JOB_POOL_CAPACITY: usize = 8;
-const DISPATCH_QUEUE_CAPACITY: usize = NUM_WORKERS * 4; // 256
-const STORAGE_POLLING_WAIT_TIME_MS: u64 = 10; // dispatch/poll interval
-const READY_TASK_CAPACITY: usize = TASKS_PER_JOB * NUM_WORKERS; // 64_000
-const COMMIT_READY_TASK_CAPACITY: usize = 10;
-const CLEANUP_READY_TASK_CAPACITY: usize = 10;
-
-/// Safety net so a scheduling bug that drops a task cannot hang the harness forever.
-const OVERALL_TIMEOUT: Duration = Duration::from_mins(2);
-
-// ---------------------------------------------------------------------------------------------
-// Mock storage
-// ---------------------------------------------------------------------------------------------
-
-/// A mock [`SchedulerStorageClient`] whose regular lane is backed by an unbounded channel that the
-/// submitter feeds. Commit and cleanup lanes are always empty.
-#[derive(Clone)]
-struct MockStorage {
-    inner: Arc<MockStorageInner>,
-}
-
-struct MockStorageInner {
-    ready_tx: async_channel::Sender<InboundEntry>,
-    ready_rx: async_channel::Receiver<InboundEntry>,
-}
-
-impl MockStorage {
-    fn new() -> Self {
-        let (ready_tx, ready_rx) = async_channel::unbounded();
-        Self {
-            inner: Arc::new(MockStorageInner { ready_tx, ready_rx }),
-        }
-    }
-
-    /// # Returns
-    ///
-    /// A cloned sender for the regular ready lane, used by the submitter task.
-    fn sender(&self) -> async_channel::Sender<InboundEntry> {
-        self.inner.ready_tx.clone()
-    }
-}
-
-#[async_trait]
-impl SchedulerStorageClient for MockStorage {
-    async fn poll_ready(
-        &self,
-        max_items: usize,
-        wait: Duration,
-    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
-        if max_items == 0 {
-            // The scheduler has no buffer headroom; emulate a real blocking poll that yields
-            // nothing rather than pulling past the requested cap.
-            tokio::time::sleep(wait).await;
-            return Ok((SESSION_ID, Vec::new()));
-        }
-
-        let mut out = Vec::new();
-        // Block up to `wait` for the first entry, mirroring a real long-poll.
-        match tokio::time::timeout(wait, self.inner.ready_rx.recv()).await {
-            Ok(Ok(entry)) => out.push(entry),
-            // Channel closed (never happens here, sender is held by storage) or timed out: return
-            // whatever we have (possibly nothing).
-            Ok(Err(_)) | Err(_) => return Ok((SESSION_ID, out)),
-        }
-        // Drain the rest without blocking, up to `max_items`.
-        while out.len() < max_items {
-            match self.inner.ready_rx.try_recv() {
-                Ok(entry) => out.push(entry),
-                Err(_) => break,
-            }
-        }
-        Ok((SESSION_ID, out))
-    }
-
-    async fn poll_commit_ready(
-        &self,
-        _max_items: usize,
-        wait: Duration,
-    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
-        tokio::time::sleep(wait).await;
-        Ok((SESSION_ID, Vec::new()))
-    }
-
-    async fn poll_cleanup_ready(
-        &self,
-        _max_items: usize,
-        wait: Duration,
-    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
-        tokio::time::sleep(wait).await;
-        Ok((SESSION_ID, Vec::new()))
-    }
-
-    async fn job_state(&self, _job_id: JobId) -> Result<JobState, StorageClientError> {
-        Ok(JobState::Running)
-    }
-}
-
-// ---------------------------------------------------------------------------------------------
-// Submitter & workers
-// ---------------------------------------------------------------------------------------------
-
-/// Releases each job's tasks into storage one job at a time, duplicating every `DUP_EVERY`-th task
-/// back-to-back so the duplicate lands in the same poll batch as its original.
-async fn submit_jobs(jobs: Vec<(JobId, ResourceGroupId)>, tx: async_channel::Sender<InboundEntry>) {
-    for (job_id, resource_group_id) in jobs {
-        for i in 0..TASKS_PER_JOB {
-            let entry = InboundEntry {
-                resource_group_id,
-                job_id,
-                task_id: TaskId::Index(i),
-            };
-            tx.send(entry).await.expect("ready lane closed");
-            if i % DUP_EVERY == 0 {
-                tx.send(entry).await.expect("ready lane closed");
-            }
-        }
-        tokio::time::sleep(JOB_SUBMIT_INTERVAL).await;
-    }
-}
-
-/// Shared bookkeeping for the "each task is polled exactly once" check.
-struct WorkerStats {
-    seen: DashSet<(JobId, TaskId)>,
-    total_received: AtomicUsize,
-    duplicate_received: AtomicUsize,
-}
-
-/// A single worker: drain the dispatch queue, record each assignment, then sleep to model work.
-async fn worker(reader: DispatchQueueReader, stats: Arc<WorkerStats>, done: Arc<AtomicBool>) {
-    loop {
-        if done.load(Ordering::Relaxed) {
-            break;
-        }
-        match reader.dequeue(WORKER_POLL_WAIT).await {
-            Ok(Some((_session, assignment))) => {
-                stats.total_received.fetch_add(1, Ordering::Relaxed);
-                if !stats.seen.insert((assignment.job_id, assignment.task_id)) {
-                    stats.duplicate_received.fetch_add(1, Ordering::Relaxed);
-                }
-                tokio::time::sleep(WORKER_SLEEP).await;
-            }
-            Ok(None) => {}
-            // Dispatch queue closed (scheduler dropped its writer): nothing more will arrive.
-            Err(_) => break,
-        }
-    }
-}
-
-// ---------------------------------------------------------------------------------------------
-// Harness
-// ---------------------------------------------------------------------------------------------
-
-#[tokio::main(flavor = "multi_thread")]
-async fn main() {
-    let storage = MockStorage::new();
-    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, SESSION_ID);
-
-    let config = RoundRobinConfig::<MockStorage, DispatchQueueWriter>::new(
-        ACTIVE_JOB_POOL_CAPACITY,
-        DISPATCH_QUEUE_CAPACITY,
-        READY_TASK_CAPACITY,
-        COMMIT_READY_TASK_CAPACITY,
-        CLEANUP_READY_TASK_CAPACITY,
-        STORAGE_POLLING_WAIT_TIME_MS,
-    );
-    let metrics = config.metrics();
-
-    let jobs: Vec<(JobId, ResourceGroupId)> = (0..NUM_JOBS)
-        .map(|_| (JobId::new(), ResourceGroupId::new()))
-        .collect();
-
-    // Scheduler.
-    let scheduler_token = CancellationToken::new();
-    let scheduler_handle = {
-        let token = scheduler_token.clone();
-        let storage = storage.clone();
-        tokio::spawn(async move { config.run(storage, writer, token).await })
-    };
-
-    // Workers.
-    let stats = Arc::new(WorkerStats {
-        seen: DashSet::with_capacity(TOTAL_UNIQUE_TASKS),
-        total_received: AtomicUsize::new(0),
-        duplicate_received: AtomicUsize::new(0),
-    });
-    let done = Arc::new(AtomicBool::new(false));
-    let worker_handles: Vec<_> = (0..NUM_WORKERS)
-        .map(|_| tokio::spawn(worker(reader.clone(), stats.clone(), done.clone())))
-        .collect();
-    drop(reader);
-
-    // Submitter.
-    let submit_handle = tokio::spawn(submit_jobs(jobs, storage.sender()));
-
-    // Drive to completion: every unique task delivered, or the safety timeout. Poll tightly so the
-    // metrics are frozen as soon as the last task arrives, keeping the idle tail out of the averages.
-    let start = Instant::now();
-    let mut timed_out = false;
-    loop {
-        if stats.seen.len() >= TOTAL_UNIQUE_TASKS {
-            break;
-        }
-        if start.elapsed() > OVERALL_TIMEOUT {
-            timed_out = true;
-            break;
-        }
-        tokio::time::sleep(Duration::from_millis(1)).await;
-    }
-    // Stop timing now that every task has arrived; the scheduler only spins on empty polls past
-    // this point and must not pollute the stage 1 & 2 averages.
-    metrics.stop();
-    let wall = start.elapsed();
-
-    // Tear down.
-    done.store(true, Ordering::Relaxed);
-    scheduler_token.cancel();
-    submit_handle.abort();
-    for handle in worker_handles {
-        let _ = handle.await;
-    }
-    match scheduler_handle.await {
-        Ok(Ok(())) => {}
-        Ok(Err(e)) => eprintln!("scheduler returned an error: {e:?}"),
-        Err(e) => eprintln!("scheduler task panicked: {e:?}"),
-    }
-    let _ = submit_handle.await;
-
-    report(&metrics, &stats, wall, timed_out);
-}
-
-/// Prints the collected timing and correctness results.
-fn report(
-    metrics: &spider_scheduler::core_impl::RoundRobinMetrics,
-    stats: &WorkerStats,
-    wall: Duration,
-    timed_out: bool,
-) {
-    let loop_count = metrics.loop_count.load(Ordering::Relaxed);
-    let total_loop_ns = metrics.total_loop_ns.load(Ordering::Relaxed);
-    let buffer_count = metrics.buffer_enrich_count.load(Ordering::Relaxed);
-    let buffer_ns = metrics.buffer_enrich_ns.load(Ordering::Relaxed);
-    let dispatch_count = metrics.dispatch_enrich_count.load(Ordering::Relaxed);
-    let dispatch_ns = metrics.dispatch_enrich_ns.load(Ordering::Relaxed);
-
-    let total = stats.total_received.load(Ordering::Relaxed);
-    let duplicates = stats.duplicate_received.load(Ordering::Relaxed);
-    let unique = stats.seen.len();
-
-    println!("\n================ Round-robin scheduler load test ================");
-    println!(
-        "Wall-clock runtime:                 {:.3} s",
-        wall.as_secs_f64()
-    );
-    if timed_out {
-        println!("!! TIMED OUT before all unique tasks were delivered !!");
-    }
-
-    println!("\n---- Workload ----");
-    println!("Jobs:                               {NUM_JOBS}");
-    println!("Tasks per job:                      {TASKS_PER_JOB}");
-    println!("Unique tasks (expected):            {TOTAL_UNIQUE_TASKS}");
-    println!("Duplicate task entries submitted:   {EXPECTED_DUPLICATES_SUBMITTED}");
-    println!("Workers:                            {NUM_WORKERS}");
-
-    println!("\n---- Scheduling-loop timing ----");
-    println!("Scheduling-loop iterations:         {loop_count}");
-    println!(
-        "Avg time per scheduling loop:       {:>9.3} us",
-        avg_us(total_loop_ns, loop_count)
-    );
-    println!(
-        "Avg buffer-enrich time (stage 1):   {:>9.3} us   (over {buffer_count} iterations that \
-         polled a non-empty result)",
-        avg_us(buffer_ns, buffer_count)
-    );
-    println!(
-        "Avg dispatch-enrich time (stage 2): {:>9.3} us   (over {dispatch_count} iterations that \
-         dispatched >=1 task)",
-        avg_us(dispatch_ns, dispatch_count)
-    );
-    let idle_loops = loop_count.saturating_sub(dispatch_count);
-    println!(
-        "No-dispatch loop iterations:        {idle_loops}   ({:.1}% of all iterations)",
-        percent(idle_loops, loop_count)
-    );
-
-    println!("\n---- Correctness: each task polled exactly once ----");
-    println!("Total assignments received:         {total}");
-    println!("Unique (job, task) pairs received:  {unique}");
-    println!("Duplicate deliveries observed:      {duplicates}");
-
-    let exactly_once = !timed_out
-        && duplicates == 0
-        && unique == TOTAL_UNIQUE_TASKS
-        && total == TOTAL_UNIQUE_TASKS;
-    println!(
-        "\nRESULT: each task polled exactly once -> {}",
-        if exactly_once { "PASS" } else { "FAIL" }
-    );
-    println!("=================================================================\n");
-}
-
-/// # Returns
-///
-/// `ns / count` converted to microseconds, or `0.0` when `count` is zero.
-fn avg_us(ns: u64, count: u64) -> f64 {
-    if count == 0 {
-        0.0
-    } else {
-        ns as f64 / count as f64 / 1_000.0
-    }
-}
-
-/// # Returns
-///
-/// `part` as a percentage of `whole`, or `0.0` when `whole` is zero.
-fn percent(part: u64, whole: u64) -> f64 {
-    if whole == 0 {
-        0.0
-    } else {
-        part as f64 / whole as f64 * 100.0
-    }
-}
diff --git a/components/spider-scheduler/src/core_impl/round_robin.rs b/components/spider-scheduler/src/core_impl/round_robin.rs
index f32c36ca..8708a538 100644
--- a/components/spider-scheduler/src/core_impl/round_robin.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin.rs
@@ -1,17 +1,47 @@
+//! Round-robin scheduler.
+//!
+//! This scheduler provides basic fairness across jobs using a round-robin scheduling policy. It
+//! polls tasks from the inbound queue (maintained by the storage service) and organizes jobs into
+//! two sets:
+//!
+//! * Active jobs: jobs that participate in round-robin scheduling.
+//! * Pending jobs: jobs that are buffered but not yet scheduled. When an active job has no
+//!   remaining schedulable tasks, it is replaced by the next pending job in FIFO order.
+//!
+//! The scheduler operates in discrete ticks. During each tick, it attempts to consume the results
+//! of an asynchronous inbound-queue polling operation and loads any newly available tasks into its
+//! internal buffers. It then makes scheduling decisions until the dispatch queue reaches capacity.
+//!
+//! # Properties
+//!
+//! * Each round-robin cycle may schedule at most one additional commit task and one additional
+//!   cleanup task, if available.
+//! * All buffered tasks are unique. Tasks loaded from the inbound queue are deduplicated before
+//!   entering the scheduler's internal buffers.
+//!
+//! # Configuration
+//!
+//! * `active_job_pool_capacity`: Maximum number of active jobs maintained by the scheduler.
+//! * `dispatch_queue_capacity`: Maximum number of task assignments in the dispatch queue.
+//! * `ready_task_capacity`: Maximum number of ready tasks buffered by the scheduler.
+//! * `commit_ready_task_capacity`: Maximum number of buffered commit-ready tasks.
+//! * `cleanup_ready_task_capacity`: Maximum number of buffered cleanup-ready tasks.
+//! * `storage_polling_wait_time_ms`: Maximum time, in milliseconds, that inbound-queue polling may
+//!   block on the storage-service side.
+//! * `tick_interval_ms`: Interval, in milliseconds, between scheduler ticks (tick execution time
+//!   included).
+
 use std::{
     collections::{HashMap, HashSet, VecDeque},
-    sync::{
-        Arc,
-        atomic::{AtomicBool, AtomicU64, Ordering},
-    },
-    time::{Duration, Instant},
+    time::Duration,
 };
 
 use async_trait::async_trait;
+use serde::Deserialize;
 use spider_core::types::id::{JobId, ResourceGroupId, SessionId, TaskId};
 use tokio::select;
 use tokio_util::sync::CancellationToken;
-use serde::Deserialize;
+
 use crate::{
     DispatchQueueSink,
     InboundEntry,
@@ -43,113 +73,25 @@ pub struct RoundRobinConfig<
     /// The capacity of the total pending cleanup-ready tasks buffered in the scheduler.
     pub cleanup_ready_task_capacity: usize,
 
+    /// The maximum time (in milliseconds) that the scheduler will wait for the storage server to
+    /// fill the inbound-queue reading request.
     pub storage_polling_wait_time_ms: u64,
 
-    #[serde(skip)]
-    metrics: Arc<RoundRobinMetrics>,
+    /// The time (in milliseconds) that the scheduler will spend on each tick.
+    pub tick_interval_ms: u64,
 
     #[serde(skip)]
     _marker: std::marker::PhantomData<(SchedulerStorageClientType, DispatchQueueSinkType)>,
 }
 
-/// Instrumentation counters for the round-robin scheduling loop.
-///
-/// Durations are accumulated in nanoseconds; an average is a `*_ns` total divided by its matching
-/// `*_count`. All counters use [`Ordering::Relaxed`] and are meant for coarse profiling only, not
-/// for establishing happens-before relationships.
-#[derive(Debug, Default)]
-pub struct RoundRobinMetrics {
-    /// Number of completed scheduling-loop iterations (`loop_once` calls).
-    pub loop_count: AtomicU64,
-
-    /// Total wall-clock time spent across all scheduling-loop iterations.
-    pub total_loop_ns: AtomicU64,
-
-    /// Number of iterations that processed a fresh inbound polling result.
-    pub buffer_enrich_count: AtomicU64,
-
-    /// Total time spent draining inbound polling results into the scheduler's buffers ("enrich the
-    /// buffer", stage 1).
-    pub buffer_enrich_ns: AtomicU64,
-
-    /// Number of iterations that dispatched at least one assignment.
-    pub dispatch_enrich_count: AtomicU64,
-
-    /// Total time spent making scheduling decisions and filling the dispatch queue ("enrich the
-    /// dispatch queue", stage 2).
-    pub dispatch_enrich_ns: AtomicU64,
-
-    /// When set, the scheduling loop stops accumulating any of the counters above. Used to exclude
-    /// the idle tail (after all work has drained) from the averages.
-    stopped: AtomicBool,
-}
-
-impl RoundRobinMetrics {
-    /// Freezes all counters: subsequent scheduling-loop iterations are not recorded.
-    pub fn stop(&self) {
-        self.stopped.store(true, Ordering::Relaxed);
-    }
-
-    /// # Returns
-    ///
-    /// Whether the counters are still being recorded.
-    fn is_recording(&self) -> bool {
-        !self.stopped.load(Ordering::Relaxed)
-    }
-}
-
-impl<
-    SchedulerStorageClientType: SchedulerStorageClient + 'static,
-    DispatchQueueSinkType: DispatchQueueSink,
-> RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
-{
-    /// Creates a new round-robin configuration with a fresh, empty set of metrics.
-    #[must_use]
-    pub fn new(
-        active_job_pool_capacity: usize,
-        dispatch_queue_capacity: usize,
-        ready_task_capacity: usize,
-        commit_ready_task_capacity: usize,
-        cleanup_ready_task_capacity: usize,
-        storage_polling_wait_time_ms: u64,
-    ) -> Self {
-        Self {
-            active_job_pool_capacity,
-            dispatch_queue_capacity,
-            ready_task_capacity,
-            commit_ready_task_capacity,
-            cleanup_ready_task_capacity,
-            storage_polling_wait_time_ms,
-            metrics: Arc::new(RoundRobinMetrics::default()),
-            _marker: std::marker::PhantomData,
-        }
-    }
-
-    /// # Returns
-    ///
-    /// A shared handle to the loop instrumentation counters, so callers can read them while (or
-    /// after) the scheduler runs.
-    #[must_use]
-    pub fn metrics(&self) -> Arc<RoundRobinMetrics> {
-        Arc::clone(&self.metrics)
-    }
-}
-
-/// # Returns
-///
-/// The time elapsed since `start` in nanoseconds, saturating at [`u64::MAX`].
-fn elapsed_nanos(start: Instant) -> u64 {
-    u64::try_from(start.elapsed().as_nanos()).unwrap_or(u64::MAX)
-}
-
 #[async_trait]
 impl<
     SchedulerStorageClientType: SchedulerStorageClient + 'static,
     DispatchQueueSinkType: DispatchQueueSink,
 > SchedulerCore for RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
 {
-    type StorageClient = SchedulerStorageClientType;
     type Sink = DispatchQueueSinkType;
+    type StorageClient = SchedulerStorageClientType;
 
     async fn run(
         self,
@@ -277,16 +219,25 @@ impl<
     }
 
     async fn run(mut self) -> Result<(), SchedulerError> {
+        let tick_interval = Duration::from_millis(self.config.tick_interval_ms);
         loop {
+            let now = tokio::time::Instant::now();
             let cancellation_token = self.cancellation_token.clone();
             select! {
                 () = cancellation_token.cancelled() => {
                     return Ok(());
                 }
-                result = self.loop_once() => {
+                result = self.tick() => {
                     let () = result?;
                 }
             }
+            let elapsed = now.elapsed();
+            let sleep_time = tick_interval.saturating_sub(elapsed);
+            if !sleep_time.is_zero() {
+                tokio::time::sleep(sleep_time).await;
+            } else {
+                tokio::task::yield_now().await;
+            }
         }
     }
 
@@ -352,11 +303,124 @@ impl<
         }
     }
 
-    async fn loop_once(&mut self) -> Result<(), SchedulerError> {
-        let loop_start = Instant::now();
-        let recording = self.config.metrics.is_recording();
+    async fn tick(&mut self) -> Result<(), SchedulerError> {
+        self.poll_inbound_queue_result().await?;
+        self.make_schedule_decision().await?;
+        Ok(())
+    }
+
+    async fn load_inbound_queue_result(
+        &mut self,
+        curr_session_id: SessionId,
+        storage_session_id: SessionId,
+        ready_entries: Vec<InboundEntry>,
+        commit_ready_entries: Vec<InboundEntry>,
+        cleanup_ready_entries: Vec<InboundEntry>,
+    ) -> Result<(), SchedulerError> {
+        if storage_session_id < curr_session_id {
+            return Err(SchedulerError::InvalidSessionId(storage_session_id));
+        }
+        if storage_session_id > curr_session_id {
+            self.storage_session_id = storage_session_id;
+            self.clear_all_placement();
+            self.sink.bump_session_id(storage_session_id).await?;
+        }
+
+        // Load commit ready tasks and cleanup ready tasks first to avoid loading a job that
+        // is already cancelled or commit-ready.
+        for inbound_entry in commit_ready_entries {
+            if !self
+                .ready_set
+                .insert((inbound_entry.job_id, inbound_entry.task_id))
+            {
+                continue;
+            }
+            self.commit_ready_or_cleanup_ready_tasks
+                .insert(inbound_entry.job_id);
+            self.commit_ready_queue
+                .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
+
+            if self.active_jobs.contains_key(&inbound_entry.job_id) {
+                self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
+                continue;
+            }
+
+            if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
+                self.destroy_job_entry(job_entry);
+            }
+        }
+
+        for inbound_entry in cleanup_ready_entries {
+            if !self
+                .ready_set
+                .insert((inbound_entry.job_id, inbound_entry.task_id))
+            {
+                continue;
+            }
+            self.commit_ready_or_cleanup_ready_tasks
+                .insert(inbound_entry.job_id);
+            self.cleanup_ready_queue
+                .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
+
+            if self.active_jobs.contains_key(&inbound_entry.job_id) {
+                self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
+                continue;
+            }
+
+            if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
+                self.destroy_job_entry(job_entry);
+            }
+        }
+
+        for inbound_entry in ready_entries {
+            if self
+                .commit_ready_or_cleanup_ready_tasks
+                .contains(&inbound_entry.job_id)
+            {
+                continue;
+            }
+            if !self
+                .ready_set
+                .insert((inbound_entry.job_id, inbound_entry.task_id))
+            {
+                continue;
+            }
+            if let Some(active_job) = self.active_jobs.get_mut(&inbound_entry.job_id) {
+                active_job.enqueue(inbound_entry.task_id);
+                continue;
+            }
+            if let Some(pending_job) = self.pending_jobs.get_mut(&inbound_entry.job_id) {
+                pending_job.enqueue(inbound_entry.task_id);
+                continue;
+            }
+            if self.active_jobs.len() < self.config.active_job_pool_capacity {
+                self.active_jobs.insert(
+                    inbound_entry.job_id,
+                    JobEntry::new(
+                        inbound_entry.job_id,
+                        inbound_entry.resource_group_id,
+                        inbound_entry.task_id,
+                    ),
+                );
+                self.active_job_queue
+                    .push(ActiveJobQueueEntry::Ready(inbound_entry.job_id));
+                continue;
+            }
+            self.pending_jobs.insert(
+                inbound_entry.job_id,
+                JobEntry::new(
+                    inbound_entry.job_id,
+                    inbound_entry.resource_group_id,
+                    inbound_entry.task_id,
+                ),
+            );
+            self.pending_job_queue.push_back(inbound_entry.job_id);
+        }
+
+        Ok(())
+    }
 
-        // Stage 1: Retrieve inbound queue results
+    async fn poll_inbound_queue_result(&mut self) -> Result<(), SchedulerError> {
         let curr_session_id = self.storage_session_id;
         let inbound_queue_result = self
             .inbound_queue_reader
@@ -364,127 +428,19 @@ impl<
             .await?;
         match inbound_queue_result {
             InboundQueueResult::Result {
-                session_id,
+                session_id: storage_session_id,
                 ready_entries,
                 commit_ready_entries,
                 cleanup_ready_entries,
             } => {
-                let buffer_start = Instant::now();
-                let inbound_entry_count =
-                    ready_entries.len() + commit_ready_entries.len() + cleanup_ready_entries.len();
-                if session_id < curr_session_id {
-                    return Err(SchedulerError::InvalidSessionId(session_id));
-                }
-                if session_id > curr_session_id {
-                    self.storage_session_id = session_id;
-                    self.clear_all_placement();
-                    self.sink.bump_session_id(session_id).await?;
-                }
-
-                // Load commit ready tasks and cleanup ready tasks first to avoid loading a job that
-                // is already cancelled or commit-ready.
-                for inbound_entry in commit_ready_entries {
-                    if !self
-                        .ready_set
-                        .insert((inbound_entry.job_id, inbound_entry.task_id))
-                    {
-                        continue;
-                    }
-                    self.commit_ready_or_cleanup_ready_tasks
-                        .insert(inbound_entry.job_id);
-                    self.commit_ready_queue
-                        .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
-
-                    if self.active_jobs.contains_key(&inbound_entry.job_id) {
-                        self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
-                        continue;
-                    }
-
-                    if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
-                        self.destroy_job_entry(job_entry);
-                    }
-                }
-
-                for inbound_entry in cleanup_ready_entries {
-                    if !self
-                        .ready_set
-                        .insert((inbound_entry.job_id, inbound_entry.task_id))
-                    {
-                        continue;
-                    }
-                    self.commit_ready_or_cleanup_ready_tasks
-                        .insert(inbound_entry.job_id);
-                    self.cleanup_ready_queue
-                        .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
-
-                    if self.active_jobs.contains_key(&inbound_entry.job_id) {
-                        self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
-                        continue;
-                    }
-
-                    if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
-                        self.destroy_job_entry(job_entry);
-                    }
-                }
-
-                for inbound_entry in ready_entries {
-                    if self
-                        .commit_ready_or_cleanup_ready_tasks
-                        .contains(&inbound_entry.job_id)
-                    {
-                        continue;
-                    }
-                    if !self
-                        .ready_set
-                        .insert((inbound_entry.job_id, inbound_entry.task_id))
-                    {
-                        continue;
-                    }
-                    if let Some(active_job) = self.active_jobs.get_mut(&inbound_entry.job_id) {
-                        active_job.enqueue(inbound_entry.task_id);
-                        continue;
-                    }
-                    if let Some(pending_job) = self.pending_jobs.get_mut(&inbound_entry.job_id) {
-                        pending_job.enqueue(inbound_entry.task_id);
-                        continue;
-                    }
-                    if self.active_jobs.len() < self.config.active_job_pool_capacity {
-                        self.active_jobs.insert(
-                            inbound_entry.job_id,
-                            JobEntry::new(
-                                inbound_entry.job_id,
-                                inbound_entry.resource_group_id,
-                                inbound_entry.task_id,
-                            ),
-                        );
-                        self.active_job_queue
-                            .push(ActiveJobQueueEntry::Ready(inbound_entry.job_id));
-                        continue;
-                    }
-                    self.pending_jobs.insert(
-                        inbound_entry.job_id,
-                        JobEntry::new(
-                            inbound_entry.job_id,
-                            inbound_entry.resource_group_id,
-                            inbound_entry.task_id,
-                        ),
-                    );
-                    self.pending_job_queue.push_back(inbound_entry.job_id);
-                }
-
-                // Only record iterations that actually had entries to enrich, so the average
-                // reflects real work rather than empty polls when the scheduler is idle.
-                if recording && inbound_entry_count > 0 {
-                    self.config
-                        .metrics
-                        .buffer_enrich_ns
-                        .fetch_add(elapsed_nanos(buffer_start), Ordering::Relaxed);
-                    self.config
-                        .metrics
-                        .buffer_enrich_count
-                        .fetch_add(1, Ordering::Relaxed);
-                }
-
+                self.load_inbound_queue_result(
+                    curr_session_id,
+                    storage_session_id,
+                    ready_entries,
+                    commit_ready_entries,
+                    cleanup_ready_entries,
+                )
+                .await?;
                 self.spawn_inbound_queue_reader();
             }
             InboundQueueResult::ResultNotReady => {}
@@ -493,17 +449,15 @@ impl<
             }
         }
 
-        // Stage 2: Make scheduling decisions to fill the dispatch queue
-        let dispatch_start = Instant::now();
+        Ok(())
+    }
+
+    async fn make_schedule_decision(&mut self) -> Result<(), SchedulerError> {
         let mut dispatch_queue_slots = self
             .config
             .dispatch_queue_capacity
             .saturating_sub(self.sink.size());
-        let initial_dispatch_queue_slots = dispatch_queue_slots;
-        loop {
-            if dispatch_queue_slots == 0 || self.ready_set.is_empty() {
-                break;
-            }
+        while dispatch_queue_slots > 0 && !self.ready_set.is_empty() {
             if self.active_job_queue_cursor >= self.active_job_queue.len() {
                 self.active_job_queue_cursor = 0;
             }
@@ -573,38 +527,6 @@ impl<
             }
         }
 
-        let dispatched = initial_dispatch_queue_slots - dispatch_queue_slots;
-        if recording && dispatched > 0 {
-            self.config
-                .metrics
-                .dispatch_enrich_ns
-                .fetch_add(elapsed_nanos(dispatch_start), Ordering::Relaxed);
-            self.config
-                .metrics
-                .dispatch_enrich_count
-                .fetch_add(1, Ordering::Relaxed);
-        }
-
-        if recording {
-            self.config
-                .metrics
-                .total_loop_ns
-                .fetch_add(elapsed_nanos(loop_start), Ordering::Relaxed);
-            self.config
-                .metrics
-                .loop_count
-                .fetch_add(1, Ordering::Relaxed);
-        }
-
-        // When the iteration dispatched nothing, the loop is either waiting on an in-flight poll or
-        // back-pressured by a full dispatch queue. In both cases it would otherwise spin without an
-        // await point; because the inbound polls run on tasks this same runtime must schedule, a
-        // non-yielding spin livelocks them and the scheduler never makes progress. Yield to let the
-        // poll tasks and dispatch-queue readers run.
-        if dispatched == 0 {
-            tokio::task::yield_now().await;
-        }
-
         Ok(())
     }
 

From 8b722d777c11d65d4e41f0c16d601366b49f5dcc Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 15:37:09 -0400
Subject: [PATCH 08/14] Core implementation done.

---
 components/spider-scheduler/src/core.rs       |   6 +-
 .../src/core_impl/round_robin.rs              | 698 -------------
 .../core_impl/round_robin/implementation.rs   | 919 ++++++++++++++++++
 .../src/core_impl/round_robin/mod.rs          |  39 +
 .../src/core_impl/round_robin/tests.rs        |   1 +
 components/spider-scheduler/src/error.rs      |   3 +
 6 files changed, 965 insertions(+), 701 deletions(-)
 delete mode 100644 components/spider-scheduler/src/core_impl/round_robin.rs
 create mode 100644 components/spider-scheduler/src/core_impl/round_robin/implementation.rs
 create mode 100644 components/spider-scheduler/src/core_impl/round_robin/mod.rs
 create mode 100644 components/spider-scheduler/src/core_impl/round_robin/tests.rs

diff --git a/components/spider-scheduler/src/core.rs b/components/spider-scheduler/src/core.rs
index f6715341..ebc5c143 100644
--- a/components/spider-scheduler/src/core.rs
+++ b/components/spider-scheduler/src/core.rs
@@ -16,12 +16,12 @@ use crate::{
 /// share the same runtime entry point.
 #[async_trait]
 pub trait SchedulerCore: Send {
-    /// The storage client used by the core to poll and read for placement decisions.
-    type StorageClient: SchedulerStorageClient;
-
     /// The dispatch sink the core writes assignments to.
     type Sink: DispatchQueueSink;
 
+    /// The storage client used by the core to poll and read for placement decisions.
+    type StorageClient: SchedulerStorageClient;
+
     /// Runs the scheduling loop until `cancellation_token` is triggered.
     ///
     /// The core polls the inbound queue through `storage_client`, applies its scheduling algorithm,
diff --git a/components/spider-scheduler/src/core_impl/round_robin.rs b/components/spider-scheduler/src/core_impl/round_robin.rs
deleted file mode 100644
index 8708a538..00000000
--- a/components/spider-scheduler/src/core_impl/round_robin.rs
+++ /dev/null
@@ -1,698 +0,0 @@
-//! Round-robin scheduler.
-//!
-//! This scheduler provides basic fairness across jobs using a round-robin scheduling policy. It
-//! polls tasks from the inbound queue (maintained by the storage service) and organizes jobs into
-//! two sets:
-//!
-//! * Active jobs: jobs that participate in round-robin scheduling.
-//! * Pending jobs: jobs that are buffered but not yet scheduled. When an active job has no
-//!   remaining schedulable tasks, it is replaced by the next pending job in FIFO order.
-//!
-//! The scheduler operates in discrete ticks. During each tick, it attempts to consume the results
-//! of an asynchronous inbound-queue polling operation and loads any newly available tasks into its
-//! internal buffers. It then makes scheduling decisions until the dispatch queue reaches capacity.
-//!
-//! # Properties
-//!
-//! * Each round-robin cycle may schedule at most one additional commit task and one additional
-//!   cleanup task, if available.
-//! * All buffered tasks are unique. Tasks loaded from the inbound queue are deduplicated before
-//!   entering the scheduler's internal buffers.
-//!
-//! # Configuration
-//!
-//! * `active_job_pool_capacity`: Maximum number of active jobs maintained by the scheduler.
-//! * `dispatch_queue_capacity`: Maximum number of task assignments in the dispatch queue.
-//! * `ready_task_capacity`: Maximum number of ready tasks buffered by the scheduler.
-//! * `commit_ready_task_capacity`: Maximum number of buffered commit-ready tasks.
-//! * `cleanup_ready_task_capacity`: Maximum number of buffered cleanup-ready tasks.
-//! * `storage_polling_wait_time_ms`: Maximum time, in milliseconds, that inbound-queue polling may
-//!   block on the storage-service side.
-//! * `tick_interval_ms`: Interval, in milliseconds, between scheduler ticks (tick execution time
-//!   included).
-
-use std::{
-    collections::{HashMap, HashSet, VecDeque},
-    time::Duration,
-};
-
-use async_trait::async_trait;
-use serde::Deserialize;
-use spider_core::types::id::{JobId, ResourceGroupId, SessionId, TaskId};
-use tokio::select;
-use tokio_util::sync::CancellationToken;
-
-use crate::{
-    DispatchQueueSink,
-    InboundEntry,
-    SchedulerCore,
-    SchedulerError,
-    SchedulerStorageClient,
-    StorageClientError,
-    TaskAssignment,
-};
-
-#[derive(Deserialize)]
-pub struct RoundRobinConfig<
-    SchedulerStorageClientType: SchedulerStorageClient + 'static,
-    DispatchQueueSinkType: DispatchQueueSink,
-> {
-    /// The capacity of the active jobs pool. The scheduler will make task assignments from these
-    /// jobs in a round-robin manner.
-    pub active_job_pool_capacity: usize,
-
-    /// The capacity of the dispatch queue.
-    pub dispatch_queue_capacity: usize,
-
-    /// The capacity of the total pending ready tasks buffered in the scheduler.
-    pub ready_task_capacity: usize,
-
-    /// The capacity of the total pending commit-ready tasks buffered in the scheduler.
-    pub commit_ready_task_capacity: usize,
-
-    /// The capacity of the total pending cleanup-ready tasks buffered in the scheduler.
-    pub cleanup_ready_task_capacity: usize,
-
-    /// The maximum time (in milliseconds) that the scheduler will wait for the storage server to
-    /// fill the inbound-queue reading request.
-    pub storage_polling_wait_time_ms: u64,
-
-    /// The time (in milliseconds) that the scheduler will spend on each tick.
-    pub tick_interval_ms: u64,
-
-    #[serde(skip)]
-    _marker: std::marker::PhantomData<(SchedulerStorageClientType, DispatchQueueSinkType)>,
-}
-
-#[async_trait]
-impl<
-    SchedulerStorageClientType: SchedulerStorageClient + 'static,
-    DispatchQueueSinkType: DispatchQueueSink,
-> SchedulerCore for RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
-{
-    type Sink = DispatchQueueSinkType;
-    type StorageClient = SchedulerStorageClientType;
-
-    async fn run(
-        self,
-        storage_client: Self::StorageClient,
-        sink: Self::Sink,
-        cancellation_token: CancellationToken,
-    ) -> Result<(), SchedulerError> {
-        RoundRobin::new(
-            SessionId::default(),
-            storage_client,
-            sink,
-            cancellation_token,
-            self,
-        )
-        .run()
-        .await
-    }
-}
-
-struct JobEntry {
-    job_id: JobId,
-    resource_group_id: ResourceGroupId,
-    task_ids: VecDeque<TaskId>,
-}
-
-impl JobEntry {
-    fn new(job_id: JobId, resource_group_id: ResourceGroupId, init_task_id: TaskId) -> Self {
-        Self {
-            job_id,
-            resource_group_id,
-            task_ids: VecDeque::from([init_task_id]),
-        }
-    }
-
-    fn enqueue(&mut self, task_id: TaskId) {
-        self.task_ids.push_back(task_id);
-    }
-
-    fn dequeue(&mut self) -> Option<TaskId> {
-        self.task_ids.pop_front()
-    }
-}
-
-#[derive(Clone)]
-enum ActiveJobQueueEntry {
-    Ready(JobId),
-    CommitReady,
-    CleanupReady,
-}
-
-struct RoundRobin<
-    SchedulerStorageClientType: SchedulerStorageClient + 'static,
-    DispatchQueueSinkType: DispatchQueueSink,
-> {
-    storage_client: SchedulerStorageClientType,
-    sink: DispatchQueueSinkType,
-    cancellation_token: CancellationToken,
-    config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
-    storage_session_id: SessionId,
-    ready_set: HashSet<(JobId, TaskId)>,
-
-    active_jobs: HashMap<JobId, JobEntry>,
-    active_job_queue: Vec<ActiveJobQueueEntry>,
-    active_job_queue_cursor: usize,
-
-    pending_jobs: HashMap<JobId, JobEntry>,
-    pending_job_queue: VecDeque<JobId>,
-
-    commit_ready_queue: VecDeque<(JobId, ResourceGroupId)>,
-    cleanup_ready_queue: VecDeque<(JobId, ResourceGroupId)>,
-
-    commit_ready_or_cleanup_ready_tasks: HashSet<JobId>,
-
-    inbound_queue_reader: AsyncInboundQueueReader<SchedulerStorageClientType>,
-}
-
-impl<
-    SchedulerStorageClientType: SchedulerStorageClient + 'static,
-    DispatchQueueSinkType: DispatchQueueSink,
-> RoundRobin<SchedulerStorageClientType, DispatchQueueSinkType>
-{
-    fn new(
-        storage_session_id: SessionId,
-        storage_client: SchedulerStorageClientType,
-        sink: DispatchQueueSinkType,
-        cancellation_token: CancellationToken,
-        config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
-    ) -> Self {
-        let ready_set = HashSet::with_capacity(config.ready_task_capacity);
-        let active_jobs = HashMap::with_capacity(config.active_job_pool_capacity);
-        let active_job_queue = Self::new_active_job_queue(config.active_job_pool_capacity);
-        let active_job_queue_cursor = 0;
-        let pending_jobs = HashMap::with_capacity(config.active_job_pool_capacity);
-        let pending_job_queue = VecDeque::with_capacity(config.active_job_pool_capacity);
-        let commit_ready_queue = VecDeque::with_capacity(config.commit_ready_task_capacity);
-        let cleanup_ready_queue = VecDeque::with_capacity(config.cleanup_ready_task_capacity);
-        let commit_ready_or_cleanup_ready_tasks = HashSet::with_capacity(
-            config.commit_ready_task_capacity + config.cleanup_ready_task_capacity,
-        );
-        let inbound_queue_reader = AsyncInboundQueueReader::new(storage_client.clone());
-        Self {
-            storage_client,
-            sink,
-            cancellation_token,
-            config,
-            storage_session_id,
-            ready_set,
-            active_jobs,
-            active_job_queue,
-            active_job_queue_cursor,
-            pending_jobs,
-            pending_job_queue,
-            commit_ready_queue,
-            cleanup_ready_queue,
-            commit_ready_or_cleanup_ready_tasks,
-            inbound_queue_reader,
-        }
-    }
-
-    fn new_active_job_queue(active_job_pool_capacity: usize) -> Vec<ActiveJobQueueEntry> {
-        let mut active_job_queue = Vec::with_capacity(active_job_pool_capacity + 2);
-        active_job_queue.push(ActiveJobQueueEntry::CommitReady);
-        active_job_queue.push(ActiveJobQueueEntry::CleanupReady);
-        active_job_queue
-    }
-
-    async fn run(mut self) -> Result<(), SchedulerError> {
-        let tick_interval = Duration::from_millis(self.config.tick_interval_ms);
-        loop {
-            let now = tokio::time::Instant::now();
-            let cancellation_token = self.cancellation_token.clone();
-            select! {
-                () = cancellation_token.cancelled() => {
-                    return Ok(());
-                }
-                result = self.tick() => {
-                    let () = result?;
-                }
-            }
-            let elapsed = now.elapsed();
-            let sleep_time = tick_interval.saturating_sub(elapsed);
-            if !sleep_time.is_zero() {
-                tokio::time::sleep(sleep_time).await;
-            } else {
-                tokio::task::yield_now().await;
-            }
-        }
-    }
-
-    fn clear_all_placement(&mut self) {
-        self.ready_set.clear();
-        self.active_jobs.clear();
-        self.pending_jobs.clear();
-        self.pending_job_queue.clear();
-        self.commit_ready_queue.clear();
-        self.cleanup_ready_queue.clear();
-        self.commit_ready_or_cleanup_ready_tasks.clear();
-
-        self.active_job_queue = Self::new_active_job_queue(self.config.active_job_pool_capacity);
-        self.active_job_queue_cursor = 0;
-    }
-
-    fn remove_active_job_and_dequeue_next_pending_job(
-        &mut self,
-        job_id: JobId,
-    ) -> Result<(), SchedulerError> {
-        if let Some(index) = self.active_job_queue.iter().position(|entry| match entry {
-            ActiveJobQueueEntry::Ready(id) => *id == job_id,
-            _ => false,
-        }) {
-            self.active_job_queue.swap_remove(index);
-        } else {
-            return Err(SchedulerError::Internal(
-                "attempt to remove a non-existing active job: {job_id:?}".to_string(),
-            ));
-        }
-
-        if let Some(entry_to_remove) = self.active_jobs.remove(&job_id) {
-            self.destroy_job_entry(entry_to_remove);
-        } else {
-            return Err(SchedulerError::Internal(
-                "attempt to destroy a non-existing active job: {job_id:?}".to_string(),
-            ));
-        }
-
-        if let Some(next_pending_job) = self.next_pending_job() {
-            self.active_job_queue
-                .push(ActiveJobQueueEntry::Ready(next_pending_job.job_id));
-            self.active_jobs
-                .insert(next_pending_job.job_id, next_pending_job);
-        }
-        Ok(())
-    }
-
-    fn next_pending_job(&mut self) -> Option<JobEntry> {
-        loop {
-            let job_id = self.pending_job_queue.pop_front()?;
-            // NOTE: The job may have been cancelled and removed from `pending_jobs`, so the ID in
-            // the queue may not necessarily exist in `pending_jobs`.
-            if let Some(pending_job) = self.pending_jobs.remove(&job_id) {
-                return Some(pending_job);
-            }
-        }
-    }
-
-    fn destroy_job_entry(&mut self, job_entry: JobEntry) {
-        for task_id in job_entry.task_ids {
-            self.ready_set.remove(&(job_entry.job_id, task_id));
-        }
-    }
-
-    async fn tick(&mut self) -> Result<(), SchedulerError> {
-        self.poll_inbound_queue_result().await?;
-        self.make_schedule_decision().await?;
-        Ok(())
-    }
-
-    async fn load_inbound_queue_result(
-        &mut self,
-        curr_session_id: SessionId,
-        storage_session_id: SessionId,
-        ready_entries: Vec<InboundEntry>,
-        commit_ready_entries: Vec<InboundEntry>,
-        cleanup_ready_entries: Vec<InboundEntry>,
-    ) -> Result<(), SchedulerError> {
-        if storage_session_id < curr_session_id {
-            return Err(SchedulerError::InvalidSessionId(storage_session_id));
-        }
-        if storage_session_id > curr_session_id {
-            self.storage_session_id = storage_session_id;
-            self.clear_all_placement();
-            self.sink.bump_session_id(storage_session_id).await?;
-        }
-
-        // Load commit ready tasks and cleanup ready tasks first to avoid loading a job that
-        // is already cancelled or commit-ready.
-        for inbound_entry in commit_ready_entries {
-            if !self
-                .ready_set
-                .insert((inbound_entry.job_id, inbound_entry.task_id))
-            {
-                continue;
-            }
-            self.commit_ready_or_cleanup_ready_tasks
-                .insert(inbound_entry.job_id);
-            self.commit_ready_queue
-                .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
-
-            if self.active_jobs.contains_key(&inbound_entry.job_id) {
-                self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
-                continue;
-            }
-
-            if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
-                self.destroy_job_entry(job_entry);
-            }
-        }
-
-        for inbound_entry in cleanup_ready_entries {
-            if !self
-                .ready_set
-                .insert((inbound_entry.job_id, inbound_entry.task_id))
-            {
-                continue;
-            }
-            self.commit_ready_or_cleanup_ready_tasks
-                .insert(inbound_entry.job_id);
-            self.cleanup_ready_queue
-                .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
-
-            if self.active_jobs.contains_key(&inbound_entry.job_id) {
-                self.remove_active_job_and_dequeue_next_pending_job(inbound_entry.job_id)?;
-                continue;
-            }
-
-            if let Some(job_entry) = self.pending_jobs.remove(&inbound_entry.job_id) {
-                self.destroy_job_entry(job_entry);
-            }
-        }
-
-        for inbound_entry in ready_entries {
-            if self
-                .commit_ready_or_cleanup_ready_tasks
-                .contains(&inbound_entry.job_id)
-            {
-                continue;
-            }
-            if !self
-                .ready_set
-                .insert((inbound_entry.job_id, inbound_entry.task_id))
-            {
-                continue;
-            }
-            if let Some(active_job) = self.active_jobs.get_mut(&inbound_entry.job_id) {
-                active_job.enqueue(inbound_entry.task_id);
-                continue;
-            }
-            if let Some(pending_job) = self.pending_jobs.get_mut(&inbound_entry.job_id) {
-                pending_job.enqueue(inbound_entry.task_id);
-                continue;
-            }
-            if self.active_jobs.len() < self.config.active_job_pool_capacity {
-                self.active_jobs.insert(
-                    inbound_entry.job_id,
-                    JobEntry::new(
-                        inbound_entry.job_id,
-                        inbound_entry.resource_group_id,
-                        inbound_entry.task_id,
-                    ),
-                );
-                self.active_job_queue
-                    .push(ActiveJobQueueEntry::Ready(inbound_entry.job_id));
-                continue;
-            }
-            self.pending_jobs.insert(
-                inbound_entry.job_id,
-                JobEntry::new(
-                    inbound_entry.job_id,
-                    inbound_entry.resource_group_id,
-                    inbound_entry.task_id,
-                ),
-            );
-            self.pending_job_queue.push_back(inbound_entry.job_id);
-        }
-
-        Ok(())
-    }
-
-    async fn poll_inbound_queue_result(&mut self) -> Result<(), SchedulerError> {
-        let curr_session_id = self.storage_session_id;
-        let inbound_queue_result = self
-            .inbound_queue_reader
-            .poll_ready(curr_session_id)
-            .await?;
-        match inbound_queue_result {
-            InboundQueueResult::Result {
-                session_id: storage_session_id,
-                ready_entries,
-                commit_ready_entries,
-                cleanup_ready_entries,
-            } => {
-                self.load_inbound_queue_result(
-                    curr_session_id,
-                    storage_session_id,
-                    ready_entries,
-                    commit_ready_entries,
-                    cleanup_ready_entries,
-                )
-                .await?;
-                self.spawn_inbound_queue_reader();
-            }
-            InboundQueueResult::ResultNotReady => {}
-            InboundQueueResult::HandleNotSpawned => {
-                self.spawn_inbound_queue_reader();
-            }
-        }
-
-        Ok(())
-    }
-
-    async fn make_schedule_decision(&mut self) -> Result<(), SchedulerError> {
-        let mut dispatch_queue_slots = self
-            .config
-            .dispatch_queue_capacity
-            .saturating_sub(self.sink.size());
-        while dispatch_queue_slots > 0 && !self.ready_set.is_empty() {
-            if self.active_job_queue_cursor >= self.active_job_queue.len() {
-                self.active_job_queue_cursor = 0;
-            }
-            let active_job_queue_entry =
-                match self.active_job_queue.get(self.active_job_queue_cursor) {
-                    Some(entry) => entry.clone(),
-                    None => {
-                        return Err(SchedulerError::Internal(
-                            "active job queue cursor is corrupted".to_string(),
-                        ));
-                    }
-                };
-            self.active_job_queue_cursor += 1;
-            match active_job_queue_entry {
-                ActiveJobQueueEntry::CleanupReady => {
-                    let Some((job_id, resource_group_id)) = self.cleanup_ready_queue.pop_front()
-                    else {
-                        continue;
-                    };
-                    self.sink
-                        .enqueue(TaskAssignment {
-                            job_id,
-                            resource_group_id,
-                            task_id: TaskId::Cleanup,
-                        })
-                        .await?;
-                    self.ready_set.remove(&(job_id, TaskId::Cleanup));
-                    self.commit_ready_or_cleanup_ready_tasks.remove(&job_id);
-                    dispatch_queue_slots -= 1;
-                }
-                ActiveJobQueueEntry::CommitReady => {
-                    let Some((job_id, resource_group_id)) = self.commit_ready_queue.pop_front()
-                    else {
-                        continue;
-                    };
-                    self.sink
-                        .enqueue(TaskAssignment {
-                            job_id,
-                            resource_group_id,
-                            task_id: TaskId::Commit,
-                        })
-                        .await?;
-                    self.ready_set.remove(&(job_id, TaskId::Commit));
-                    self.commit_ready_or_cleanup_ready_tasks.remove(&job_id);
-                    dispatch_queue_slots -= 1;
-                }
-                ActiveJobQueueEntry::Ready(job_id) => {
-                    let Some(job_entry) = self.active_jobs.get_mut(&job_id) else {
-                        return Err(SchedulerError::Internal(
-                            "attempt to remove a non-existing active job: {job_id:?}".to_string(),
-                        ));
-                    };
-                    if let Some(task_id) = job_entry.dequeue() {
-                        self.sink
-                            .enqueue(TaskAssignment {
-                                job_id,
-                                resource_group_id: job_entry.resource_group_id,
-                                task_id,
-                            })
-                            .await?;
-                        self.ready_set.remove(&(job_id, task_id));
-                        dispatch_queue_slots -= 1;
-                    } else {
-                        self.remove_active_job_and_dequeue_next_pending_job(job_id)?;
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn spawn_inbound_queue_reader(&mut self) {
-        let num_commit_ready_tasks = self.commit_ready_queue.len();
-        let num_cleanup_ready_tasks = self.cleanup_ready_queue.len();
-        let max_commit_ready_to_poll = self
-            .config
-            .commit_ready_task_capacity
-            .saturating_sub(num_commit_ready_tasks);
-        let max_cleanup_ready_to_poll = self
-            .config
-            .cleanup_ready_task_capacity
-            .saturating_sub(num_cleanup_ready_tasks);
-        let max_ready_to_poll = self.config.ready_task_capacity.saturating_sub(
-            self.ready_set.len() - num_commit_ready_tasks - num_cleanup_ready_tasks,
-        );
-        self.inbound_queue_reader.spawn(
-            Duration::from_millis(self.config.storage_polling_wait_time_ms),
-            max_ready_to_poll,
-            max_commit_ready_to_poll,
-            max_cleanup_ready_to_poll,
-        );
-    }
-}
-
-enum InboundQueueResult {
-    Result {
-        session_id: SessionId,
-        ready_entries: Vec<InboundEntry>,
-        commit_ready_entries: Vec<InboundEntry>,
-        cleanup_ready_entries: Vec<InboundEntry>,
-    },
-    ResultNotReady,
-    HandleNotSpawned,
-}
-
-struct InboundQueuePollingHandle {
-    ready_handle:
-        tokio::task::JoinHandle<Result<(SessionId, Vec<InboundEntry>), StorageClientError>>,
-    commit_ready_handle:
-        tokio::task::JoinHandle<Result<(SessionId, Vec<InboundEntry>), StorageClientError>>,
-    cleanup_ready_handle:
-        tokio::task::JoinHandle<Result<(SessionId, Vec<InboundEntry>), StorageClientError>>,
-}
-
-impl InboundQueuePollingHandle {
-    async fn poll_ready(
-        &mut self,
-        curr_session_id: SessionId,
-    ) -> Result<InboundQueueResult, SchedulerError> {
-        if !self.ready_handle.is_finished()
-            || !self.commit_ready_handle.is_finished()
-            || !self.cleanup_ready_handle.is_finished()
-        {
-            return Ok(InboundQueueResult::ResultNotReady);
-        }
-
-        let (ready_session_id, ready_entries) = (&mut self.ready_handle)
-            .await
-            .map_err(|e| SchedulerError::Internal(e.to_string()))??;
-        let (commit_session_id, commit_ready_entries) = (&mut self.commit_ready_handle)
-            .await
-            .map_err(|e| SchedulerError::Internal(e.to_string()))??;
-        let (cleanup_session_id, cleanup_ready_entries) =
-            (&mut self.cleanup_ready_handle)
-                .await
-                .map_err(|e| SchedulerError::Internal(e.to_string()))??;
-
-        let latest_session_id = curr_session_id
-            .max(ready_session_id)
-            .max(commit_session_id)
-            .max(cleanup_session_id);
-
-        Ok(InboundQueueResult::Result {
-            session_id: latest_session_id,
-            ready_entries: Self::drop_if_stale(ready_session_id, latest_session_id, ready_entries),
-            commit_ready_entries: Self::drop_if_stale(
-                commit_session_id,
-                latest_session_id,
-                commit_ready_entries,
-            ),
-            cleanup_ready_entries: Self::drop_if_stale(
-                cleanup_session_id,
-                latest_session_id,
-                cleanup_ready_entries,
-            ),
-        })
-    }
-
-    fn drop_if_stale(
-        session_id: SessionId,
-        latest_session_id: SessionId,
-        entries: Vec<InboundEntry>,
-    ) -> Vec<InboundEntry> {
-        if session_id == latest_session_id {
-            entries
-        } else {
-            Vec::new()
-        }
-    }
-}
-
-struct AsyncInboundQueueReader<StorageClientType: SchedulerStorageClient + 'static> {
-    storage_client: StorageClientType,
-    handle: Option<InboundQueuePollingHandle>,
-}
-
-impl<StorageClientType: SchedulerStorageClient + 'static>
-    AsyncInboundQueueReader<StorageClientType>
-{
-    const fn new(storage_client: StorageClientType) -> Self {
-        Self {
-            storage_client,
-            handle: None,
-        }
-    }
-
-    async fn poll_ready(
-        &mut self,
-        curr_session_id: SessionId,
-    ) -> Result<InboundQueueResult, SchedulerError> {
-        match &mut self.handle {
-            None => Ok(InboundQueueResult::HandleNotSpawned),
-            Some(handle) => {
-                let inbound_queue_result = handle.poll_ready(curr_session_id).await?;
-                if !matches!(inbound_queue_result, InboundQueueResult::ResultNotReady) {
-                    self.handle = None;
-                }
-                Ok(inbound_queue_result)
-            }
-        }
-    }
-
-    fn spawn(
-        &mut self,
-        storage_polling_wait_time: Duration,
-        max_ready_entries: usize,
-        max_commit_ready_entries: usize,
-        max_cleanup_ready_entries: usize,
-    ) {
-        let ready_storage_client = self.storage_client.clone();
-        let ready_handle = tokio::task::spawn(async move {
-            ready_storage_client
-                .poll_ready(max_ready_entries, storage_polling_wait_time)
-                .await
-        });
-
-        let commit_ready_storage_client = self.storage_client.clone();
-        let commit_ready_handle = tokio::task::spawn(async move {
-            commit_ready_storage_client
-                .poll_commit_ready(max_commit_ready_entries, storage_polling_wait_time)
-                .await
-        });
-
-        let cleanup_ready_storage_client = self.storage_client.clone();
-        let cleanup_ready_handle = tokio::task::spawn(async move {
-            cleanup_ready_storage_client
-                .poll_cleanup_ready(max_cleanup_ready_entries, storage_polling_wait_time)
-                .await
-        });
-
-        self.handle = Some(InboundQueuePollingHandle {
-            ready_handle,
-            commit_ready_handle,
-            cleanup_ready_handle,
-        });
-    }
-}
diff --git a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
new file mode 100644
index 00000000..9ff881bc
--- /dev/null
+++ b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
@@ -0,0 +1,919 @@
+//! The implementation of the round-robin scheduler core. See the parent module's documentation for
+//! the scheduling policy and configuration.
+
+use std::{
+    collections::{HashMap, HashSet, VecDeque},
+    time::Duration,
+};
+
+use async_trait::async_trait;
+use serde::Deserialize;
+use spider_core::types::id::{JobId, ResourceGroupId, SessionId, TaskId};
+use tokio::select;
+use tokio_util::sync::CancellationToken;
+
+use crate::{
+    DispatchQueueSink,
+    InboundEntry,
+    SchedulerCore,
+    SchedulerError,
+    SchedulerStorageClient,
+    StorageClientError,
+    TaskAssignment,
+};
+
+/// The configuration of the round-robin scheduler core.
+///
+/// The configuration itself implements [`SchedulerCore`]: consuming it through
+/// [`SchedulerCore::run`] creates the underlying scheduler and drives its scheduling loop.
+///
+/// # Type Parameters
+///
+/// * `SchedulerStorageClientType` - The storage client used to poll the inbound queue.
+/// * `DispatchQueueSinkType` - The dispatch sink that task assignments are written to.
+#[derive(Deserialize)]
+pub struct RoundRobinConfig<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> {
+    /// The capacity of the active job queue. The scheduler will make task assignments from these
+    /// jobs in a round-robin manner.
+    ///
+    /// Must be greater than 0.
+    pub active_job_queue_capacity: usize,
+
+    /// The capacity of the dispatch queue.
+    ///
+    /// Must be greater than 0.
+    pub dispatch_queue_capacity: usize,
+
+    /// The capacity of the total pending ready tasks buffered in the scheduler.
+    ///
+    /// Must be greater than 0.
+    pub ready_task_capacity: usize,
+
+    /// The capacity of the total pending commit-ready tasks buffered in the scheduler.
+    ///
+    /// Must be greater than 0.
+    pub commit_ready_task_capacity: usize,
+
+    /// The capacity of the total pending cleanup-ready tasks buffered in the scheduler.
+    ///
+    /// Must be greater than 0.
+    pub cleanup_ready_task_capacity: usize,
+
+    /// The maximum time (in milliseconds) that the scheduler will wait for the storage server to
+    /// fill the inbound-queue reading request.
+    pub storage_poll_timeout_ms: u64,
+
+    /// The minimum duration (in milliseconds) of each tick; any time left over is slept away.
+    pub tick_interval_ms: u64,
+
+    #[serde(skip)]
+    _marker: std::marker::PhantomData<(SchedulerStorageClientType, DispatchQueueSinkType)>,
+}
+
+#[async_trait]
+impl<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> SchedulerCore for RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
+{
+    type Sink = DispatchQueueSinkType;
+    type StorageClient = SchedulerStorageClientType;
+
+    async fn run(
+        self,
+        storage_client: Self::StorageClient,
+        sink: Self::Sink,
+        cancellation_token: CancellationToken,
+    ) -> Result<(), SchedulerError> {
+        RoundRobin::new(
+            SessionId::default(),
+            storage_client,
+            sink,
+            cancellation_token,
+            self,
+        )?
+        .run()
+        .await
+    }
+}
+
+/// A FIFO queue of a job's buffered ready tasks.
+struct JobTaskQueue {
+    job_id: JobId,
+    resource_group_id: ResourceGroupId,
+    task_ids: VecDeque<TaskId>,
+}
+
+impl JobTaskQueue {
+    /// Factory function.
+    ///
+    /// # Returns
+    ///
+    /// A new task queue for the given job, seeded with `init_task_id`.
+    fn new(job_id: JobId, resource_group_id: ResourceGroupId, init_task_id: TaskId) -> Self {
+        Self {
+            job_id,
+            resource_group_id,
+            task_ids: VecDeque::from([init_task_id]),
+        }
+    }
+
+    fn enqueue(&mut self, task_id: TaskId) {
+        self.task_ids.push_back(task_id);
+    }
+
+    /// # Returns
+    ///
+    /// * The next ready task ID in FIFO order.
+    /// * [`None`] if the queue is empty.
+    fn dequeue(&mut self) -> Option<TaskId> {
+        self.task_ids.pop_front()
+    }
+}
+
+/// A slot in the round-robin rotation that the scheduler draws task assignments from.
+#[derive(Clone)]
+enum RoundRobinSlot {
+    /// An active job: assignments are drawn from the job's buffered ready tasks.
+    Job(JobId),
+
+    /// The commit lane: assignments are drawn from the buffered commit-ready jobs.
+    CommitReady,
+
+    /// The cleanup lane: assignments are drawn from the buffered cleanup-ready jobs.
+    CleanupReady,
+}
+
+/// The round-robin scheduler core created from a [`RoundRobinConfig`].
+///
+/// # Type Parameters
+///
+/// * `SchedulerStorageClientType` - The storage client used to poll the inbound queue.
+/// * `DispatchQueueSinkType` - The dispatch sink that task assignments are written to.
+struct RoundRobin<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> {
+    sink: DispatchQueueSinkType,
+    cancellation_token: CancellationToken,
+    config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
+    storage_session_id: SessionId,
+    buffered_tasks: HashSet<(JobId, TaskId)>,
+
+    active_jobs: HashMap<JobId, JobTaskQueue>,
+    active_job_queue: Vec<RoundRobinSlot>,
+    active_job_queue_round_robin_cursor: usize,
+
+    pending_jobs: HashMap<JobId, JobTaskQueue>,
+    pending_job_queue: VecDeque<JobId>,
+
+    commit_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
+    cleanup_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
+
+    finalizing_jobs: HashSet<JobId>,
+
+    inbound_queue_reader: AsyncInboundQueueReader<SchedulerStorageClientType>,
+}
+
+impl<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> RoundRobin<SchedulerStorageClientType, DispatchQueueSinkType>
+{
+    /// Factory function.
+    ///
+    /// Creates a [`RoundRobin`] scheduler from the given config.
+    ///
+    /// # Returns
+    ///
+    /// The constructed [`RoundRobin`] scheduler on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::InvalidConfig`] if the config contains invalid values. Check
+    ///   [`RoundRobinConfig`]'s docstring for details.
+    fn new(
+        storage_session_id: SessionId,
+        storage_client: SchedulerStorageClientType,
+        sink: DispatchQueueSinkType,
+        cancellation_token: CancellationToken,
+        config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
+    ) -> Result<Self, SchedulerError> {
+        if config.active_job_queue_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`active_job_queue_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if config.dispatch_queue_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`dispatch_queue_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if config.ready_task_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`ready_task_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if config.commit_ready_task_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`commit_ready_task_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if config.cleanup_ready_task_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`cleanup_ready_task_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        let buffered_tasks = HashSet::with_capacity(config.ready_task_capacity);
+        let active_jobs = HashMap::with_capacity(config.active_job_queue_capacity);
+        let active_job_queue = Self::new_active_job_queue(config.active_job_queue_capacity);
+        let round_robin_cursor = 0;
+        let pending_jobs = HashMap::with_capacity(config.active_job_queue_capacity);
+        let pending_job_queue = VecDeque::with_capacity(config.active_job_queue_capacity);
+        let commit_ready_jobs = VecDeque::with_capacity(config.commit_ready_task_capacity);
+        let cleanup_ready_jobs = VecDeque::with_capacity(config.cleanup_ready_task_capacity);
+        let finalizing_jobs = HashSet::with_capacity(
+            config.commit_ready_task_capacity + config.cleanup_ready_task_capacity,
+        );
+        let inbound_queue_reader = AsyncInboundQueueReader::new(storage_client);
+        Ok(Self {
+            sink,
+            cancellation_token,
+            config,
+            storage_session_id,
+            buffered_tasks,
+            active_jobs,
+            active_job_queue,
+            active_job_queue_round_robin_cursor: round_robin_cursor,
+            pending_jobs,
+            pending_job_queue,
+            commit_ready_jobs,
+            cleanup_ready_jobs,
+            finalizing_jobs,
+            inbound_queue_reader,
+        })
+    }
+
+    /// # Returns
+    ///
+    /// A new active job queue containing only the commit-ready and cleanup-ready slots.
+    fn new_active_job_queue(active_job_queue_capacity: usize) -> Vec<RoundRobinSlot> {
+        let mut active_job_queue = Vec::with_capacity(active_job_queue_capacity + 2);
+        active_job_queue.push(RoundRobinSlot::CommitReady);
+        active_job_queue.push(RoundRobinSlot::CleanupReady);
+        active_job_queue
+    }
+
+    /// Runs the scheduling loop until the cancellation token is triggered.
+    ///
+    /// Each iteration races one [`Self::tick`] against cancellation, then sleeps out the rest of
+    /// the tick interval (or yields if the tick overran); the sleep itself is not cancellable.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`Self::tick`]'s return values on failure.
+    async fn run(mut self) -> Result<(), SchedulerError> {
+        let tick_interval = Duration::from_millis(self.config.tick_interval_ms);
+        loop {
+            let now = tokio::time::Instant::now();
+            let cancellation_token = self.cancellation_token.clone();
+            select! {
+                () = cancellation_token.cancelled() => {
+                    return Ok(());
+                }
+                result = self.tick() => {
+                    let () = result?;
+                }
+            }
+            let elapsed = now.elapsed();
+            let sleep_time = tick_interval.saturating_sub(elapsed);
+            if sleep_time.is_zero() {
+                tokio::task::yield_now().await;
+            } else {
+                tokio::time::sleep(sleep_time).await;
+            }
+        }
+    }
+
+    /// Clears all buffered jobs and tasks, resetting the scheduler to its initial placement state.
+    fn clear(&mut self) {
+        self.buffered_tasks.clear();
+        self.active_jobs.clear();
+        self.pending_jobs.clear();
+        self.pending_job_queue.clear();
+        self.commit_ready_jobs.clear();
+        self.cleanup_ready_jobs.clear();
+        self.finalizing_jobs.clear();
+
+        self.active_job_queue = Self::new_active_job_queue(self.config.active_job_queue_capacity);
+        self.active_job_queue_round_robin_cursor = 0;
+    }
+
+    /// Removes the given job from the active set, discards its buffered tasks, and backfills the
+    /// freed slot with the next pending job, if any.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::Internal`] if the given job is not currently active.
+    fn retire_active_job(&mut self, job_id: JobId) -> Result<(), SchedulerError> {
+        if let Some(index) = self.active_job_queue.iter().position(|entry| match entry {
+            RoundRobinSlot::Job(id) => *id == job_id,
+            RoundRobinSlot::CommitReady | RoundRobinSlot::CleanupReady => false,
+        }) {
+            self.active_job_queue.swap_remove(index);
+        } else {
+            return Err(SchedulerError::Internal(format!(
+                "attempt to remove a non-existing active job: {job_id:?}"
+            )));
+        }
+
+        if let Some(removed_entry) = self.active_jobs.remove(&job_id) {
+            self.discard_job_tasks(removed_entry);
+        } else {
+            return Err(SchedulerError::Internal(format!(
+                "attempt to destroy a non-existing active job: {job_id:?}"
+            )));
+        }
+
+        if let Some(next_pending_job) = self.pop_next_pending_job() {
+            self.active_job_queue
+                .push(RoundRobinSlot::Job(next_pending_job.job_id));
+            self.active_jobs
+                .insert(next_pending_job.job_id, next_pending_job);
+        }
+        Ok(())
+    }
+
+    /// # Returns
+    ///
+    /// The next pending job in FIFO order, or [`None`] if there is no pending job left.
+    fn pop_next_pending_job(&mut self) -> Option<JobTaskQueue> {
+        loop {
+            let job_id = self.pending_job_queue.pop_front()?;
+            // NOTE: The job may have been cancelled and removed from `pending_jobs`, so the ID in
+            // the queue may not necessarily exist in `pending_jobs`.
+            if let Some(pending_job) = self.pending_jobs.remove(&job_id) {
+                return Some(pending_job);
+            }
+        }
+    }
+
+    /// Removes all of the given job's queued tasks from the buffered-task set.
+    fn discard_job_tasks(&mut self, job_entry: JobTaskQueue) {
+        for task_id in job_entry.task_ids {
+            self.buffered_tasks.remove(&(job_entry.job_id, task_id));
+        }
+    }
+
+    /// Executes a single scheduling tick: consumes any completed inbound poll, then makes
+    /// scheduling decisions to fill the dispatch queue.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`Self::consume_inbound_poll_result`]'s return values on failure.
+    /// * Forwards [`Self::make_schedule_decisions`]'s return values on failure.
+    async fn tick(&mut self) -> Result<(), SchedulerError> {
+        self.consume_inbound_poll_result().await?;
+        self.make_schedule_decisions().await?;
+        Ok(())
+    }
+
+    /// Loads polled inbound entries into the scheduler's internal buffers.
+    ///
+    /// If the polled session is newer than the current session, all existing placement states are
+    /// cleared and the dispatch queue's session is bumped before loading. Entries whose tasks are
+    /// already buffered are ignored.
+    ///
+    /// A commit-ready or cleanup-ready entry marks its job as finalizing. A finalizing job no
+    /// longer participates in regular-task scheduling: the job is removed from the active or
+    /// pending set, its buffered ready tasks are discarded, and its incoming ready entries are
+    /// ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::InvalidSessionId`] if the polled session is older than the current
+    ///   session.
+    /// * Forwards [`DispatchQueueSink::bump_session_id`]'s return values on failure.
+    /// * Forwards [`Self::mark_job_finalizing`]'s return values on failure.
+    async fn ingest_inbound_entries(
+        &mut self,
+        curr_session_id: SessionId,
+        storage_session_id: SessionId,
+        ready_entries: Vec<InboundEntry>,
+        commit_ready_entries: Vec<InboundEntry>,
+        cleanup_ready_entries: Vec<InboundEntry>,
+    ) -> Result<(), SchedulerError> {
+        if storage_session_id < curr_session_id {
+            return Err(SchedulerError::InvalidSessionId(storage_session_id));
+        }
+        if storage_session_id > curr_session_id {
+            self.storage_session_id = storage_session_id;
+            self.clear();
+            self.sink.bump_session_id(storage_session_id).await?;
+        }
+
+        // Load commit ready tasks and cleanup ready tasks first to avoid loading a job that
+        // is already cancelled or commit-ready.
+        for inbound_entry in commit_ready_entries {
+            if !self
+                .buffered_tasks
+                .insert((inbound_entry.job_id, inbound_entry.task_id))
+            {
+                continue;
+            }
+            self.commit_ready_jobs
+                .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
+            self.mark_job_finalizing(inbound_entry.job_id)?;
+        }
+
+        for inbound_entry in cleanup_ready_entries {
+            if !self
+                .buffered_tasks
+                .insert((inbound_entry.job_id, inbound_entry.task_id))
+            {
+                continue;
+            }
+            self.cleanup_ready_jobs
+                .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
+            self.mark_job_finalizing(inbound_entry.job_id)?;
+        }
+
+        for inbound_entry in ready_entries {
+            if self.finalizing_jobs.contains(&inbound_entry.job_id) {
+                continue;
+            }
+            if !self
+                .buffered_tasks
+                .insert((inbound_entry.job_id, inbound_entry.task_id))
+            {
+                continue;
+            }
+            if let Some(active_job) = self.active_jobs.get_mut(&inbound_entry.job_id) {
+                active_job.enqueue(inbound_entry.task_id);
+                continue;
+            }
+            if let Some(pending_job) = self.pending_jobs.get_mut(&inbound_entry.job_id) {
+                pending_job.enqueue(inbound_entry.task_id);
+                continue;
+            }
+            if self.active_jobs.len() < self.config.active_job_queue_capacity {
+                self.active_jobs.insert(
+                    inbound_entry.job_id,
+                    JobTaskQueue::new(
+                        inbound_entry.job_id,
+                        inbound_entry.resource_group_id,
+                        inbound_entry.task_id,
+                    ),
+                );
+                self.active_job_queue
+                    .push(RoundRobinSlot::Job(inbound_entry.job_id));
+                continue;
+            }
+            self.pending_jobs.insert(
+                inbound_entry.job_id,
+                JobTaskQueue::new(
+                    inbound_entry.job_id,
+                    inbound_entry.resource_group_id,
+                    inbound_entry.task_id,
+                ),
+            );
+            self.pending_job_queue.push_back(inbound_entry.job_id);
+        }
+
+        Ok(())
+    }
+
+    /// Marks the given job as finalizing and withdraws it from regular-task scheduling.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`Self::retire_active_job`]'s return values on failure.
+    fn mark_job_finalizing(&mut self, job_id: JobId) -> Result<(), SchedulerError> {
+        self.finalizing_jobs.insert(job_id);
+        if self.active_jobs.contains_key(&job_id) {
+            return self.retire_active_job(job_id);
+        }
+        if let Some(job_entry) = self.pending_jobs.remove(&job_id) {
+            self.discard_job_tasks(job_entry);
+        }
+        Ok(())
+    }
+
+    /// Checks on the in-flight inbound poll: once it has completed, its entries are ingested and
+    /// the next poll is started. If no poll is in flight, a new one is started right away.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`AsyncInboundQueueReader::try_collect_result`]'s return values on failure.
+    /// * Forwards [`Self::ingest_inbound_entries`]'s return values on failure.
+    /// * Forwards [`Self::start_inbound_poll`]'s return values on failure.
+    async fn consume_inbound_poll_result(&mut self) -> Result<(), SchedulerError> {
+        let curr_session_id = self.storage_session_id;
+        let poll_state = self
+            .inbound_queue_reader
+            .try_collect_result(curr_session_id)
+            .await?;
+        match poll_state {
+            // Still in flight: leave the poll alone and check again on the next tick.
+            InboundPollState::Pending => return Ok(()),
+            // Nothing to ingest: just start a poll below.
+            InboundPollState::NotStarted => {}
+            InboundPollState::Ready {
+                session_id: storage_session_id,
+                ready_entries,
+                commit_ready_entries,
+                cleanup_ready_entries,
+            } => {
+                self.ingest_inbound_entries(
+                    curr_session_id,
+                    storage_session_id,
+                    ready_entries,
+                    commit_ready_entries,
+                    cleanup_ready_entries,
+                )
+                .await?;
+            }
+        }
+
+        // No poll is in flight at this point, so start the next one.
+        self.start_inbound_poll()
+    }
+
+    /// Walks the round-robin queue, writing at most one task assignment per visited slot to the
+    /// dispatch queue, until the dispatch queue reaches capacity or no buffered task is left.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::Internal`] if the round-robin queue is inconsistent with the scheduler's
+    ///   job bookkeeping.
+    /// * Forwards [`DispatchQueueSink::enqueue`]'s return values on failure.
+    /// * Forwards [`Self::retire_active_job`]'s return values on failure.
+    async fn make_schedule_decisions(&mut self) -> Result<(), SchedulerError> {
+        let mut free_dispatch_slots = self
+            .config
+            .dispatch_queue_capacity
+            .saturating_sub(self.sink.size());
+        while free_dispatch_slots > 0 && !self.buffered_tasks.is_empty() {
+            // Wrap around once the cursor has moved past the last round-robin slot.
+            if self.active_job_queue_round_robin_cursor >= self.active_job_queue.len() {
+                self.active_job_queue_round_robin_cursor = 0;
+            }
+            let Some(slot) = self
+                .active_job_queue
+                .get(self.active_job_queue_round_robin_cursor)
+                .cloned()
+            else {
+                return Err(SchedulerError::Internal(
+                    "round-robin cursor is corrupted".to_string(),
+                ));
+            };
+            self.active_job_queue_round_robin_cursor += 1;
+
+            match slot {
+                RoundRobinSlot::CleanupReady => {
+                    let Some((job_id, resource_group_id)) = self.cleanup_ready_jobs.pop_front()
+                    else {
+                        continue;
+                    };
+                    self.sink
+                        .enqueue(TaskAssignment {
+                            job_id,
+                            resource_group_id,
+                            task_id: TaskId::Cleanup,
+                        })
+                        .await?;
+                    self.buffered_tasks.remove(&(job_id, TaskId::Cleanup));
+                    self.finalizing_jobs.remove(&job_id);
+                    free_dispatch_slots -= 1;
+                }
+                RoundRobinSlot::CommitReady => {
+                    let Some((job_id, resource_group_id)) = self.commit_ready_jobs.pop_front()
+                    else {
+                        continue;
+                    };
+                    self.sink
+                        .enqueue(TaskAssignment {
+                            job_id,
+                            resource_group_id,
+                            task_id: TaskId::Commit,
+                        })
+                        .await?;
+                    self.buffered_tasks.remove(&(job_id, TaskId::Commit));
+                    self.finalizing_jobs.remove(&job_id);
+                    free_dispatch_slots -= 1;
+                }
+                RoundRobinSlot::Job(job_id) => {
+                    let Some(job_entry) = self.active_jobs.get_mut(&job_id) else {
+                        return Err(SchedulerError::Internal(format!(
+                            "attempt to remove a non-existing active job: {job_id:?}"
+                        )));
+                    };
+                    let Some(task_id) = job_entry.dequeue() else {
+                        // The job has run out of queued tasks: retire it from the active set.
+                        self.retire_active_job(job_id)?;
+                        continue;
+                    };
+                    self.sink
+                        .enqueue(TaskAssignment {
+                            job_id,
+                            resource_group_id: job_entry.resource_group_id,
+                            task_id,
+                        })
+                        .await?;
+                    self.buffered_tasks.remove(&(job_id, task_id));
+                    free_dispatch_slots -= 1;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Starts a new asynchronous inbound poll, with per-lane entry limits derived from the
+    /// remaining buffer capacities. All limit arithmetic saturates at 0 rather than underflowing.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`AsyncInboundQueueReader::start`]'s return values on failure.
+    fn start_inbound_poll(&mut self) -> Result<(), SchedulerError> {
+        let num_commit_ready_tasks = self.commit_ready_jobs.len();
+        let num_cleanup_ready_tasks = self.cleanup_ready_jobs.len();
+        let max_commit_ready_entries = self
+            .config
+            .commit_ready_task_capacity
+            .saturating_sub(num_commit_ready_tasks);
+        let max_cleanup_ready_entries = self
+            .config
+            .cleanup_ready_task_capacity
+            .saturating_sub(num_cleanup_ready_tasks);
+        let num_finalizing = num_commit_ready_tasks + num_cleanup_ready_tasks;
+        let num_ready = self.buffered_tasks.len().saturating_sub(num_finalizing);
+        let max_ready_entries = self.config.ready_task_capacity.saturating_sub(num_ready);
+
+        self.inbound_queue_reader.start(
+            Duration::from_millis(self.config.storage_poll_timeout_ms),
+            max_ready_entries,
+            max_commit_ready_entries,
+            max_cleanup_ready_entries,
+        )
+    }
+}
+
+/// The state of an asynchronous inbound-queue poll.
+enum InboundPollState {
+    /// The poll has completed, carrying the polled session and the entries drained from each
+    /// inbound-queue lane.
+    Ready {
+        session_id: SessionId,
+        ready_entries: Vec<InboundEntry>,
+        commit_ready_entries: Vec<InboundEntry>,
+        cleanup_ready_entries: Vec<InboundEntry>,
+    },
+
+    /// The poll is still in flight.
+    Pending,
+
+    /// No poll is in flight: none has been started, or the last one has already been collected.
+    NotStarted,
+}
+
+/// The join handle of a single lane poll, resolving to the polled session and its entries.
+type LanePollHandle =
+    tokio::task::JoinHandle<Result<(SessionId, Vec<InboundEntry>), StorageClientError>>;
+/// The join handles of one in-flight inbound poll, one per inbound-queue lane.
+#[allow(clippy::struct_field_names)]
+struct InboundPollHandles {
+    ready_handle: LanePollHandle,
+    commit_ready_handle: LanePollHandle,
+    cleanup_ready_handle: LanePollHandle,
+}
+
+impl InboundPollHandles {
+    /// Tries to collect the results of all lane polls without blocking.
+    ///
+    /// The latest session is the maximum of `curr_session_id` and the sessions reported by the
+    /// lanes. Entries from lanes that report an older session are dropped.
+    ///
+    /// # Returns
+    ///
+    /// On success:
+    ///
+    /// * [`InboundPollState::Pending`] if any lane poll is still in flight.
+    /// * [`InboundPollState::Ready`] with the latest observed session and its entries otherwise.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::Internal`] if any lane's polling task fails to join.
+    /// * Forwards [`SchedulerStorageClient::poll_ready`]'s return values on failure.
+    /// * Forwards [`SchedulerStorageClient::poll_commit_ready`]'s return values on failure.
+    /// * Forwards [`SchedulerStorageClient::poll_cleanup_ready`]'s return values on failure.
+    async fn try_collect_result(
+        &mut self,
+        curr_session_id: SessionId,
+    ) -> Result<InboundPollState, SchedulerError> {
+        let all_lanes_finished = self.ready_handle.is_finished()
+            && self.commit_ready_handle.is_finished()
+            && self.cleanup_ready_handle.is_finished();
+        if !all_lanes_finished {
+            return Ok(InboundPollState::Pending);
+        }
+
+        let (ready_session_id, ready_entries) = (&mut self.ready_handle)
+            .await
+            .map_err(|e| SchedulerError::Internal(e.to_string()))??;
+        let (commit_session_id, commit_ready_entries) = (&mut self.commit_ready_handle)
+            .await
+            .map_err(|e| SchedulerError::Internal(e.to_string()))??;
+        let (cleanup_session_id, cleanup_ready_entries) =
+            (&mut self.cleanup_ready_handle)
+                .await
+                .map_err(|e| SchedulerError::Internal(e.to_string()))??;
+
+        let latest_session_id = curr_session_id
+            .max(ready_session_id)
+            .max(commit_session_id)
+            .max(cleanup_session_id);
+
+        Ok(InboundPollState::Ready {
+            session_id: latest_session_id,
+            ready_entries: Self::drop_if_stale(ready_session_id, latest_session_id, ready_entries),
+            commit_ready_entries: Self::drop_if_stale(
+                commit_session_id,
+                latest_session_id,
+                commit_ready_entries,
+            ),
+            cleanup_ready_entries: Self::drop_if_stale(
+                cleanup_session_id,
+                latest_session_id,
+                cleanup_ready_entries,
+            ),
+        })
+    }
+
+    /// # Returns
+    ///
+    /// `entries` if `session_id` matches `latest_session_id`, or an empty vector otherwise.
+    fn drop_if_stale(
+        session_id: SessionId,
+        latest_session_id: SessionId,
+        entries: Vec<InboundEntry>,
+    ) -> Vec<InboundEntry> {
+        // Entries polled under any session other than the latest one are discarded.
+        if session_id != latest_session_id {
+            return Vec::new();
+        }
+        entries
+    }
+}
+
+/// A reader that runs inbound-queue polls as background tasks, with at most one polling request
+/// (covering all three lanes) in flight at a time.
+///
+/// # Type Parameters
+///
+/// * `StorageClientType` - The storage client used to poll the inbound queue.
+struct AsyncInboundQueueReader<StorageClientType: SchedulerStorageClient + 'static> {
+    storage_client: StorageClientType,
+    handle: Option<InboundPollHandles>,
+}
+
+impl<StorageClientType: SchedulerStorageClient + 'static>
+    AsyncInboundQueueReader<StorageClientType>
+{
+    /// Factory function.
+    ///
+    /// # Returns
+    ///
+    /// A new reader with no poll in flight.
+    const fn new(storage_client: StorageClientType) -> Self {
+        Self {
+            storage_client,
+            handle: None,
+        }
+    }
+
+    /// Tries to collect the result of the in-flight poll without blocking, releasing the poll
+    /// handles once the poll has concluded, whether it succeeded or failed.
+    ///
+    /// # Returns
+    ///
+    /// On success:
+    ///
+    /// * [`InboundPollState::NotStarted`] if no poll is in flight.
+    /// * Forwards [`InboundPollHandles::try_collect_result`]'s return values otherwise.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`InboundPollHandles::try_collect_result`]'s return values on failure.
+    async fn try_collect_result(
+        &mut self,
+        curr_session_id: SessionId,
+    ) -> Result<InboundPollState, SchedulerError> {
+        let Some(handle) = &mut self.handle else {
+            return Ok(InboundPollState::NotStarted);
+        };
+        let result = handle.try_collect_result(curr_session_id).await;
+        // NOTE: The handles are released on failure as well: the lane tasks may already have been
+        // joined, and polling a completed `JoinHandle` again would panic.
+        if !matches!(result, Ok(InboundPollState::Pending)) {
+            self.handle = None;
+        }
+        result
+    }
+
+    /// Starts a new inbound poll, polling each inbound-queue lane as a background task.
+    ///
+    /// Lanes whose entry limit is 0 are not polled; if all limits are 0, no poll is started.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::Internal`] if a poll is already in flight.
+    fn start(
+        &mut self,
+        storage_poll_timeout: Duration,
+        max_ready_entries: usize,
+        max_commit_ready_entries: usize,
+        max_cleanup_ready_entries: usize,
+    ) -> Result<(), SchedulerError> {
+        if self.handle.is_some() {
+            return Err(SchedulerError::Internal(
+                "inbound poll handle already exists".to_string(),
+            ));
+        }
+
+        if max_ready_entries == 0 && max_commit_ready_entries == 0 && max_cleanup_ready_entries == 0
+        {
+            return Ok(());
+        }
+
+        let ready_storage_client = self.storage_client.clone();
+        let ready_handle = tokio::task::spawn(async move {
+            if max_ready_entries == 0 {
+                return Ok((0, Vec::new()));
+            }
+            ready_storage_client
+                .poll_ready(max_ready_entries, storage_poll_timeout)
+                .await
+        });
+
+        let commit_ready_storage_client = self.storage_client.clone();
+        let commit_ready_handle = tokio::task::spawn(async move {
+            if max_commit_ready_entries == 0 {
+                return Ok((0, Vec::new()));
+            }
+            commit_ready_storage_client
+                .poll_commit_ready(max_commit_ready_entries, storage_poll_timeout)
+                .await
+        });
+
+        let cleanup_ready_storage_client = self.storage_client.clone();
+        let cleanup_ready_handle = tokio::task::spawn(async move {
+            if max_cleanup_ready_entries == 0 {
+                return Ok((0, Vec::new()));
+            }
+            cleanup_ready_storage_client
+                .poll_cleanup_ready(max_cleanup_ready_entries, storage_poll_timeout)
+                .await
+        });
+
+        self.handle = Some(InboundPollHandles {
+            ready_handle,
+            commit_ready_handle,
+            cleanup_ready_handle,
+        });
+
+        Ok(())
+    }
+}
diff --git a/components/spider-scheduler/src/core_impl/round_robin/mod.rs b/components/spider-scheduler/src/core_impl/round_robin/mod.rs
new file mode 100644
index 00000000..ce6f1feb
--- /dev/null
+++ b/components/spider-scheduler/src/core_impl/round_robin/mod.rs
@@ -0,0 +1,39 @@
+//! Round-robin scheduler.
+//!
+//! This scheduler provides basic fairness across jobs using a round-robin scheduling policy. It
+//! polls tasks from the inbound queue (maintained by the storage service) and organizes jobs into
+//! two sets:
+//!
+//! * Active jobs: jobs that participate in round-robin scheduling.
+//! * Pending jobs: jobs that are buffered but not yet scheduled. When an active job has no
+//!   remaining schedulable tasks, it is replaced by the next pending job in FIFO order.
+//!
+//! The scheduler operates in discrete ticks. During each tick, it attempts to consume the results
+//! of an asynchronous inbound-queue polling operation and loads any newly available tasks into its
+//! internal buffers. It then makes scheduling decisions until the dispatch queue reaches capacity.
+//!
+//! # Properties
+//!
+//! * Each round-robin cycle may schedule at most one additional commit task and one additional
+//!   cleanup task, if available.
+//! * All buffered tasks are unique. Tasks loaded from the inbound queue are deduplicated before
+//!   entering the scheduler's internal buffers.
+//!
+//! # Configuration
+//!
+//! * `active_job_queue_capacity`: Maximum number of active jobs maintained by the scheduler.
+//! * `dispatch_queue_capacity`: Maximum number of task assignments in the dispatch queue.
+//! * `ready_task_capacity`: Maximum number of ready tasks buffered by the scheduler.
+//! * `commit_ready_task_capacity`: Maximum number of buffered commit-ready tasks.
+//! * `cleanup_ready_task_capacity`: Maximum number of buffered cleanup-ready tasks.
+//! * `storage_poll_timeout_ms`: Maximum time, in milliseconds, that inbound-queue polling may block
+//!   on the storage-service side.
+//! * `tick_interval_ms`: Interval, in milliseconds, between scheduler ticks (tick execution time
+//!   included).
+
+mod implementation;
+
+#[cfg(test)]
+mod tests;
+
+pub use implementation::RoundRobinConfig;
diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
new file mode 100644
index 00000000..6ad55423
--- /dev/null
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -0,0 +1 @@
+//! Unit tests for the round-robin scheduler core.
diff --git a/components/spider-scheduler/src/error.rs b/components/spider-scheduler/src/error.rs
index bff7571d..50851809 100644
--- a/components/spider-scheduler/src/error.rs
+++ b/components/spider-scheduler/src/error.rs
@@ -32,6 +32,9 @@ pub enum SchedulerError {
     #[error("internal error: {0}")]
     Internal(String),
 
+    #[error("invalid config: {0}")]
+    InvalidConfig(String),
+
     #[error("async result not ready")]
     ResultNotReady,
 }

From 10680666dfa47c697ef9db828e5616b133d11031 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 18:05:22 -0400
Subject: [PATCH 09/14] Add black-box unit testing.

---
 .../src/{core_impl.rs => core_impl/mod.rs}    |   0
 .../core_impl/round_robin/implementation.rs   | 154 +++---
 .../src/core_impl/round_robin/mod.rs          |   2 +-
 .../src/core_impl/round_robin/tests.rs        | 480 ++++++++++++++++++
 4 files changed, 575 insertions(+), 61 deletions(-)
 rename components/spider-scheduler/src/{core_impl.rs => core_impl/mod.rs} (100%)

diff --git a/components/spider-scheduler/src/core_impl.rs b/components/spider-scheduler/src/core_impl/mod.rs
similarity index 100%
rename from components/spider-scheduler/src/core_impl.rs
rename to components/spider-scheduler/src/core_impl/mod.rs
diff --git a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
index 9ff881bc..3418487e 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
@@ -23,19 +23,8 @@ use crate::{
 };
 
 /// The configuration of the round-robin scheduler core.
-///
-/// The configuration itself implements [`SchedulerCore`]: consuming it through
-/// [`SchedulerCore::run`] creates the underlying scheduler and drives its scheduling loop.
-///
-/// # Type Parameters
-///
-/// * `SchedulerStorageClientType` - The storage client used to poll the inbound queue.
-/// * `DispatchQueueSinkType` - The dispatch sink that task assignments are written to.
 #[derive(Deserialize)]
-pub struct RoundRobinConfig<
-    SchedulerStorageClientType: SchedulerStorageClient + 'static,
-    DispatchQueueSinkType: DispatchQueueSink,
-> {
+pub struct RoundRobinConfig {
     /// The capacity of the active job queue. The scheduler will make task assignments from these
     /// jobs in a round-robin manner.
     ///
@@ -68,8 +57,89 @@ pub struct RoundRobinConfig<
 
     /// The time (in milliseconds) that the scheduler will spend on each tick.
     pub tick_interval_ms: u64,
+}
 
-    #[serde(skip)]
+impl RoundRobinConfig {
+    /// Validates the configuration and creates a ready-to-run scheduler core from it.
+    ///
+    /// # Type Parameters
+    ///
+    /// * `SchedulerStorageClientType` - The storage client used to poll the inbound queue.
+    /// * `DispatchQueueSinkType` - The dispatch sink that task assignments are written to.
+    ///
+    /// # Returns
+    ///
+    /// A newly created round-robin scheduler core on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * [`SchedulerError::InvalidConfig`] if any of the following configuration entries is 0:
+    ///   * `active_job_queue_capacity`
+    ///   * `dispatch_queue_capacity`
+    ///   * `ready_task_capacity`
+    ///   * `commit_ready_task_capacity`
+    ///   * `cleanup_ready_task_capacity`
+    pub fn make_core<
+        SchedulerStorageClientType: SchedulerStorageClient + 'static,
+        DispatchQueueSinkType: DispatchQueueSink,
+    >(
+        self,
+    ) -> Result<RoundRobinCore<SchedulerStorageClientType, DispatchQueueSinkType>, SchedulerError>
+    {
+        if self.active_job_queue_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`active_job_queue_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if self.dispatch_queue_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`dispatch_queue_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if self.ready_task_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`ready_task_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if self.commit_ready_task_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`commit_ready_task_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        if self.cleanup_ready_task_capacity == 0 {
+            return Err(SchedulerError::InvalidConfig(
+                "`cleanup_ready_task_capacity` must be greater than 0".to_string(),
+            ));
+        }
+
+        Ok(RoundRobinCore {
+            config: self,
+            _marker: std::marker::PhantomData,
+        })
+    }
+}
+
+/// The round-robin implementation of [`SchedulerCore`], created from
+/// [`RoundRobinConfig::make_core`].
+///
+/// Holding an instance of this type guarantees the wrapped configuration has passed validation, so
+/// the scheduling loop can trust its invariants without re-validating.
+///
+/// # Type Parameters
+///
+/// * `SchedulerStorageClientType` - The storage client used to poll the inbound queue.
+/// * `DispatchQueueSinkType` - The dispatch sink that task assignments are written to.
+pub struct RoundRobinCore<
+    SchedulerStorageClientType: SchedulerStorageClient + 'static,
+    DispatchQueueSinkType: DispatchQueueSink,
+> {
+    config: RoundRobinConfig,
     _marker: std::marker::PhantomData<(SchedulerStorageClientType, DispatchQueueSinkType)>,
 }
 
@@ -77,7 +147,7 @@ pub struct RoundRobinConfig<
 impl<
     SchedulerStorageClientType: SchedulerStorageClient + 'static,
     DispatchQueueSinkType: DispatchQueueSink,
-> SchedulerCore for RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>
+> SchedulerCore for RoundRobinCore<SchedulerStorageClientType, DispatchQueueSinkType>
 {
     type Sink = DispatchQueueSinkType;
     type StorageClient = SchedulerStorageClientType;
@@ -93,8 +163,8 @@ impl<
             storage_client,
             sink,
             cancellation_token,
-            self,
-        )?
+            self.config,
+        )
         .run()
         .await
     }
@@ -159,7 +229,7 @@ struct RoundRobin<
 > {
     sink: DispatchQueueSinkType,
     cancellation_token: CancellationToken,
-    config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
+    config: RoundRobinConfig,
     storage_session_id: SessionId,
     buffered_tasks: HashSet<(JobId, TaskId)>,
 
@@ -185,55 +255,19 @@ impl<
 {
     /// Factory function.
     ///
-    /// Creates a [`RoundRobin`] scheduler from the given config.
+    /// Creates a [`RoundRobin`] scheduler from the given config. The config must have been
+    /// validated through [`RoundRobinConfig::make_core`].
     ///
     /// # Returns
     ///
-    /// The constructed [`RoundRobin`] scheduler on success.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    ///
-    /// * [`SchedulerError::InvalidConfig`] if the config contains invalid values. Check
-    ///   [`RoundRobinConfig`]'s docstring for details.
+    /// The constructed [`RoundRobin`] scheduler.
     fn new(
         storage_session_id: SessionId,
         storage_client: SchedulerStorageClientType,
         sink: DispatchQueueSinkType,
         cancellation_token: CancellationToken,
-        config: RoundRobinConfig<SchedulerStorageClientType, DispatchQueueSinkType>,
-    ) -> Result<Self, SchedulerError> {
-        if config.active_job_queue_capacity == 0 {
-            return Err(SchedulerError::InvalidConfig(
-                "`active_job_queue_capacity` must be greater than 0".to_string(),
-            ));
-        }
-
-        if config.dispatch_queue_capacity == 0 {
-            return Err(SchedulerError::InvalidConfig(
-                "`dispatch_queue_capacity` must be greater than 0".to_string(),
-            ));
-        }
-
-        if config.ready_task_capacity == 0 {
-            return Err(SchedulerError::InvalidConfig(
-                "`ready_task_capacity` must be greater than 0".to_string(),
-            ));
-        }
-
-        if config.commit_ready_task_capacity == 0 {
-            return Err(SchedulerError::InvalidConfig(
-                "`commit_ready_task_capacity` must be greater than 0".to_string(),
-            ));
-        }
-
-        if config.cleanup_ready_task_capacity == 0 {
-            return Err(SchedulerError::InvalidConfig(
-                "`cleanup_ready_task_capacity` must be greater than 0".to_string(),
-            ));
-        }
-
+        config: RoundRobinConfig,
+    ) -> Self {
         let buffered_tasks = HashSet::with_capacity(config.ready_task_capacity);
         let active_jobs = HashMap::with_capacity(config.active_job_queue_capacity);
         let active_job_queue = Self::new_active_job_queue(config.active_job_queue_capacity);
@@ -246,7 +280,7 @@ impl<
             config.commit_ready_task_capacity + config.cleanup_ready_task_capacity,
         );
         let inbound_queue_reader = AsyncInboundQueueReader::new(storage_client);
-        Ok(Self {
+        Self {
             sink,
             cancellation_token,
             config,
@@ -261,7 +295,7 @@ impl<
             cleanup_ready_jobs,
             finalizing_jobs,
             inbound_queue_reader,
-        })
+        }
     }
 
     /// # Returns
diff --git a/components/spider-scheduler/src/core_impl/round_robin/mod.rs b/components/spider-scheduler/src/core_impl/round_robin/mod.rs
index ce6f1feb..f1224799 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/mod.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/mod.rs
@@ -36,4 +36,4 @@ mod implementation;
 #[cfg(test)]
 mod tests;
 
-pub use implementation::RoundRobinConfig;
+pub use implementation::{RoundRobinConfig, RoundRobinCore};
diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
index 6ad55423..2ac650ca 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/tests.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -1 +1,481 @@
 //! Unit tests for the round-robin scheduler core.
+
+use std::{
+    collections::{HashMap, HashSet, VecDeque},
+    sync::{
+        Arc,
+        Mutex,
+        atomic::{AtomicU64, Ordering},
+    },
+    time::Duration,
+};
+
+use anyhow::bail;
+use async_trait::async_trait;
+use spider_core::{
+    job::JobState,
+    types::id::{JobId, ResourceGroupId, SessionId, TaskId},
+};
+use tokio_util::sync::CancellationToken;
+
+use super::RoundRobinConfig;
+use crate::{
+    DispatchQueueSource,
+    InboundEntry,
+    SchedulerCore,
+    SchedulerError,
+    SchedulerStorageClient,
+    StorageClientError,
+    TaskAssignment,
+    dispatch_queue::{DispatchQueueReader, DispatchQueueWriter, create_dispatch_queue},
+};
+
+/// The session used by tests that never bump the session.
+const DEFAULT_SESSION_ID: SessionId = 0;
+
+/// The maximum time to wait for expected assignments before failing a test.
+const DRAIN_DEADLINE: Duration = Duration::from_secs(5);
+
+struct MockStorageInner {
+    session_id: AtomicU64,
+    ready_batches: Mutex<VecDeque<(SessionId, Vec<InboundEntry>)>>,
+}
+
+/// A mock [`SchedulerStorageClient`] backed by scripted poll batches.
+///
+/// Each lane serves its scripted batches in FIFO order, one batch per poll; when a lane's script
+/// is empty, polls return an empty batch under the mock's current session immediately (the `wait`
+/// parameter is ignored to keep tests fast).
+#[derive(Clone)]
+struct MockStorageClient {
+    inner: Arc<MockStorageInner>,
+}
+
+impl MockStorageClient {
+    /// Factory function.
+    ///
+    /// # Returns
+    ///
+    /// A new mock storage client with no scripted batches, reporting `session_id` on empty polls.
+    fn new(session_id: SessionId) -> Self {
+        Self {
+            inner: Arc::new(MockStorageInner {
+                session_id: AtomicU64::new(session_id),
+                ready_batches: Mutex::new(VecDeque::new()),
+            }),
+        }
+    }
+
+    /// Scripts a batch to be served by the next unserved [`SchedulerStorageClient::poll_ready`]
+    /// call.
+    fn push_ready_batch(&self, session_id: SessionId, entries: Vec<InboundEntry>) {
+        self.inner
+            .ready_batches
+            .lock()
+            .expect("ready-batch lock poisoned")
+            .push_back((session_id, entries));
+    }
+
+    /// # Returns
+    ///
+    /// The session reported on polls that have no scripted batch.
+    fn current_session(&self) -> SessionId {
+        self.inner.session_id.load(Ordering::Relaxed)
+    }
+}
+
+#[async_trait]
+impl SchedulerStorageClient for MockStorageClient {
+    async fn poll_ready(
+        &self,
+        max_items: usize,
+        _wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        let scripted_batch = self
+            .inner
+            .ready_batches
+            .lock()
+            .expect("ready-batch lock poisoned")
+            .pop_front();
+        let Some((session_id, entries)) = scripted_batch else {
+            return Ok((self.current_session(), Vec::new()));
+        };
+        assert!(
+            entries.len() <= max_items,
+            "scripted batch of {} entries exceeds the scheduler's poll limit of {max_items}",
+            entries.len(),
+        );
+        Ok((session_id, entries))
+    }
+
+    async fn poll_commit_ready(
+        &self,
+        _max_items: usize,
+        _wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        Ok((self.current_session(), Vec::new()))
+    }
+
+    async fn poll_cleanup_ready(
+        &self,
+        _max_items: usize,
+        _wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        Ok((self.current_session(), Vec::new()))
+    }
+
+    async fn job_state(&self, _job_id: JobId) -> Result<JobState, StorageClientError> {
+        Ok(JobState::Running)
+    }
+}
+
+/// # Returns
+///
+/// A config with the given pool and dispatch capacities, and defaults large enough that the other
+/// capacities never throttle the tests.
+fn make_config(
+    active_job_queue_capacity: usize,
+    dispatch_queue_capacity: usize,
+) -> RoundRobinConfig {
+    RoundRobinConfig {
+        active_job_queue_capacity,
+        dispatch_queue_capacity,
+        ready_task_capacity: 16_384,
+        commit_ready_task_capacity: 16,
+        cleanup_ready_task_capacity: 16,
+        storage_poll_timeout_ms: 10,
+        tick_interval_ms: 1,
+    }
+}
+
+/// # Returns
+///
+/// `n` jobs with freshly generated job and resource-group IDs.
+fn make_jobs(n: usize) -> Vec<(JobId, ResourceGroupId)> {
+    (0..n)
+        .map(|_| (JobId::new(), ResourceGroupId::new()))
+        .collect()
+}
+
+/// Builds one inbound ready batch containing `tasks_per_job` tasks per job, interleaved across
+/// jobs in per-job FIFO order (task 0 of every job, then task 1 of every job, and so on).
+///
+/// When `dup_every` is non-zero, every `dup_every`-th entry is duplicated adjacently within the
+/// batch, emulating the duplicate task assignments a real storage may return.
+///
+/// # Returns
+///
+/// The inbound entries of the batch.
+fn make_ready_batch(
+    jobs: &[(JobId, ResourceGroupId)],
+    tasks_per_job: usize,
+    dup_every: usize,
+) -> Vec<InboundEntry> {
+    let mut entries = Vec::new();
+    let mut num_emitted = 0_usize;
+    for task_index in 0..tasks_per_job {
+        for &(job_id, resource_group_id) in jobs {
+            let entry = InboundEntry {
+                resource_group_id,
+                job_id,
+                task_id: TaskId::Index(task_index),
+            };
+            entries.push(entry);
+            num_emitted += 1;
+            if dup_every > 0 && num_emitted.is_multiple_of(dup_every) {
+                entries.push(entry);
+            }
+        }
+    }
+    entries
+}
+
+/// Validates the given config and spawns the scheduler's public run loop as a background task.
+///
+/// # Returns
+///
+/// A tuple containing:
+///
+/// * The join handle yielding the scheduler's exit result.
+/// * The cancellation token that stops the scheduler.
+///
+/// # Panics
+///
+/// Panics if the given config fails validation.
+fn spawn_scheduler(
+    config: RoundRobinConfig,
+    storage_client: MockStorageClient,
+    sink: DispatchQueueWriter,
+) -> (
+    tokio::task::JoinHandle<Result<(), SchedulerError>>,
+    CancellationToken,
+) {
+    let core = config.make_core().expect("config validation failed");
+    let cancellation_token = CancellationToken::new();
+    let scheduler_token = cancellation_token.clone();
+    let handle = tokio::spawn(async move { core.run(storage_client, sink, scheduler_token).await });
+    (handle, cancellation_token)
+}
+
+/// Drains exactly `n` task assignments from the dispatch queue, playing the worker pool's role.
+///
+/// # Returns
+///
+/// The drained assignments in FIFO order on success.
+///
+/// # Errors
+///
+/// Returns an error if:
+///
+/// * Fewer than `n` assignments arrive within [`DRAIN_DEADLINE`].
+/// * Forwards [`DispatchQueueSource::dequeue`]'s return values on failure.
+async fn drain_n(reader: &DispatchQueueReader, n: usize) -> anyhow::Result<Vec<TaskAssignment>> {
+    const DEQUEUE_WAIT: Duration = Duration::from_millis(100);
+    let deadline = tokio::time::Instant::now() + DRAIN_DEADLINE;
+    let mut assignments = Vec::with_capacity(n);
+    while assignments.len() < n {
+        if tokio::time::Instant::now() > deadline {
+            bail!(
+                "timed out draining assignments: got {}, expected {n}",
+                assignments.len(),
+            );
+        }
+        if let Some((_session_id, assignment)) = reader.dequeue(DEQUEUE_WAIT).await? {
+            assignments.push(assignment);
+        }
+    }
+    Ok(assignments)
+}
+
+/// Asserts that no further assignment arrives within a short observation window, proving that
+/// duplicated or dropped tasks never leak into the dispatch queue.
+///
+/// # Errors
+///
+/// Returns an error if:
+///
+/// * Forwards [`DispatchQueueSource::dequeue`]'s return values on failure.
+async fn assert_no_more_assignments(reader: &DispatchQueueReader) -> anyhow::Result<()> {
+    const OBSERVATION_WINDOW: Duration = Duration::from_secs(1);
+    let unexpected_assignment = reader.dequeue(OBSERVATION_WINDOW).await?;
+    assert_eq!(unexpected_assignment, None);
+    Ok(())
+}
+
+/// Asserts that `assignments` is exactly `rounds` full round-robin rotations over `jobs` in order:
+/// rotation `r` consists of task `r` of every job, following the jobs' order, so every job's task
+/// indices are dispatched FIFO.
+fn assert_strict_rotation(
+    assignments: &[TaskAssignment],
+    jobs: &[(JobId, ResourceGroupId)],
+    rounds: usize,
+) {
+    let expected: Vec<(JobId, ResourceGroupId, TaskId)> = (0..rounds)
+        .flat_map(|round| {
+            jobs.iter().map(move |&(job_id, resource_group_id)| {
+                (job_id, resource_group_id, TaskId::Index(round))
+            })
+        })
+        .collect();
+    let actual: Vec<(JobId, ResourceGroupId, TaskId)> = assignments
+        .iter()
+        .map(|assignment| {
+            (
+                assignment.job_id,
+                assignment.resource_group_id,
+                assignment.task_id,
+            )
+        })
+        .collect();
+    assert_eq!(actual, expected);
+}
+
+/// Asserts that `assignments` follows the round-robin scheduling policy over `jobs` without pinning
+/// down the exact rotation order:
+///
+/// * Every aligned window of `jobs.len()` assignments (one full rotation pass) contains each job
+///   exactly once.
+/// * Each job's task indices are dispatched in FIFO order, with the matching resource group.
+/// * Each job receives exactly `tasks_per_job` assignments.
+fn assert_round_robin_property(
+    assignments: &[TaskAssignment],
+    jobs: &[(JobId, ResourceGroupId)],
+    tasks_per_job: usize,
+) {
+    assert_eq!(assignments.len(), jobs.len() * tasks_per_job);
+
+    // With equal task counts, no job leaves the rotation mid-phase, so every rotation pass must
+    // schedule every job exactly once.
+    for rotation_pass in assignments.chunks(jobs.len()) {
+        let scheduled_jobs: HashSet<JobId> = rotation_pass
+            .iter()
+            .map(|assignment| assignment.job_id)
+            .collect();
+        assert_eq!(
+            scheduled_jobs.len(),
+            jobs.len(),
+            "a rotation pass repeats or misses a job: {rotation_pass:?}",
+        );
+    }
+
+    let resource_groups: HashMap<JobId, ResourceGroupId> = jobs.iter().copied().collect();
+    let mut next_task_indices: HashMap<JobId, usize> = HashMap::new();
+    for assignment in assignments {
+        let resource_group_id = *resource_groups
+            .get(&assignment.job_id)
+            .expect("assignment belongs to a job outside the given job set");
+        assert_eq!(assignment.resource_group_id, resource_group_id);
+
+        let next_task_index = next_task_indices.entry(assignment.job_id).or_insert(0);
+        assert_eq!(assignment.task_id, TaskId::Index(*next_task_index));
+        *next_task_index += 1;
+    }
+
+    for &(job_id, _) in jobs {
+        assert_eq!(next_task_indices.get(&job_id).copied(), Some(tasks_per_job));
+    }
+}
+
+#[test]
+fn zero_capacity_configs_are_rejected() {
+    let try_make_core =
+        |config: RoundRobinConfig| config.make_core::<MockStorageClient, DispatchQueueWriter>();
+
+    assert!(try_make_core(make_config(2, 2)).is_ok());
+
+    let zeroed_configs = [
+        RoundRobinConfig {
+            active_job_queue_capacity: 0,
+            ..make_config(2, 2)
+        },
+        RoundRobinConfig {
+            dispatch_queue_capacity: 0,
+            ..make_config(2, 2)
+        },
+        RoundRobinConfig {
+            ready_task_capacity: 0,
+            ..make_config(2, 2)
+        },
+        RoundRobinConfig {
+            commit_ready_task_capacity: 0,
+            ..make_config(2, 2)
+        },
+        RoundRobinConfig {
+            cleanup_ready_task_capacity: 0,
+            ..make_config(2, 2)
+        },
+    ];
+    for config in zeroed_configs {
+        let result = try_make_core(config);
+        assert!(
+            matches!(result, Err(SchedulerError::InvalidConfig(_))),
+            "expected InvalidConfig, got {:?}",
+            result.err(),
+        );
+    }
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn single_capacity_pool_schedules_jobs_serially() -> anyhow::Result<()> {
+    const NUM_JOBS: usize = 3;
+    const TASKS_PER_JOB: usize = 5;
+    const DUP_EVERY: usize = 3;
+    const DISPATCH_QUEUE_CAPACITY: usize = 32;
+
+    let jobs = make_jobs(NUM_JOBS);
+    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
+    storage_client.push_ready_batch(
+        DEFAULT_SESSION_ID,
+        make_ready_batch(&jobs, TASKS_PER_JOB, DUP_EVERY),
+    );
+
+    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let config = make_config(1, DISPATCH_QUEUE_CAPACITY);
+    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
+
+    let assignments = drain_n(&reader, NUM_JOBS * TASKS_PER_JOB).await?;
+    assert_no_more_assignments(&reader).await?;
+
+    // With an active job pool of capacity 1, round-robin degenerates to serial job FIFO: the
+    // rotation holds a single job at a time, so each job's tasks dispatch as one consecutive
+    // single-job rotation, in job-arrival order.
+    for (segment, job) in assignments.chunks(TASKS_PER_JOB).zip(&jobs) {
+        assert_strict_rotation(segment, std::slice::from_ref(job), TASKS_PER_JOB);
+    }
+
+    cancellation_token.cancel();
+    scheduler_handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn active_jobs_dispatch_in_round_robin_order() -> anyhow::Result<()> {
+    const NUM_JOBS: usize = 10;
+    const TASKS_PER_JOB: usize = 5;
+    const DUP_EVERY: usize = 4;
+    const DISPATCH_QUEUE_CAPACITY: usize = 32;
+
+    let jobs = make_jobs(NUM_JOBS);
+    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
+    storage_client.push_ready_batch(
+        DEFAULT_SESSION_ID,
+        make_ready_batch(&jobs, TASKS_PER_JOB, DUP_EVERY),
+    );
+
+    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let config = make_config(NUM_JOBS, DISPATCH_QUEUE_CAPACITY);
+    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
+
+    let assignments = drain_n(&reader, NUM_JOBS * TASKS_PER_JOB).await?;
+    assert_no_more_assignments(&reader).await?;
+
+    // All 10 jobs fit into the active job pool, so no job ever pends and dispatch follows the
+    // strict rotation: task 0 of every job in batch order, then task 1 of every job, and so on. The
+    // exact count of 50 (with no trailing assignments) also proves the in-batch duplicates were
+    // deduplicated.
+    assert_strict_rotation(&assignments, &jobs, TASKS_PER_JOB);
+
+    cancellation_token.cancel();
+    scheduler_handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn pending_jobs_promote_and_schedule_round_robin() -> anyhow::Result<()> {
+    const ACTIVE_JOB_QUEUE_CAPACITY: usize = 10;
+    const NUM_JOBS: usize = 20;
+    const TASKS_PER_JOB: usize = 5;
+    const DUP_EVERY: usize = 5;
+    const DISPATCH_QUEUE_CAPACITY: usize = 32;
+
+    let jobs = make_jobs(NUM_JOBS);
+    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
+    storage_client.push_ready_batch(
+        DEFAULT_SESSION_ID,
+        make_ready_batch(&jobs, TASKS_PER_JOB, DUP_EVERY),
+    );
+
+    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let config = make_config(ACTIVE_JOB_QUEUE_CAPACITY, DISPATCH_QUEUE_CAPACITY);
+    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
+
+    let assignments = drain_n(&reader, NUM_JOBS * TASKS_PER_JOB).await?;
+    assert_no_more_assignments(&reader).await?;
+
+    let (active_jobs, pending_jobs) = jobs.split_at(ACTIVE_JOB_QUEUE_CAPACITY);
+    let (phase1, phase2) = assignments.split_at(ACTIVE_JOB_QUEUE_CAPACITY * TASKS_PER_JOB);
+
+    // Phase 1: the first 10 jobs in batch order fill the active job pool and dispatch in strict
+    // rotation; the pending jobs must not appear while the active jobs still have tasks.
+    assert_strict_rotation(phase1, active_jobs, TASKS_PER_JOB);
+
+    // Phase 2: once the active jobs exhaust, the 10 pending jobs are promoted and scheduled
+    // round-robin. The exact slot order after the retire-and-promote wave is an implementation
+    // detail of the rotation bookkeeping, so assert the round-robin property instead of one
+    // hard-coded sequence.
+    assert_round_robin_property(phase2, pending_jobs, TASKS_PER_JOB);
+
+    cancellation_token.cancel();
+    scheduler_handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}

From bc619580a6d60d5b3eb8c2e52991ed4623e68062 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 18:17:03 -0400
Subject: [PATCH 10/14] Add commit and cleanup lane tests.

---
 .../src/core_impl/round_robin/tests.rs        | 175 +++++++++++++++---
 1 file changed, 146 insertions(+), 29 deletions(-)

diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
index 2ac650ca..6245d7cb 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/tests.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -39,6 +39,8 @@ const DRAIN_DEADLINE: Duration = Duration::from_secs(5);
 struct MockStorageInner {
     session_id: AtomicU64,
     ready_batches: Mutex<VecDeque<(SessionId, Vec<InboundEntry>)>>,
+    commit_ready_batches: Mutex<VecDeque<(SessionId, Vec<InboundEntry>)>>,
+    cleanup_ready_batches: Mutex<VecDeque<(SessionId, Vec<InboundEntry>)>>,
 }
 
 /// A mock [`SchedulerStorageClient`] backed by scripted poll batches.
@@ -62,6 +64,8 @@ impl MockStorageClient {
             inner: Arc::new(MockStorageInner {
                 session_id: AtomicU64::new(session_id),
                 ready_batches: Mutex::new(VecDeque::new()),
+                commit_ready_batches: Mutex::new(VecDeque::new()),
+                cleanup_ready_batches: Mutex::new(VecDeque::new()),
             }),
         }
     }
@@ -76,52 +80,81 @@ impl MockStorageClient {
             .push_back((session_id, entries));
     }
 
+    /// Scripts a batch to be served by the next unserved
+    /// [`SchedulerStorageClient::poll_commit_ready`] call.
+    fn push_commit_ready_batch(&self, session_id: SessionId, entries: Vec<InboundEntry>) {
+        self.inner
+            .commit_ready_batches
+            .lock()
+            .expect("commit-ready-batch lock poisoned")
+            .push_back((session_id, entries));
+    }
+
+    /// Scripts a batch to be served by the next unserved
+    /// [`SchedulerStorageClient::poll_cleanup_ready`] call.
+    fn push_cleanup_ready_batch(&self, session_id: SessionId, entries: Vec<InboundEntry>) {
+        self.inner
+            .cleanup_ready_batches
+            .lock()
+            .expect("cleanup-ready-batch lock poisoned")
+            .push_back((session_id, entries));
+    }
+
     /// # Returns
     ///
     /// The session reported on polls that have no scripted batch.
     fn current_session(&self) -> SessionId {
         self.inner.session_id.load(Ordering::Relaxed)
     }
-}
 
-#[async_trait]
-impl SchedulerStorageClient for MockStorageClient {
-    async fn poll_ready(
+    /// Serves one poll from the given lane's script.
+    ///
+    /// # Returns
+    ///
+    /// The lane's next scripted batch, or an empty batch under the current session if the lane's
+    /// script is exhausted.
+    fn serve_batch(
         &self,
+        batches: &Mutex<VecDeque<(SessionId, Vec<InboundEntry>)>>,
         max_items: usize,
-        _wait: Duration,
-    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
-        let scripted_batch = self
-            .inner
-            .ready_batches
-            .lock()
-            .expect("ready-batch lock poisoned")
-            .pop_front();
+    ) -> (SessionId, Vec<InboundEntry>) {
+        let scripted_batch = batches.lock().expect("batch lock poisoned").pop_front();
         let Some((session_id, entries)) = scripted_batch else {
-            return Ok((self.current_session(), Vec::new()));
+            return (self.current_session(), Vec::new());
         };
         assert!(
             entries.len() <= max_items,
             "scripted batch of {} entries exceeds the scheduler's poll limit of {max_items}",
             entries.len(),
         );
-        Ok((session_id, entries))
+        (session_id, entries)
+    }
+}
+
+#[async_trait]
+impl SchedulerStorageClient for MockStorageClient {
+    async fn poll_ready(
+        &self,
+        max_items: usize,
+        _wait: Duration,
+    ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
+        Ok(self.serve_batch(&self.inner.ready_batches, max_items))
     }
 
     async fn poll_commit_ready(
         &self,
-        _max_items: usize,
+        max_items: usize,
         _wait: Duration,
     ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
-        Ok((self.current_session(), Vec::new()))
+        Ok(self.serve_batch(&self.inner.commit_ready_batches, max_items))
     }
 
     async fn poll_cleanup_ready(
         &self,
-        _max_items: usize,
+        max_items: usize,
         _wait: Duration,
     ) -> Result<(SessionId, Vec<InboundEntry>), StorageClientError> {
-        Ok((self.current_session(), Vec::new()))
+        Ok(self.serve_batch(&self.inner.cleanup_ready_batches, max_items))
     }
 
     async fn job_state(&self, _job_id: JobId) -> Result<JobState, StorageClientError> {
@@ -190,6 +223,22 @@ fn make_ready_batch(
     entries
 }
 
+/// Builds one inbound batch that marks each given job as finalizing, with `task_id` (either
+/// [`TaskId::Commit`] or [`TaskId::Cleanup`]) set on every entry.
+///
+/// # Returns
+///
+/// The inbound entries of the batch.
+fn make_finalizing_batch(jobs: &[(JobId, ResourceGroupId)], task_id: TaskId) -> Vec<InboundEntry> {
+    jobs.iter()
+        .map(|&(job_id, resource_group_id)| InboundEntry {
+            resource_group_id,
+            job_id,
+            task_id,
+        })
+        .collect()
+}
+
 /// Validates the given config and spawns the scheduler's public run loop as a background task.
 ///
 /// # Returns
@@ -262,6 +311,26 @@ async fn assert_no_more_assignments(reader: &DispatchQueueReader) -> anyhow::Res
     Ok(())
 }
 
+/// # Returns
+///
+/// A vector of tuples following the order of the input assignments, each tuple containing:
+///
+/// * The job ID.
+/// * The resource group ID.
+/// * The task ID.
+fn make_assigment_tuple(assignments: &[TaskAssignment]) -> Vec<(JobId, ResourceGroupId, TaskId)> {
+    assignments
+        .iter()
+        .map(|assignment| {
+            (
+                assignment.job_id,
+                assignment.resource_group_id,
+                assignment.task_id,
+            )
+        })
+        .collect()
+}
+
 /// Asserts that `assignments` is exactly `rounds` full round-robin rotations over `jobs` in order:
 /// rotation `r` consists of task `r` of every job, following the jobs' order, so every job's task
 /// indices are dispatched FIFO.
@@ -277,17 +346,7 @@ fn assert_strict_rotation(
             })
         })
         .collect();
-    let actual: Vec<(JobId, ResourceGroupId, TaskId)> = assignments
-        .iter()
-        .map(|assignment| {
-            (
-                assignment.job_id,
-                assignment.resource_group_id,
-                assignment.task_id,
-            )
-        })
-        .collect();
-    assert_eq!(actual, expected);
+    assert_eq!(make_assigment_tuple(assignments), expected);
 }
 
 /// Asserts that `assignments` follows the round-robin scheduling policy over `jobs` without pinning
@@ -479,3 +538,61 @@ async fn pending_jobs_promote_and_schedule_round_robin() -> anyhow::Result<()> {
     scheduler_handle.await.expect("scheduler task panicked")?;
     Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread")]
+async fn commit_and_cleanup_dispatch_once_per_cycle() -> anyhow::Result<()> {
+    const NUM_ACTIVE_JOBS: usize = 4;
+    const TASKS_PER_JOB: usize = 3;
+    const NUM_FINALIZING_JOBS_PER_LANE: usize = 3;
+    const DISPATCH_QUEUE_CAPACITY: usize = 1024;
+
+    let active_jobs = make_jobs(NUM_ACTIVE_JOBS);
+    let commit_ready_jobs = make_jobs(NUM_FINALIZING_JOBS_PER_LANE);
+    let cleanup_ready_jobs = make_jobs(NUM_FINALIZING_JOBS_PER_LANE);
+
+    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
+    storage_client.push_ready_batch(
+        DEFAULT_SESSION_ID,
+        make_ready_batch(&active_jobs, TASKS_PER_JOB, 0),
+    );
+    let mut commit_ready_batch = make_finalizing_batch(&commit_ready_jobs, TaskId::Commit);
+    // Duplicate one commit-ready entry within the batch: it must dispatch exactly once.
+    commit_ready_batch.push(commit_ready_batch[0]);
+    storage_client.push_commit_ready_batch(DEFAULT_SESSION_ID, commit_ready_batch);
+    storage_client.push_cleanup_ready_batch(
+        DEFAULT_SESSION_ID,
+        make_finalizing_batch(&cleanup_ready_jobs, TaskId::Cleanup),
+    );
+
+    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let config = make_config(NUM_ACTIVE_JOBS, DISPATCH_QUEUE_CAPACITY);
+    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
+
+    let num_assignments = NUM_ACTIVE_JOBS * TASKS_PER_JOB + 2 * NUM_FINALIZING_JOBS_PER_LANE;
+    let assignments = drain_n(&reader, num_assignments).await?;
+    assert_no_more_assignments(&reader).await?;
+
+    // The rotation is [commit lane, cleanup lane, active jobs...], so every cycle dispatches
+    // exactly one commit task and one cleanup task (while their queues are non-empty), each lane
+    // drained FIFO, followed by one task of every active job.
+    let expected: Vec<(JobId, ResourceGroupId, TaskId)> = (0..TASKS_PER_JOB)
+        .flat_map(|round| {
+            let (commit_job_id, commit_resource_group_id) = commit_ready_jobs[round];
+            let (cleanup_job_id, cleanup_resource_group_id) = cleanup_ready_jobs[round];
+            std::iter::once((commit_job_id, commit_resource_group_id, TaskId::Commit))
+                .chain(std::iter::once((
+                    cleanup_job_id,
+                    cleanup_resource_group_id,
+                    TaskId::Cleanup,
+                )))
+                .chain(active_jobs.iter().map(move |&(job_id, resource_group_id)| {
+                    (job_id, resource_group_id, TaskId::Index(round))
+                }))
+        })
+        .collect();
+    assert_eq!(make_assigment_tuple(&assignments), expected);
+
+    cancellation_token.cancel();
+    scheduler_handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}

From 8c8bdf3519d1750919556e221544c768f66fb85f Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 18:57:59 -0400
Subject: [PATCH 11/14] Expose scheduler internals to tests; extend round-robin tests.

---
 .../core_impl/round_robin/implementation.rs   |  97 +++---
 .../src/core_impl/round_robin/tests.rs        | 327 +++++++++++++++++-
 2 files changed, 374 insertions(+), 50 deletions(-)

diff --git a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
index 3418487e..3d7b1214 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
@@ -171,7 +171,7 @@ impl<
 }
 
 /// A FIFO queue of a job's buffered ready tasks.
-struct JobTaskQueue {
+pub(super) struct JobTaskQueue {
     job_id: JobId,
     resource_group_id: ResourceGroupId,
     task_ids: VecDeque<TaskId>,
@@ -204,48 +204,40 @@ impl JobTaskQueue {
     }
 }
 
-/// A slot in the round-robin rotation that the scheduler draws task assignments from.
-#[derive(Clone)]
-enum RoundRobinSlot {
-    /// An active job: assignments are drawn from the job's buffered ready tasks.
-    Job(JobId),
-
-    /// The commit lane: assignments are drawn from the buffered commit-ready jobs.
-    CommitReady,
-
-    /// The cleanup lane: assignments are drawn from the buffered cleanup-ready jobs.
-    CleanupReady,
-}
-
 /// The round-robin scheduler core created from a [`RoundRobinConfig`].
 ///
 /// # Type Parameters
 ///
 /// * `SchedulerStorageClientType` - The storage client used to poll the inbound queue.
 /// * `DispatchQueueSinkType` - The dispatch sink that task assignments are written to.
-struct RoundRobin<
+///
+/// # Note
+///
+/// All member variables are marked `pub(super)` to allow the test module to inspect the internal
+/// states.
+pub(super) struct RoundRobin<
     SchedulerStorageClientType: SchedulerStorageClient + 'static,
     DispatchQueueSinkType: DispatchQueueSink,
 > {
-    sink: DispatchQueueSinkType,
-    cancellation_token: CancellationToken,
-    config: RoundRobinConfig,
-    storage_session_id: SessionId,
-    buffered_tasks: HashSet<(JobId, TaskId)>,
+    pub(super) sink: DispatchQueueSinkType,
+    pub(super) cancellation_token: CancellationToken,
+    pub(super) config: RoundRobinConfig,
+    pub(super) storage_session_id: SessionId,
+    pub(super) buffered_tasks: HashSet<(JobId, TaskId)>,
 
-    active_jobs: HashMap<JobId, JobTaskQueue>,
-    active_job_queue: Vec<RoundRobinSlot>,
-    active_job_queue_round_robin_cursor: usize,
+    pub(super) active_jobs: HashMap<JobId, JobTaskQueue>,
+    pub(super) active_job_queue: Vec<RoundRobinSlot>,
+    pub(super) active_job_queue_round_robin_cursor: usize,
 
-    pending_jobs: HashMap<JobId, JobTaskQueue>,
-    pending_job_queue: VecDeque<JobId>,
+    pub(super) pending_jobs: HashMap<JobId, JobTaskQueue>,
+    pub(super) pending_job_queue: VecDeque<JobId>,
 
-    commit_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
-    cleanup_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
+    pub(super) commit_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
+    pub(super) cleanup_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
 
-    finalizing_jobs: HashSet<JobId>,
+    pub(super) finalizing_jobs: HashSet<JobId>,
 
-    inbound_queue_reader: AsyncInboundQueueReader<SchedulerStorageClientType>,
+    pub(super) inbound_queue_reader: AsyncInboundQueueReader<SchedulerStorageClientType>,
 }
 
 impl<
@@ -261,7 +253,7 @@ impl<
     /// # Returns
     ///
     /// The constructed [`RoundRobin`] scheduler.
-    fn new(
+    pub(super) fn new(
         storage_session_id: SessionId,
         storage_client: SchedulerStorageClientType,
         sink: DispatchQueueSinkType,
@@ -298,6 +290,21 @@ impl<
         }
     }
 
+    /// Executes a single scheduling tick: consumes any completed inbound poll, then makes
+    /// scheduling decisions to fill the dispatch queue.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`Self::consume_inbound_poll_result`]'s return values on failure.
+    /// * Forwards [`Self::make_schedule_decisions`]'s return values on failure.
+    pub(super) async fn tick(&mut self) -> Result<(), SchedulerError> {
+        self.consume_inbound_poll_result().await?;
+        self.make_schedule_decisions().await?;
+        Ok(())
+    }
+
     /// # Returns
     ///
     /// A new active job queue containing only the commit-ready and cleanup-ready slots.
@@ -413,21 +420,6 @@ impl<
         }
     }
 
-    /// Executes a single scheduling tick: consumes any completed inbound poll, then makes
-    /// scheduling decisions to fill the dispatch queue.
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if:
-    ///
-    /// * Forwards [`Self::consume_inbound_poll_result`]'s return values on failure.
-    /// * Forwards [`Self::make_schedule_decisions`]'s return values on failure.
-    async fn tick(&mut self) -> Result<(), SchedulerError> {
-        self.consume_inbound_poll_result().await?;
-        self.make_schedule_decisions().await?;
-        Ok(())
-    }
-
     /// Loads polled inbound entries into the scheduler's internal buffers.
     ///
     /// If the polled session is newer than the current session, all existing placement states are
@@ -719,6 +711,19 @@ impl<
     }
 }
 
+/// A slot in the round-robin rotation that the scheduler draws task assignments from.
+#[derive(Clone)]
+pub(super) enum RoundRobinSlot {
+    /// An active job: assignments are drawn from the job's buffered ready tasks.
+    Job(JobId),
+
+    /// The commit lane: assignments are drawn from the buffered commit-ready jobs.
+    CommitReady,
+
+    /// The cleanup lane: assignments are drawn from the buffered cleanup-ready jobs.
+    CleanupReady,
+}
+
 /// The state of an asynchronous inbound-queue poll.
 enum InboundPollState {
     /// The poll has completed, carrying the polled session and the entries drained from each
@@ -834,7 +839,7 @@ impl InboundPollHandles {
 /// # Type Parameters
 ///
 /// * `StorageClientType` - The storage client used to poll the inbound queue.
-struct AsyncInboundQueueReader<StorageClientType: SchedulerStorageClient + 'static> {
+pub(super) struct AsyncInboundQueueReader<StorageClientType: SchedulerStorageClient + 'static> {
     storage_client: StorageClientType,
     handle: Option<InboundPollHandles>,
 }
diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
index 6245d7cb..14242482 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/tests.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -18,7 +18,7 @@ use spider_core::{
 };
 use tokio_util::sync::CancellationToken;
 
-use super::RoundRobinConfig;
+use super::{RoundRobinConfig, implementation::RoundRobin};
 use crate::{
     DispatchQueueSource,
     InboundEntry,
@@ -33,6 +33,9 @@ use crate::{
 /// The session used by tests that never bump the session.
 const DEFAULT_SESSION_ID: SessionId = 0;
 
+/// The white-box scheduler under test, driven by manual ticks.
+type TestScheduler = RoundRobin<MockStorageClient, DispatchQueueWriter>;
+
 /// The maximum time to wait for expected assignments before failing a test.
 const DRAIN_DEADLINE: Duration = Duration::from_secs(5);
 
@@ -304,6 +307,10 @@ async fn drain_n(reader: &DispatchQueueReader, n: usize) -> anyhow::Result<Vec<T
 /// Returns an error if:
 ///
 /// * Forwards [`DispatchQueueSource::dequeue`]'s return values on failure.
+///
+/// # Panics
+///
+/// Panics if an assignment arrives within the observation window.
 async fn assert_no_more_assignments(reader: &DispatchQueueReader) -> anyhow::Result<()> {
     const OBSERVATION_WINDOW: Duration = Duration::from_secs(1);
     let unexpected_assignment = reader.dequeue(OBSERVATION_WINDOW).await?;
@@ -318,7 +325,7 @@ async fn assert_no_more_assignments(reader: &DispatchQueueReader) -> anyhow::Res
 /// * The job ID.
 /// * The resource group ID.
 /// * The task ID.
-fn make_assigment_tuple(assignments: &[TaskAssignment]) -> Vec<(JobId, ResourceGroupId, TaskId)> {
+fn make_assignment_tuple(assignments: &[TaskAssignment]) -> Vec<(JobId, ResourceGroupId, TaskId)> {
     assignments
         .iter()
         .map(|assignment| {
@@ -334,6 +341,10 @@ fn make_assigment_tuple(assignments: &[TaskAssignment]) -> Vec<(JobId, ResourceG
 /// Asserts that `assignments` is exactly `rounds` full round-robin rotations over `jobs` in order:
 /// rotation `r` consists of task `r` of every job, following the jobs' order, so every job's task
 /// indices are dispatched FIFO.
+///
+/// # Panics
+///
+/// Panics if `assignments` deviates from the expected strict rotation.
 fn assert_strict_rotation(
     assignments: &[TaskAssignment],
     jobs: &[(JobId, ResourceGroupId)],
@@ -346,7 +357,7 @@ fn assert_strict_rotation(
             })
         })
         .collect();
-    assert_eq!(make_assigment_tuple(assignments), expected);
+    assert_eq!(make_assignment_tuple(assignments), expected);
 }
 
 /// Asserts that `assignments` follows the round-robin scheduling policy over `jobs` without pinning
@@ -356,6 +367,10 @@ fn assert_strict_rotation(
 ///   exactly once.
 /// * Each job's task indices are dispatched in FIFO order, with the matching resource group.
 /// * Each job receives exactly `tasks_per_job` assignments.
+///
+/// # Panics
+///
+/// Panics if `assignments` violates any of the properties above.
 fn assert_round_robin_property(
     assignments: &[TaskAssignment],
     jobs: &[(JobId, ResourceGroupId)],
@@ -590,9 +605,313 @@ async fn commit_and_cleanup_dispatch_once_per_cycle() -> anyhow::Result<()> {
                 }))
         })
         .collect();
-    assert_eq!(make_assigment_tuple(&assignments), expected);
+    assert_eq!(make_assignment_tuple(&assignments), expected);
 
     cancellation_token.cancel();
     scheduler_handle.await.expect("scheduler task panicked")?;
     Ok(())
 }
+
+/// # Returns
+///
+/// A white-box scheduler wired to the given storage client and sink, to be driven by manual
+/// [`RoundRobin::tick`] calls.
+fn make_scheduler(
+    config: RoundRobinConfig,
+    storage_client: MockStorageClient,
+    sink: DispatchQueueWriter,
+) -> TestScheduler {
+    RoundRobin::new(
+        DEFAULT_SESSION_ID,
+        storage_client,
+        sink,
+        CancellationToken::new(),
+        config,
+    )
+}
+
+/// Ticks the scheduler until `predicate` holds on its state.
+///
+/// # Errors
+///
+/// Returns an error if:
+///
+/// * The predicate does not hold within [`DRAIN_DEADLINE`].
+/// * Forwards [`RoundRobin::tick`]'s return values on failure.
+async fn tick_until(
+    scheduler: &mut TestScheduler,
+    predicate: impl Fn(&TestScheduler) -> bool,
+) -> anyhow::Result<()> {
+    let deadline = tokio::time::Instant::now() + DRAIN_DEADLINE;
+    while !predicate(scheduler) {
+        if tokio::time::Instant::now() > deadline {
+            bail!("timed out waiting for the tick predicate to hold");
+        }
+        scheduler.tick().await?;
+        tokio::task::yield_now().await;
+    }
+    Ok(())
+}
+
+/// Drains exactly `n` task assignments while manually ticking the scheduler to refill the dispatch
+/// queue (the white-box counterpart of [`drain_n`]).
+///
+/// # Returns
+///
+/// The drained assignments in FIFO order on success.
+///
+/// # Errors
+///
+/// Returns an error if:
+///
+/// * Fewer than `n` assignments arrive within [`DRAIN_DEADLINE`].
+/// * Forwards [`RoundRobin::tick`]'s return values on failure.
+/// * Forwards [`DispatchQueueSource::dequeue`]'s return values on failure.
+async fn tick_and_drain_n(
+    scheduler: &mut TestScheduler,
+    reader: &DispatchQueueReader,
+    n: usize,
+) -> anyhow::Result<Vec<TaskAssignment>> {
+    let deadline = tokio::time::Instant::now() + DRAIN_DEADLINE;
+    let mut assignments = Vec::with_capacity(n);
+    while assignments.len() < n {
+        if tokio::time::Instant::now() > deadline {
+            bail!(
+                "timed out draining assignments: got {}, expected {n}",
+                assignments.len(),
+            );
+        }
+        scheduler.tick().await?;
+        while let Some((_session_id, assignment)) = reader.dequeue(Duration::ZERO).await? {
+            assignments.push(assignment);
+        }
+        tokio::task::yield_now().await;
+    }
+    Ok(assignments)
+}
+
+/// Ticks the scheduler a few extra rounds and asserts that no further assignment is dispatched.
+///
+/// # Errors
+///
+/// Returns an error if:
+///
+/// * Forwards [`RoundRobin::tick`]'s return values on failure.
+/// * Forwards [`DispatchQueueSource::dequeue`]'s return values on failure.
+///
+/// # Panics
+///
+/// Panics if a further assignment is dispatched.
+async fn assert_no_further_assignments(
+    scheduler: &mut TestScheduler,
+    reader: &DispatchQueueReader,
+) -> anyhow::Result<()> {
+    const EXTRA_TICKS: usize = 8;
+    for _ in 0..EXTRA_TICKS {
+        scheduler.tick().await?;
+        tokio::task::yield_now().await;
+    }
+    let unexpected_assignment = reader.dequeue(Duration::from_millis(50)).await?;
+    assert_eq!(unexpected_assignment, None);
+    Ok(())
+}
+
+/// Drives the shared scenario where a finalizing batch drops one active and one pending job.
+///
+/// The finalizing lane is selected by `finalizing_task_id`: commit-ready for [`TaskId::Commit`],
+/// or cleanup-ready for [`TaskId::Cleanup`]. The scenario:
+///
+/// 1. Buffers four jobs (two active, two pending) and freezes dispatch via a full dispatch queue.
+/// 2. Delivers a finalizing batch for one active job and one pending job mid-stream.
+/// 3. Asserts both jobs leave the placement state with their buffered regular tasks discarded.
+/// 4. Unfreezes and asserts the drained sequence: each finalized job dispatches its finalizing task
+///    exactly once and no further regular task, while the surviving jobs complete in FIFO order.
+///
+/// # Errors
+///
+/// Returns an error if:
+///
+/// * `finalizing_task_id` is a regular [`TaskId::Index`] task.
+/// * Forwards [`tick_until`]'s return values on failure.
+/// * Forwards [`tick_and_drain_n`]'s return values on failure.
+/// * Forwards [`assert_no_further_assignments`]'s return values on failure.
+///
+/// # Panics
+///
+/// Panics if any scheduling-behavior assertion of the scenario fails.
+#[allow(clippy::too_many_lines, clippy::similar_names)]
+async fn assert_finalizing_ready_drops_jobs(finalizing_task_id: TaskId) -> anyhow::Result<()> {
+    // NOTE: We disable two linting rules for the following reasons:
+    // * `clippy::too_many_lines`: This test case is long, but we want to avoid breaking it into
+    //   smaller functions since that would also make the overall flow hard to navigate.
+    // * `clippy::similar_names`: The linter complains about `job_a_regular`, `job_b_regular`, etc.,
+    //   but these names are fine for test cases.
+    const ACTIVE_JOB_QUEUE_CAPACITY: usize = 2;
+    const DISPATCH_QUEUE_CAPACITY: usize = 2;
+    const TASKS_PER_JOB: usize = 3;
+    const NUM_PRE_FREEZE_ASSIGNMENTS: usize = DISPATCH_QUEUE_CAPACITY;
+    const NUM_FINALIZED_JOBS: usize = 2;
+
+    if matches!(finalizing_task_id, TaskId::Index(_)) {
+        bail!("`finalizing_task_id` must be `TaskId::Commit` or `TaskId::Cleanup`");
+    }
+    let is_commit = finalizing_task_id == TaskId::Commit;
+
+    // Batch order makes `job_a` and `job_b` active, `job_p` and `job_q` pending.
+    let jobs = make_jobs(4);
+    let (job_a, job_b, job_p, job_q) = (jobs[0], jobs[1], jobs[2], jobs[3]);
+
+    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
+    storage_client.push_ready_batch(
+        DEFAULT_SESSION_ID,
+        make_ready_batch(&jobs, TASKS_PER_JOB, 0),
+    );
+
+    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let mut scheduler = make_scheduler(
+        make_config(ACTIVE_JOB_QUEUE_CAPACITY, DISPATCH_QUEUE_CAPACITY),
+        storage_client.clone(),
+        writer,
+    );
+
+    // Step 1: ingest the ready batch. The ingesting tick also dispatches exactly two assignments
+    // (`job_a.t0`, `job_b.t0`), filling the dispatch queue; dispatch is frozen from here on because
+    // the test does not drain yet.
+    tick_until(&mut scheduler, |scheduler| {
+        !scheduler.buffered_tasks.is_empty()
+    })
+    .await?;
+    assert_eq!(
+        scheduler
+            .active_jobs
+            .keys()
+            .copied()
+            .collect::<HashSet<_>>(),
+        HashSet::from([job_a.0, job_b.0]),
+    );
+    assert_eq!(
+        scheduler
+            .pending_jobs
+            .keys()
+            .copied()
+            .collect::<HashSet<_>>(),
+        HashSet::from([job_p.0, job_q.0]),
+    );
+
+    // Step 2: with dispatch frozen, deliver the finalizing batch for one active job, `job_b`, and
+    // one pending job, `job_q`, before any of their remaining tasks can dispatch.
+    let finalizing_batch = make_finalizing_batch(&[job_b, job_q], finalizing_task_id);
+    if is_commit {
+        storage_client.push_commit_ready_batch(DEFAULT_SESSION_ID, finalizing_batch);
+    } else {
+        storage_client.push_cleanup_ready_batch(DEFAULT_SESSION_ID, finalizing_batch);
+    }
+    tick_until(&mut scheduler, |scheduler| {
+        scheduler.finalizing_jobs.contains(&job_b.0) && scheduler.finalizing_jobs.contains(&job_q.0)
+    })
+    .await?;
+
+    // Step 3: both jobs left the placement state and their buffered regular tasks are discarded;
+    // only their finalizing assignments remain queued, in arrival order.
+    assert!(!scheduler.active_jobs.contains_key(&job_b.0));
+    assert!(!scheduler.pending_jobs.contains_key(&job_q.0));
+    assert!(
+        scheduler.buffered_tasks.iter().all(|&(job_id, task_id)| {
+            (job_id != job_b.0 && job_id != job_q.0) || !matches!(task_id, TaskId::Index(_))
+        }),
+        "a finalized job still has buffered regular tasks",
+    );
+    let finalizing_queue = if is_commit {
+        &scheduler.commit_ready_jobs
+    } else {
+        &scheduler.cleanup_ready_jobs
+    };
+    assert_eq!(
+        finalizing_queue.iter().copied().collect::<Vec<_>>(),
+        vec![job_b, job_q],
+    );
+
+    // Step 4: unfreeze. Every remaining assignment is accounted for below: the pre-freeze
+    // assignments already queued, one finalizing task per finalized job, `job_a`'s remaining
+    // tasks (its first task dispatched pre-freeze), and the full task set of `job_p`, which
+    // backfills `job_b`'s freed slot.
+
+    // total number of assignments = pre-freeze assignments + finalizing assignments +
+    //     remaining `job_a` assignments + full `job_p` assignments
+    let num_assignments =
+        NUM_PRE_FREEZE_ASSIGNMENTS + NUM_FINALIZED_JOBS + (TASKS_PER_JOB - 1) + TASKS_PER_JOB;
+    let assignments = tick_and_drain_n(&mut scheduler, &reader, num_assignments).await?;
+    assert_no_further_assignments(&mut scheduler, &reader).await?;
+    assert_eq!(scheduler.buffered_tasks.len(), 0);
+
+    let triples = make_assignment_tuple(&assignments);
+
+    // The pre-freeze head is exactly `job_a.t0`, `job_b.t0`.
+    assert_eq!(
+        &triples[..NUM_PRE_FREEZE_ASSIGNMENTS],
+        &[
+            (job_a.0, job_a.1, TaskId::Index(0)),
+            (job_b.0, job_b.1, TaskId::Index(0)),
+        ],
+    );
+
+    // Each finalized job's finalizing task dispatches exactly once, in arrival (FIFO) order.
+    let finalizing_assignments: Vec<_> = triples
+        .iter()
+        .filter(|&&(_, _, task_id)| task_id == finalizing_task_id)
+        .copied()
+        .collect();
+    assert_eq!(
+        finalizing_assignments,
+        vec![
+            (job_b.0, job_b.1, finalizing_task_id),
+            (job_q.0, job_q.1, finalizing_task_id),
+        ],
+    );
+
+    let job_a_tasks: Vec<TaskId> = triples
+        .iter()
+        .filter(|&&(job_id, ..)| job_id == job_a.0)
+        .map(|&(_, _, task_id)| task_id)
+        .collect();
+    assert_eq!(
+        job_a_tasks,
+        vec![TaskId::Index(0), TaskId::Index(1), TaskId::Index(2)],
+    );
+
+    let job_b_regular: Vec<_> = triples
+        .iter()
+        .filter(|&&(job_id, _, task_id)| job_id == job_b.0 && matches!(task_id, TaskId::Index(_)))
+        .copied()
+        .collect();
+    assert_eq!(job_b_regular, vec![(job_b.0, job_b.1, TaskId::Index(0))]);
+
+    let job_p_tasks: Vec<TaskId> = triples
+        .iter()
+        .filter(|&&(job_id, ..)| job_id == job_p.0)
+        .map(|&(_, _, task_id)| task_id)
+        .collect();
+    assert_eq!(
+        job_p_tasks,
+        vec![TaskId::Index(0), TaskId::Index(1), TaskId::Index(2)],
+    );
+
+    let job_q_regular: Vec<_> = triples
+        .iter()
+        .filter(|&&(job_id, _, task_id)| job_id == job_q.0 && matches!(task_id, TaskId::Index(_)))
+        .copied()
+        .collect();
+    assert_eq!(job_q_regular, []);
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn cleanup_ready_drops_active_and_pending_jobs() -> anyhow::Result<()> {
+    assert_finalizing_ready_drops_jobs(TaskId::Cleanup).await
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn commit_ready_drops_active_and_pending_jobs() -> anyhow::Result<()> {
+    assert_finalizing_ready_drops_jobs(TaskId::Commit).await
+}

From 9ef086caca2d2ce47089fe18c10904e298a54353 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 19:03:25 -0400
Subject: [PATCH 12/14] Add commit and cleanup testing.

---
 .../src/core_impl/round_robin/implementation.rs             | 2 ++
 .../spider-scheduler/src/core_impl/round_robin/tests.rs     | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
index 3d7b1214..c3947683 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
@@ -171,6 +171,8 @@ impl<
 }
 
 /// A FIFO queue of a job's buffered ready tasks.
+#[derive(Eq, PartialEq)]
+#[derive(Debug)]
 pub(super) struct JobTaskQueue {
     job_id: JobId,
     resource_group_id: ResourceGroupId,
diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
index 14242482..f5d4118c 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/tests.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -903,6 +903,12 @@ async fn assert_finalizing_ready_drops_jobs(finalizing_task_id: TaskId) -> anyho
         .collect();
     assert_eq!(job_q_regular, []);
 
+    assert!(scheduler.buffered_tasks.is_empty());
+    assert!(scheduler.pending_jobs.is_empty());
+    assert!(scheduler.pending_job_queue.is_empty());
+    assert!(scheduler.commit_ready_jobs.is_empty());
+    assert!(scheduler.cleanup_ready_jobs.is_empty());
+
     Ok(())
 }
 

From 7af3840b7569d5e78014303f52f744c76aa14541 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 19:17:35 -0400
Subject: [PATCH 13/14] Add test for session bump.

---
 .../core_impl/round_robin/implementation.rs   |   3 +-
 .../src/core_impl/round_robin/tests.rs        | 438 +++++++++++-------
 2 files changed, 271 insertions(+), 170 deletions(-)

diff --git a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
index c3947683..68fb27c9 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
@@ -171,8 +171,7 @@ impl<
 }
 
 /// A FIFO queue of a job's buffered ready tasks.
-#[derive(Eq, PartialEq)]
-#[derive(Debug)]
+#[derive(Eq, PartialEq, Debug)]
 pub(super) struct JobTaskQueue {
     job_id: JobId,
     resource_group_id: ResourceGroupId,
diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
index f5d4118c..86d4b092 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/tests.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -110,6 +110,11 @@ impl MockStorageClient {
         self.inner.session_id.load(Ordering::Relaxed)
     }
 
+    /// Sets the session reported on polls that have no scripted batch.
+    fn set_session(&self, session_id: SessionId) {
+        self.inner.session_id.store(session_id, Ordering::Relaxed);
+    }
+
     /// Serves one poll from the given lane's script.
     ///
     /// # Returns
@@ -449,169 +454,6 @@ fn zero_capacity_configs_are_rejected() {
     }
 }
 
-#[tokio::test(flavor = "multi_thread")]
-async fn single_capacity_pool_schedules_jobs_serially() -> anyhow::Result<()> {
-    const NUM_JOBS: usize = 3;
-    const TASKS_PER_JOB: usize = 5;
-    const DUP_EVERY: usize = 3;
-    const DISPATCH_QUEUE_CAPACITY: usize = 32;
-
-    let jobs = make_jobs(NUM_JOBS);
-    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
-    storage_client.push_ready_batch(
-        DEFAULT_SESSION_ID,
-        make_ready_batch(&jobs, TASKS_PER_JOB, DUP_EVERY),
-    );
-
-    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
-    let config = make_config(1, DISPATCH_QUEUE_CAPACITY);
-    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
-
-    let assignments = drain_n(&reader, NUM_JOBS * TASKS_PER_JOB).await?;
-    assert_no_more_assignments(&reader).await?;
-
-    // With an active job pool of capacity 1, round-robin degenerates to serial job FIFO: the
-    // rotation holds a single job at a time, so each job's tasks dispatch as one consecutive
-    // single-job rotation, in job-arrival order.
-    for (segment, job) in assignments.chunks(TASKS_PER_JOB).zip(&jobs) {
-        assert_strict_rotation(segment, std::slice::from_ref(job), TASKS_PER_JOB);
-    }
-
-    cancellation_token.cancel();
-    scheduler_handle.await.expect("scheduler task panicked")?;
-    Ok(())
-}
-
-#[tokio::test(flavor = "multi_thread")]
-async fn active_jobs_dispatch_in_round_robin_order() -> anyhow::Result<()> {
-    const NUM_JOBS: usize = 10;
-    const TASKS_PER_JOB: usize = 5;
-    const DUP_EVERY: usize = 4;
-    const DISPATCH_QUEUE_CAPACITY: usize = 32;
-
-    let jobs = make_jobs(NUM_JOBS);
-    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
-    storage_client.push_ready_batch(
-        DEFAULT_SESSION_ID,
-        make_ready_batch(&jobs, TASKS_PER_JOB, DUP_EVERY),
-    );
-
-    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
-    let config = make_config(NUM_JOBS, DISPATCH_QUEUE_CAPACITY);
-    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
-
-    let assignments = drain_n(&reader, NUM_JOBS * TASKS_PER_JOB).await?;
-    assert_no_more_assignments(&reader).await?;
-
-    // All 10 jobs fit into the active job pool, so no job ever pends and dispatch follows the
-    // strict rotation: task 0 of every job in batch order, then task 1 of every job, and so on. The
-    // exact count of 50 (with no trailing assignments) also proves the in-batch duplicates were
-    // deduplicated.
-    assert_strict_rotation(&assignments, &jobs, TASKS_PER_JOB);
-
-    cancellation_token.cancel();
-    scheduler_handle.await.expect("scheduler task panicked")?;
-    Ok(())
-}
-
-#[tokio::test(flavor = "multi_thread")]
-async fn pending_jobs_promote_and_schedule_round_robin() -> anyhow::Result<()> {
-    const ACTIVE_JOB_QUEUE_CAPACITY: usize = 10;
-    const NUM_JOBS: usize = 20;
-    const TASKS_PER_JOB: usize = 5;
-    const DUP_EVERY: usize = 5;
-    const DISPATCH_QUEUE_CAPACITY: usize = 32;
-
-    let jobs = make_jobs(NUM_JOBS);
-    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
-    storage_client.push_ready_batch(
-        DEFAULT_SESSION_ID,
-        make_ready_batch(&jobs, TASKS_PER_JOB, DUP_EVERY),
-    );
-
-    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
-    let config = make_config(ACTIVE_JOB_QUEUE_CAPACITY, DISPATCH_QUEUE_CAPACITY);
-    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
-
-    let assignments = drain_n(&reader, NUM_JOBS * TASKS_PER_JOB).await?;
-    assert_no_more_assignments(&reader).await?;
-
-    let (active_jobs, pending_jobs) = jobs.split_at(ACTIVE_JOB_QUEUE_CAPACITY);
-    let (phase1, phase2) = assignments.split_at(ACTIVE_JOB_QUEUE_CAPACITY * TASKS_PER_JOB);
-
-    // Phase 1: the first 10 jobs in batch order fill the active job pool and dispatch in strict
-    // rotation; the pending jobs must not appear while the active jobs still have tasks.
-    assert_strict_rotation(phase1, active_jobs, TASKS_PER_JOB);
-
-    // Phase 2: once the active jobs exhaust, the 10 pending jobs are promoted and scheduled
-    // round-robin. The exact slot order after the retire-and-promote wave is an implementation
-    // detail of the rotation bookkeeping, so assert the round-robin property instead of one
-    // hard-coded sequence.
-    assert_round_robin_property(phase2, pending_jobs, TASKS_PER_JOB);
-
-    cancellation_token.cancel();
-    scheduler_handle.await.expect("scheduler task panicked")?;
-    Ok(())
-}
-
-#[tokio::test(flavor = "multi_thread")]
-async fn commit_and_cleanup_dispatch_once_per_cycle() -> anyhow::Result<()> {
-    const NUM_ACTIVE_JOBS: usize = 4;
-    const TASKS_PER_JOB: usize = 3;
-    const NUM_FINALIZING_JOBS_PER_LANE: usize = 3;
-    const DISPATCH_QUEUE_CAPACITY: usize = 1024;
-
-    let active_jobs = make_jobs(NUM_ACTIVE_JOBS);
-    let commit_ready_jobs = make_jobs(NUM_FINALIZING_JOBS_PER_LANE);
-    let cleanup_ready_jobs = make_jobs(NUM_FINALIZING_JOBS_PER_LANE);
-
-    let storage_client = MockStorageClient::new(DEFAULT_SESSION_ID);
-    storage_client.push_ready_batch(
-        DEFAULT_SESSION_ID,
-        make_ready_batch(&active_jobs, TASKS_PER_JOB, 0),
-    );
-    let mut commit_ready_batch = make_finalizing_batch(&commit_ready_jobs, TaskId::Commit);
-    // Duplicate one commit-ready entry within the batch: it must dispatch exactly once.
-    commit_ready_batch.push(commit_ready_batch[0]);
-    storage_client.push_commit_ready_batch(DEFAULT_SESSION_ID, commit_ready_batch);
-    storage_client.push_cleanup_ready_batch(
-        DEFAULT_SESSION_ID,
-        make_finalizing_batch(&cleanup_ready_jobs, TaskId::Cleanup),
-    );
-
-    let (writer, reader) = create_dispatch_queue(DISPATCH_QUEUE_CAPACITY, DEFAULT_SESSION_ID);
-    let config = make_config(NUM_ACTIVE_JOBS, DISPATCH_QUEUE_CAPACITY);
-    let (scheduler_handle, cancellation_token) = spawn_scheduler(config, storage_client, writer);
-
-    let num_assignments = NUM_ACTIVE_JOBS * TASKS_PER_JOB + 2 * NUM_FINALIZING_JOBS_PER_LANE;
-    let assignments = drain_n(&reader, num_assignments).await?;
-    assert_no_more_assignments(&reader).await?;
-
-    // The rotation is [commit lane, cleanup lane, active jobs...], so every cycle dispatches
-    // exactly one commit task and one cleanup task (while their queues are non-empty), each lane
-    // drained FIFO, followed by one task of every active job.
-    let expected: Vec<(JobId, ResourceGroupId, TaskId)> = (0..TASKS_PER_JOB)
-        .flat_map(|round| {
-            let (commit_job_id, commit_resource_group_id) = commit_ready_jobs[round];
-            let (cleanup_job_id, cleanup_resource_group_id) = cleanup_ready_jobs[round];
-            std::iter::once((commit_job_id, commit_resource_group_id, TaskId::Commit))
-                .chain(std::iter::once((
-                    cleanup_job_id,
-                    cleanup_resource_group_id,
-                    TaskId::Cleanup,
-                )))
-                .chain(active_jobs.iter().map(move |&(job_id, resource_group_id)| {
-                    (job_id, resource_group_id, TaskId::Index(round))
-                }))
-        })
-        .collect();
-    assert_eq!(make_assignment_tuple(&assignments), expected);
-
-    cancellation_token.cancel();
-    scheduler_handle.await.expect("scheduler task panicked")?;
-    Ok(())
-}
-
 /// # Returns
 ///
 /// A white-box scheduler wired to the given storage client and sink, to be driven by manual
@@ -658,7 +500,8 @@ async fn tick_until(
 ///
 /// # Returns
 ///
-/// The drained assignments in FIFO order on success.
+/// The drained assignments in FIFO order on success, each paired with the session under which it
+/// was dequeued.
 ///
 /// # Errors
 ///
@@ -671,7 +514,7 @@ async fn tick_and_drain_n(
     scheduler: &mut TestScheduler,
     reader: &DispatchQueueReader,
     n: usize,
-) -> anyhow::Result<Vec<TaskAssignment>> {
+) -> anyhow::Result<Vec<(SessionId, TaskAssignment)>> {
     let deadline = tokio::time::Instant::now() + DRAIN_DEADLINE;
     let mut assignments = Vec::with_capacity(n);
     while assignments.len() < n {
@@ -682,8 +525,8 @@ async fn tick_and_drain_n(
             );
         }
         scheduler.tick().await?;
-        while let Some((_session_id, assignment)) = reader.dequeue(Duration::ZERO).await? {
-            assignments.push(assignment);
+        while let Some((session_id, assignment)) = reader.dequeue(Duration::ZERO).await? {
+            assignments.push((session_id, assignment));
         }
         tokio::task::yield_now().await;
     }
@@ -840,7 +683,12 @@ async fn assert_finalizing_ready_drops_jobs(finalizing_task_id: TaskId) -> anyho
     //     remaining `job_a` assignments + full `job_p` assignments
     let num_assignments =
         NUM_PRE_FREEZE_ASSIGNMENTS + NUM_FINALIZED_JOBS + (TASKS_PER_JOB - 1) + TASKS_PER_JOB;
-    let assignments = tick_and_drain_n(&mut scheduler, &reader, num_assignments).await?;
+    let assignments: Vec<TaskAssignment> =
+        tick_and_drain_n(&mut scheduler, &reader, num_assignments)
+            .await?
+            .into_iter()
+            .map(|(_session_id, assignment)| assignment)
+            .collect();
     assert_no_further_assignments(&mut scheduler, &reader).await?;
     assert_eq!(scheduler.buffered_tasks.len(), 0);
 
@@ -912,6 +760,169 @@ async fn assert_finalizing_ready_drops_jobs(finalizing_task_id: TaskId) -> anyho
     Ok(())
 }
 
+#[tokio::test(flavor = "multi_thread")]
+async fn single_capacity_pool_schedules_jobs_serially() -> anyhow::Result<()> {
+    const JOB_COUNT: usize = 3;
+    const TASKS_EACH: usize = 5;
+    const DUPLICATE_EVERY: usize = 3;
+    const POOL_CAPACITY: usize = 1;
+    const QUEUE_CAPACITY: usize = 32;
+
+    let job_list = make_jobs(JOB_COUNT);
+    let mock_storage = MockStorageClient::new(DEFAULT_SESSION_ID);
+    let ready_batch = make_ready_batch(&job_list, TASKS_EACH, DUPLICATE_EVERY);
+    mock_storage.push_ready_batch(DEFAULT_SESSION_ID, ready_batch);
+
+    let (queue_writer, queue_reader) = create_dispatch_queue(QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let pool_config = make_config(POOL_CAPACITY, QUEUE_CAPACITY);
+    let (handle, stop_token) = spawn_scheduler(pool_config, mock_storage, queue_writer);
+
+    let dispatched = drain_n(&queue_reader, JOB_COUNT * TASKS_EACH).await?;
+    assert_no_more_assignments(&queue_reader).await?;
+
+    // An active job pool of capacity 1 holds a single job at a time, so round-robin collapses
+    // into serial job FIFO. Cut the dispatched sequence into runs of one job's worth of tasks:
+    // each run must be the single-job rotation of the job at the same position in job-arrival
+    // order.
+    for (job, job_run) in job_list.iter().zip(dispatched.chunks(TASKS_EACH)) {
+        assert_strict_rotation(job_run, std::slice::from_ref(job), TASKS_EACH);
+    }
+
+    stop_token.cancel();
+    handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn active_jobs_dispatch_in_round_robin_order() -> anyhow::Result<()> {
+    const JOB_COUNT: usize = 10;
+    const TASKS_EACH: usize = 5;
+    const DUPLICATE_EVERY: usize = 4;
+    const QUEUE_CAPACITY: usize = 32;
+    const EXPECTED_TOTAL: usize = JOB_COUNT * TASKS_EACH;
+
+    let job_list = make_jobs(JOB_COUNT);
+    let mock_storage = MockStorageClient::new(DEFAULT_SESSION_ID);
+    let ready_batch = make_ready_batch(&job_list, TASKS_EACH, DUPLICATE_EVERY);
+    mock_storage.push_ready_batch(DEFAULT_SESSION_ID, ready_batch);
+
+    let (queue_writer, queue_reader) = create_dispatch_queue(QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let pool_config = make_config(JOB_COUNT, QUEUE_CAPACITY);
+    let (handle, stop_token) = spawn_scheduler(pool_config, mock_storage, queue_writer);
+
+    let dispatched = drain_n(&queue_reader, EXPECTED_TOTAL).await?;
+    assert_no_more_assignments(&queue_reader).await?;
+
+    // The active job pool is as large as the job count, so every job is admitted at once and
+    // none ever pends. Dispatch must therefore follow the strict rotation: task 0 of every job
+    // in batch order, then task 1 of every job, and so on. Draining exactly 50 assignments with
+    // nothing trailing also proves that the in-batch duplicates were deduplicated instead of
+    // being dispatched twice.
+    assert_strict_rotation(&dispatched, &job_list, TASKS_EACH);
+
+    stop_token.cancel();
+    handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn pending_jobs_promote_and_schedule_round_robin() -> anyhow::Result<()> {
+    const POOL_CAPACITY: usize = 10;
+    const JOB_COUNT: usize = 20;
+    const TASKS_EACH: usize = 5;
+    const DUPLICATE_EVERY: usize = 5;
+    const QUEUE_CAPACITY: usize = 32;
+
+    let job_list = make_jobs(JOB_COUNT);
+    let mock_storage = MockStorageClient::new(DEFAULT_SESSION_ID);
+    let ready_batch = make_ready_batch(&job_list, TASKS_EACH, DUPLICATE_EVERY);
+    mock_storage.push_ready_batch(DEFAULT_SESSION_ID, ready_batch);
+
+    let (queue_writer, queue_reader) = create_dispatch_queue(QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let pool_config = make_config(POOL_CAPACITY, QUEUE_CAPACITY);
+    let (handle, stop_token) = spawn_scheduler(pool_config, mock_storage, queue_writer);
+
+    let dispatched = drain_n(&queue_reader, JOB_COUNT * TASKS_EACH).await?;
+    assert_no_more_assignments(&queue_reader).await?;
+
+    // The first `POOL_CAPACITY` jobs in batch order are expected to be admitted right away; the
+    // rest must pend. Cut both the job list and the dispatched sequence at that boundary.
+    let (admitted_jobs, later_jobs) = job_list.split_at(POOL_CAPACITY);
+    let (first_wave, second_wave) = dispatched.split_at(POOL_CAPACITY * TASKS_EACH);
+
+    // Phase 1: the admitted jobs fill the active job pool and dispatch in strict rotation. No
+    // pending job may show up while the admitted jobs still have tasks.
+    assert_strict_rotation(first_wave, admitted_jobs, TASKS_EACH);
+
+    // Phase 2: once the admitted jobs run out of tasks, the 10 pending jobs are promoted and
+    // scheduled round-robin. The exact slot order after the retire-and-promote wave is an
+    // implementation detail of the rotation bookkeeping, so only the round-robin property is
+    // asserted rather than one hard-coded sequence.
+    assert_round_robin_property(second_wave, later_jobs, TASKS_EACH);
+
+    stop_token.cancel();
+    handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread")]
+async fn commit_and_cleanup_dispatch_once_per_cycle() -> anyhow::Result<()> {
+    const ACTIVE_JOB_COUNT: usize = 4;
+    const TASKS_EACH: usize = 3;
+    const FINALIZING_JOBS_PER_LANE: usize = 3;
+    const QUEUE_CAPACITY: usize = 1024;
+
+    let running_jobs = make_jobs(ACTIVE_JOB_COUNT);
+    let commit_jobs = make_jobs(FINALIZING_JOBS_PER_LANE);
+    let cleanup_jobs = make_jobs(FINALIZING_JOBS_PER_LANE);
+
+    let mock_storage = MockStorageClient::new(DEFAULT_SESSION_ID);
+    let ready_batch = make_ready_batch(&running_jobs, TASKS_EACH, 0);
+    mock_storage.push_ready_batch(DEFAULT_SESSION_ID, ready_batch);
+
+    // The first commit-ready entry is delivered twice within the same batch; the scheduler must
+    // still dispatch that commit task exactly once.
+    let mut commit_batch = make_finalizing_batch(&commit_jobs, TaskId::Commit);
+    let repeated_entry = commit_batch[0];
+    commit_batch.push(repeated_entry);
+    mock_storage.push_commit_ready_batch(DEFAULT_SESSION_ID, commit_batch);
+
+    let cleanup_batch = make_finalizing_batch(&cleanup_jobs, TaskId::Cleanup);
+    mock_storage.push_cleanup_ready_batch(DEFAULT_SESSION_ID, cleanup_batch);
+
+    let (queue_writer, queue_reader) = create_dispatch_queue(QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let pool_config = make_config(ACTIVE_JOB_COUNT, QUEUE_CAPACITY);
+    let (handle, stop_token) = spawn_scheduler(pool_config, mock_storage, queue_writer);
+
+    let regular_total = ACTIVE_JOB_COUNT * TASKS_EACH;
+    let finalizing_total = 2 * FINALIZING_JOBS_PER_LANE;
+    let dispatched = drain_n(&queue_reader, regular_total + finalizing_total).await?;
+    assert_no_more_assignments(&queue_reader).await?;
+
+    // The rotation is [commit lane, cleanup lane, active jobs...]. Every cycle is therefore
+    // expected to dispatch exactly one commit task and one cleanup task (as long as their
+    // queues are non-empty), each lane drained FIFO, followed by one task of every active job.
+    // Build that sequence cycle by cycle.
+    let mut expected: Vec<(JobId, ResourceGroupId, TaskId)> = Vec::new();
+    for cycle in 0..TASKS_EACH {
+        // Lane slots lead each cycle; both lanes are served in FIFO order.
+        let (commit_job_id, commit_group_id) = commit_jobs[cycle];
+        let (cleanup_job_id, cleanup_group_id) = cleanup_jobs[cycle];
+        expected.push((commit_job_id, commit_group_id, TaskId::Commit));
+        expected.push((cleanup_job_id, cleanup_group_id, TaskId::Cleanup));
+        // One task of every active job follows, in batch order.
+        for &(job_id, group_id) in &running_jobs {
+            expected.push((job_id, group_id, TaskId::Index(cycle)));
+        }
+    }
+    let observed = make_assignment_tuple(&dispatched);
+    assert_eq!(observed, expected);
+
+    stop_token.cancel();
+    handle.await.expect("scheduler task panicked")?;
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread")]
 async fn cleanup_ready_drops_active_and_pending_jobs() -> anyhow::Result<()> {
     assert_finalizing_ready_drops_jobs(TaskId::Cleanup).await
@@ -921,3 +932,94 @@ async fn cleanup_ready_drops_active_and_pending_jobs() -> anyhow::Result<()> {
 async fn commit_ready_drops_active_and_pending_jobs() -> anyhow::Result<()> {
     assert_finalizing_ready_drops_jobs(TaskId::Commit).await
 }
+
+/// Verifies that a storage session bump wipes every piece of old-session scheduler state.
+///
+/// 1. Ingests an old-session batch while the dispatch queue is never drained, so part of the
+///    batch is dispatched and the remainder stays buffered.
+/// 2. Bumps the storage session and delivers a batch of new jobs under the new session.
+/// 3. Asserts that only the new jobs are left in the placement state.
+/// 4. Drains the dispatch queue and asserts that it yields exactly the new jobs' tasks in strict
+///    rotation, each stamped with the new session.
+#[tokio::test(flavor = "multi_thread")]
+async fn session_bump_clears_buffered_tasks() -> anyhow::Result<()> {
+    const STALE_JOB_COUNT: usize = 4;
+    const FRESH_JOB_COUNT: usize = 2;
+    const POOL_CAPACITY: usize = 4;
+    const QUEUE_CAPACITY: usize = 4;
+    const STALE_TASKS_EACH: usize = 4;
+    const BUMPED_SESSION_ID: SessionId = DEFAULT_SESSION_ID + 1;
+    const FRESH_TASKS_EACH: usize = 2;
+
+    let stale_jobs = make_jobs(STALE_JOB_COUNT);
+    let fresh_jobs = make_jobs(FRESH_JOB_COUNT);
+
+    let mock_storage = MockStorageClient::new(DEFAULT_SESSION_ID);
+    let stale_batch = make_ready_batch(&stale_jobs, STALE_TASKS_EACH, 0);
+    mock_storage.push_ready_batch(DEFAULT_SESSION_ID, stale_batch);
+
+    let (queue_writer, queue_reader) = create_dispatch_queue(QUEUE_CAPACITY, DEFAULT_SESSION_ID);
+    let pool_config = make_config(POOL_CAPACITY, QUEUE_CAPACITY);
+    let mut scheduler = make_scheduler(pool_config, mock_storage.clone(), queue_writer);
+
+    // Step 1: ingest the old-session batch. The test never drains the dispatch queue, so the
+    // ingesting tick can only dispatch enough assignments to fill it; every remaining task
+    // stays in the buffer.
+    tick_until(&mut scheduler, |s| !s.buffered_tasks.is_empty()).await?;
+    let stale_task_total = stale_jobs.len() * STALE_TASKS_EACH;
+    assert_eq!(scheduler.active_jobs.len(), stale_jobs.len());
+    assert_eq!(
+        scheduler.buffered_tasks.len(),
+        stale_task_total - QUEUE_CAPACITY,
+    );
+
+    // Step 2: bump the session on the storage side, deliver a batch under the new session, and
+    // tick until the scheduler has adopted the new session and activated every new job.
+    mock_storage.set_session(BUMPED_SESSION_ID);
+    let fresh_batch = make_ready_batch(&fresh_jobs, FRESH_TASKS_EACH, 0);
+    mock_storage.push_ready_batch(BUMPED_SESSION_ID, fresh_batch);
+    tick_until(&mut scheduler, |s| {
+        // Job placement only counts once the bump itself has been observed.
+        if s.storage_session_id != BUMPED_SESSION_ID {
+            return false;
+        }
+        fresh_jobs
+            .iter()
+            .all(|(job_id, _)| s.active_jobs.contains_key(job_id))
+    })
+    .await?;
+
+    // Step 3: nothing from the old session may be left. The active jobs must be exactly the
+    // new jobs, no job may be pending, and every task still buffered must belong to a new
+    // job.
+    let observed_active: HashSet<_> = scheduler.active_jobs.keys().copied().collect();
+    let expected_active: HashSet<_> = fresh_jobs.iter().map(|&(job_id, _)| job_id).collect();
+    assert_eq!(observed_active, expected_active);
+    assert_eq!(scheduler.pending_jobs.len(), 0);
+
+    // Anything left in the buffer must belong to a new job.
+    for (buffered_job_id, _) in &scheduler.buffered_tasks {
+        assert!(
+            expected_active.contains(buffered_job_id),
+            "an old-session task survived the session bump",
+        );
+    }
+
+    // Step 4: the session bump drained the dispatch queue, so the frozen old-session
+    // assignments are gone. Draining must yield exactly the new jobs' tasks and nothing
+    // further.
+    let fresh_task_total = fresh_jobs.len() * FRESH_TASKS_EACH;
+    let stamped = tick_and_drain_n(&mut scheduler, &queue_reader, fresh_task_total).await?;
+    assert_no_further_assignments(&mut scheduler, &queue_reader).await?;
+
+    // Every drained assignment must carry the new session; with the session stamps stripped,
+    // the assignments must follow the strict rotation over the new jobs.
+    let mut dispatched: Vec<TaskAssignment> = Vec::with_capacity(stamped.len());
+    for (session_id, assignment) in stamped {
+        assert_eq!(session_id, BUMPED_SESSION_ID);
+        dispatched.push(assignment);
+    }
+    assert_strict_rotation(&dispatched, &fresh_jobs, FRESH_TASKS_EACH);
+
+    Ok(())
+}

From 4c300f37fb6946294e78649e4689fd9329b6d7e0 Mon Sep 17 00:00:00 2001
From: LinZhihao-723 <pleiades3190@gmail.com>
Date: Thu, 4 Jun 2026 21:46:37 -0400
Subject: [PATCH 14/14] Done with implementation and testing.

---
 Cargo.lock                                    |   1 +
 components/spider-scheduler/Cargo.toml        |   3 +-
 .../core_impl/round_robin/implementation.rs   | 184 ++++++++++++++++--
 .../src/core_impl/round_robin/tests.rs        |  25 +++
 components/spider-scheduler/src/error.rs      |   3 +
 5 files changed, 202 insertions(+), 14 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6d96c4b7..4fc8b10b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1513,6 +1513,7 @@ dependencies = [
  "thiserror",
  "tokio",
  "tokio-util",
+ "tracing",
 ]
 
 [[package]]
diff --git a/components/spider-scheduler/Cargo.toml b/components/spider-scheduler/Cargo.toml
index a928fddf..042dcd49 100644
--- a/components/spider-scheduler/Cargo.toml
+++ b/components/spider-scheduler/Cargo.toml
@@ -10,11 +10,12 @@ path = "src/lib.rs"
 [dependencies]
 async-channel = "2.3.1"
 async-trait = "0.1.89"
+serde = { version = "1.0.228", features = ["derive"] }
 spider-core = { path = "../spider-core" }
 thiserror = "2.0.18"
 tokio = { version = "1.52.3", features = ["macros", "rt", "sync", "time"] }
 tokio-util = "0.7.18"
-serde = { version = "1.0.228", features = ["derive"] }
+tracing = { version = "0.1.41", default-features = false, features = ["std"] }
 
 [dev-dependencies]
 anyhow = "1.0.102"
diff --git a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
index 68fb27c9..51dbb160 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/implementation.rs
@@ -3,7 +3,7 @@
 
 use std::{
     collections::{HashMap, HashSet, VecDeque},
-    time::Duration,
+    time::{Duration, SystemTime},
 };
 
 use async_trait::async_trait;
@@ -23,7 +23,7 @@ use crate::{
 };
 
 /// The configuration of the round-robin scheduler core.
-#[derive(Deserialize)]
+#[derive(Debug, Deserialize)]
 pub struct RoundRobinConfig {
     /// The capacity of the active job queue. The scheduler will make task assignments from these
     /// jobs in a round-robin manner.
@@ -224,6 +224,7 @@ pub(super) struct RoundRobin<
     pub(super) cancellation_token: CancellationToken,
     pub(super) config: RoundRobinConfig,
     pub(super) storage_session_id: SessionId,
+
     pub(super) buffered_tasks: HashSet<(JobId, TaskId)>,
 
     pub(super) active_jobs: HashMap<JobId, JobTaskQueue>,
@@ -237,6 +238,7 @@ pub(super) struct RoundRobin<
     pub(super) cleanup_ready_jobs: VecDeque<(JobId, ResourceGroupId)>,
 
     pub(super) finalizing_jobs: HashSet<JobId>,
+    pub(super) finalizing_job_queue: VecDeque<(JobId, SystemTime)>,
 
     pub(super) inbound_queue_reader: AsyncInboundQueueReader<SchedulerStorageClientType>,
 }
@@ -272,6 +274,7 @@ impl<
         let finalizing_jobs = HashSet::with_capacity(
             config.commit_ready_task_capacity + config.cleanup_ready_task_capacity,
         );
+        let finalizing_job_queue = VecDeque::new();
         let inbound_queue_reader = AsyncInboundQueueReader::new(storage_client);
         Self {
             sink,
@@ -287,6 +290,7 @@ impl<
             commit_ready_jobs,
             cleanup_ready_jobs,
             finalizing_jobs,
+            finalizing_job_queue,
             inbound_queue_reader,
         }
     }
@@ -300,9 +304,12 @@ impl<
     ///
     /// * Forwards [`Self::consume_inbound_poll_result`]'s return values on failure.
     /// * Forwards [`Self::make_schedule_decisions`]'s return values on failure.
+    /// * Forwards [`Self::retire_expired_finalizing_jobs`]'s return values on failure.
     pub(super) async fn tick(&mut self) -> Result<(), SchedulerError> {
+        tracing::info!("Starting scheduling tick.");
         self.consume_inbound_poll_result().await?;
         self.make_schedule_decisions().await?;
+        self.retire_expired_finalizing_jobs()?;
         Ok(())
     }
 
@@ -327,16 +334,25 @@ impl<
     ///
     /// * Forwards [`Self::tick`]'s return values on failure.
     async fn run(mut self) -> Result<(), SchedulerError> {
+        tracing::info!(
+            config = ? self.config,
+            init_session_id = self.storage_session_id,
+            "Round-robin scheduler started."
+        );
         let tick_interval = Duration::from_millis(self.config.tick_interval_ms);
         loop {
             let now = tokio::time::Instant::now();
             let cancellation_token = self.cancellation_token.clone();
             select! {
                 () = cancellation_token.cancelled() => {
+                    tracing::info!("Round-robin scheduler cancelled. Shutting down.");
                     return Ok(());
                 }
                 result = self.tick() => {
-                    let () = result?;
+                    result.inspect_err(|err| tracing::error!(
+                        err = % err,
+                        "Round-robin scheduler exits on error."
+                    ))?;
                 }
             }
             let elapsed = now.elapsed();
@@ -358,6 +374,7 @@ impl<
         self.commit_ready_jobs.clear();
         self.cleanup_ready_jobs.clear();
         self.finalizing_jobs.clear();
+        self.finalizing_job_queue.clear();
 
         self.active_job_queue = Self::new_active_job_queue(self.config.active_job_queue_capacity);
         self.active_job_queue_round_robin_cursor = 0;
@@ -372,6 +389,7 @@ impl<
     ///
     /// * [`SchedulerError::Internal`] if the given job is not currently active.
     fn retire_active_job(&mut self, job_id: JobId) -> Result<(), SchedulerError> {
+        tracing::info!(job_id = ? job_id, "Retiring active job.");
         if let Some(index) = self.active_job_queue.iter().position(|entry| match entry {
             RoundRobinSlot::Job(id) => *id == job_id,
             _ => false,
@@ -392,6 +410,10 @@ impl<
         }
 
         if let Some(next_pending_job) = self.pop_next_pending_job() {
+            tracing::info!(
+                job_id = ? next_pending_job.job_id,
+                "Pending job promoted to active job."
+            );
             self.active_job_queue
                 .push(RoundRobinSlot::Job(next_pending_job.job_id));
             self.active_jobs
@@ -416,11 +438,50 @@ impl<
 
     /// Removes all of the given job's queued tasks from the buffered-task set.
     fn discard_job_tasks(&mut self, job_entry: JobTaskQueue) {
+        tracing::info!(
+            job_id = ? job_entry.job_id,
+            num_tasks = job_entry.task_ids.len(),
+            "Discarding job tasks."
+        );
         for task_id in job_entry.task_ids {
             self.buffered_tasks.remove(&(job_entry.job_id, task_id));
         }
     }
 
+    /// Marks a job as finalizing (commit-ready or cleanup-ready) and records when it was first
+    /// marked. Ready tasks of a marked job are ignored until the mark expires or is cleared.
+    fn mark_job_finalizing(&mut self, job_id: JobId) {
+        if self.finalizing_jobs.insert(job_id) {
+            self.finalizing_job_queue
+                .push_back((job_id, SystemTime::now()));
+        }
+    }
+
+    /// Retires expired finalizing jobs.
+    ///
+    /// A finalizing job is considered expired once it has remained in the finalizing state for more
+    /// than 6 hours. This timeout is currently hard-coded but may be made configurable through
+    /// [`RoundRobinConfig`] in the future.
+    ///
+    /// # Errors
+    ///
+    /// Never fails at present. When [`SystemTime::elapsed`] fails (wall clock stepped back), the
+    /// front job is kept and re-checked on a later call; `Result` only serves the caller's `?`.
+    #[allow(clippy::unnecessary_wraps)]
+    fn retire_expired_finalizing_jobs(&mut self) -> Result<(), SchedulerError> {
+        const EXPIRATION_TIME: Duration = Duration::from_hours(6);
+        while let Some((job_id, insertion_time)) = self.finalizing_job_queue.front() {
+            let expired = matches!(insertion_time.elapsed(), Ok(age) if age > EXPIRATION_TIME);
+            if !expired {
+                break;
+            }
+            tracing::info!(job_id = ? job_id, "Finalizing job retired.");
+            self.finalizing_jobs.remove(job_id);
+            self.finalizing_job_queue.pop_front();
+        }
+        Ok(())
+    }
+
     /// Loads polled inbound entries into the scheduler's internal buffers.
     ///
     /// If the polled session is newer than the current session, all existing placement states are
@@ -439,7 +500,8 @@ impl<
     /// * [`SchedulerError::InvalidSessionId`] if the polled session is older than the current
     ///   session.
     /// * Forwards [`DispatchQueueSink::bump_session_id`]'s return values on failure.
-    /// * Forwards [`Self::retire_active_job`]'s return values on failure.
+    /// * Forwards [`Self::enqueue_commit_ready_entries`]'s return values on failure.
+    /// * Forwards [`Self::enqueue_cleanup_ready_entries`]'s return values on failure.
     async fn ingest_inbound_entries(
         &mut self,
         curr_session_id: SessionId,
@@ -452,13 +514,40 @@ impl<
             return Err(SchedulerError::InvalidSessionId(storage_session_id));
         }
         if storage_session_id > curr_session_id {
+            tracing::info!(
+                curr_session_id = ? curr_session_id,
+                storage_session_id = ? storage_session_id,
+                "New session detected. Clearing existing placement state and bumping dispatch \
+                 queue session."
+            );
             self.storage_session_id = storage_session_id;
             self.clear();
             self.sink.bump_session_id(storage_session_id).await?;
         }
 
-        // Load commit ready tasks and cleanup ready tasks first to avoid loading a job that
-        // is already cancelled or commit-ready.
+        // Load commit-ready tasks and cleanup-ready tasks first to avoid loading a job that is
+        // already finalizing.
+        self.enqueue_commit_ready_entries(commit_ready_entries)?;
+        self.enqueue_cleanup_ready_entries(cleanup_ready_entries)?;
+        self.enqueue_ready_entries(ready_entries);
+
+        Ok(())
+    }
+
+    /// Enqueues polled commit-ready entries: each entry's job is marked finalizing, queued for a
+    /// commit-task assignment, and removed from the active or pending set.
+    ///
+    /// Entries whose tasks are already buffered are ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`Self::retire_active_job`]'s return values on failure.
+    fn enqueue_commit_ready_entries(
+        &mut self,
+        commit_ready_entries: Vec<InboundEntry>,
+    ) -> Result<(), SchedulerError> {
         for inbound_entry in commit_ready_entries {
             if !self
                 .buffered_tasks
@@ -466,7 +555,13 @@ impl<
             {
                 continue;
             }
-            self.finalizing_jobs.insert(inbound_entry.job_id);
+
+            tracing::info!(
+                job_id = ? inbound_entry.job_id,
+                "Commit-ready task received. Finalizing job."
+            );
+
+            self.mark_job_finalizing(inbound_entry.job_id);
             self.commit_ready_jobs
                 .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
 
@@ -480,6 +575,23 @@ impl<
             }
         }
 
+        Ok(())
+    }
+
+    /// Enqueues polled cleanup-ready entries: each entry's job is marked finalizing, queued for a
+    /// cleanup-task assignment, and removed from the active or pending set.
+    ///
+    /// Entries whose tasks are already buffered are ignored.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    ///
+    /// * Forwards [`Self::retire_active_job`]'s return values on failure.
+    fn enqueue_cleanup_ready_entries(
+        &mut self,
+        cleanup_ready_entries: Vec<InboundEntry>,
+    ) -> Result<(), SchedulerError> {
         for inbound_entry in cleanup_ready_entries {
             if !self
                 .buffered_tasks
@@ -487,7 +599,13 @@ impl<
             {
                 continue;
             }
-            self.finalizing_jobs.insert(inbound_entry.job_id);
+
+            tracing::info!(
+                job_id = ? inbound_entry.job_id,
+                "Cleanup-ready task received. Finalizing job."
+            );
+
+            self.mark_job_finalizing(inbound_entry.job_id);
             self.cleanup_ready_jobs
                 .push_back((inbound_entry.job_id, inbound_entry.resource_group_id));
 
@@ -501,8 +619,19 @@ impl<
             }
         }
 
+        Ok(())
+    }
+
+    /// Enqueues polled regular ready entries into their jobs' task queues.
+    ///
+    /// Entries of finalizing jobs and entries whose tasks are already buffered are ignored.
+    fn enqueue_ready_entries(&mut self, ready_entries: Vec<InboundEntry>) {
         for inbound_entry in ready_entries {
             if self.finalizing_jobs.contains(&inbound_entry.job_id) {
+                tracing::info!(
+                    job_id = ? inbound_entry.job_id,
+                    "Ready task received for a finalizing job. Ignored."
+                );
                 continue;
             }
             if !self
@@ -511,6 +640,13 @@ impl<
             {
                 continue;
             }
+
+            tracing::debug!(
+                job_id = ? inbound_entry.job_id,
+                task_id = ? inbound_entry.task_id,
+                "Inbound task received."
+            );
+
             if let Some(active_job) = self.active_jobs.get_mut(&inbound_entry.job_id) {
                 active_job.enqueue(inbound_entry.task_id);
                 continue;
@@ -519,7 +655,12 @@ impl<
                 pending_job.enqueue(inbound_entry.task_id);
                 continue;
             }
+
             if self.active_jobs.len() < self.config.active_job_queue_capacity {
+                tracing::info!(
+                    job_id = ? inbound_entry.job_id,
+                    "New job received. Placing in active job queue."
+                );
                 self.active_jobs.insert(
                     inbound_entry.job_id,
                     JobTaskQueue::new(
@@ -532,6 +673,11 @@ impl<
                     .push(RoundRobinSlot::Job(inbound_entry.job_id));
                 continue;
             }
+
+            tracing::info!(
+                job_id = ? inbound_entry.job_id,
+                "New job received. Placing in pending job queue."
+            );
             self.pending_jobs.insert(
                 inbound_entry.job_id,
                 JobTaskQueue::new(
@@ -542,8 +688,6 @@ impl<
             );
             self.pending_job_queue.push_back(inbound_entry.job_id);
         }
-
-        Ok(())
     }
 
     /// Consumes the in-flight inbound poll if it has completed, ingesting its entries and starting
@@ -569,6 +713,7 @@ impl<
                 commit_ready_entries,
                 cleanup_ready_entries,
             } => {
+                tracing::info!("Inbound poll completed.");
                 self.ingest_inbound_entries(
                     curr_session_id,
                     storage_session_id,
@@ -600,10 +745,11 @@ impl<
     /// * Forwards [`DispatchQueueSink::enqueue`]'s return values on failure.
     /// * Forwards [`Self::retire_active_job`]'s return values on failure.
     async fn make_schedule_decisions(&mut self) -> Result<(), SchedulerError> {
-        let mut remaining_dispatch_slots = self
+        let dispatch_slots = self
             .config
             .dispatch_queue_capacity
             .saturating_sub(self.sink.size());
+        let mut remaining_dispatch_slots = dispatch_slots;
         while remaining_dispatch_slots > 0 && !self.buffered_tasks.is_empty() {
             if self.active_job_queue_round_robin_cursor >= self.active_job_queue.len() {
                 self.active_job_queue_round_robin_cursor = 0;
@@ -635,7 +781,6 @@ impl<
                         })
                         .await?;
                     self.buffered_tasks.remove(&(job_id, TaskId::Cleanup));
-                    self.finalizing_jobs.remove(&job_id);
                     remaining_dispatch_slots -= 1;
                 }
                 RoundRobinSlot::CommitReady => {
@@ -651,7 +796,6 @@ impl<
                         })
                         .await?;
                     self.buffered_tasks.remove(&(job_id, TaskId::Commit));
-                    self.finalizing_jobs.remove(&job_id);
                     remaining_dispatch_slots -= 1;
                 }
                 RoundRobinSlot::Job(job_id) => {
@@ -677,6 +821,12 @@ impl<
             }
         }
 
+        tracing::info!(
+            dispatch_slots = dispatch_slots,
+            num_task_assignments_enqueued = dispatch_slots - remaining_dispatch_slots,
+            "Decision-making loop completed."
+        );
+
         Ok(())
     }
 
@@ -915,6 +1065,7 @@ impl<StorageClientType: SchedulerStorageClient + 'static>
 
         if max_ready_entries == 0 && max_commit_ready_entries == 0 && max_cleanup_ready_entries == 0
         {
+            tracing::info!("Inbound poll skipped: all entry limits are 0.");
             return Ok(());
         }
 
@@ -954,6 +1105,13 @@ impl<StorageClientType: SchedulerStorageClient + 'static>
             cleanup_ready_handle,
         });
 
+        tracing::info!(
+            max_ready_entries = ? max_ready_entries,
+            max_commit_ready_entries = ? max_commit_ready_entries,
+            max_cleanup_ready_entries = ? max_cleanup_ready_entries,
+            "Inbound poll initiated."
+        );
+
         Ok(())
     }
 }
diff --git a/components/spider-scheduler/src/core_impl/round_robin/tests.rs b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
index 86d4b092..e63fe532 100644
--- a/components/spider-scheduler/src/core_impl/round_robin/tests.rs
+++ b/components/spider-scheduler/src/core_impl/round_robin/tests.rs
@@ -569,6 +569,9 @@ async fn assert_no_further_assignments(
 /// 3. Asserts both jobs leave the placement state with their buffered regular tasks discarded.
 /// 4. Unfreezes and asserts the drained sequence: each finalized job dispatches its finalizing task
 ///    exactly once and no further regular task, while the surviving jobs complete in FIFO order.
+/// 5. Re-delivers regular ready tasks for the finalized jobs alongside a fresh canary job. Asserts
+///    the re-delivered tasks are ignored (the finalizing gate persists after the finalizing tasks
+///    are dispatched) while the canary job schedules normally.
 ///
 /// # Errors
 ///
@@ -756,6 +759,28 @@ async fn assert_finalizing_ready_drops_jobs(finalizing_task_id: TaskId) -> anyho
     assert!(scheduler.pending_job_queue.is_empty());
     assert!(scheduler.commit_ready_jobs.is_empty());
     assert!(scheduler.cleanup_ready_jobs.is_empty());
+    assert_eq!(scheduler.finalizing_jobs.len(), NUM_FINALIZED_JOBS);
+
+    assert!(scheduler.finalizing_jobs.contains(&job_b.0));
+    assert!(scheduler.finalizing_jobs.contains(&job_q.0));
+
+    // Step 5: The finalizing gate remains active after the finalizing tasks have been dispatched,
+    // so re-delivered regular tasks for finalized jobs must be ignored. A fresh canary job is
+    // included in the same batch. Since a batch is ingested atomically within a single tick,
+    // successful dispatch of the canary's tasks proves that the finalized jobs' entries have
+    // already been processed (and ignored), rather than still being in flight.
+    let canary_jobs = make_jobs(1);
+    let mut late_batch = make_ready_batch(&[job_b, job_q], TASKS_PER_JOB, 0);
+    late_batch.extend(make_ready_batch(&canary_jobs, TASKS_PER_JOB, 0));
+    storage_client.push_ready_batch(DEFAULT_SESSION_ID, late_batch);
+
+    let late_assignments: Vec<_> = tick_and_drain_n(&mut scheduler, &reader, TASKS_PER_JOB)
+        .await?
+        .into_iter()
+        .map(|(_session_id, assignment)| assignment)
+        .collect();
+    assert_strict_rotation(&late_assignments, &canary_jobs, TASKS_PER_JOB);
+    assert_no_further_assignments(&mut scheduler, &reader).await?;
 
     Ok(())
 }
diff --git a/components/spider-scheduler/src/error.rs b/components/spider-scheduler/src/error.rs
index 50851809..34bb631e 100644
--- a/components/spider-scheduler/src/error.rs
+++ b/components/spider-scheduler/src/error.rs
@@ -37,4 +37,7 @@ pub enum SchedulerError {
 
     #[error("async result not ready")]
     ResultNotReady,
+
+    #[error(transparent)]
+    SystemTime(#[from] std::time::SystemTimeError),
 }