diff --git a/Cargo.lock b/Cargo.lock
index 1b1d737..f3c2d31 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -88,6 +88,12 @@ version = "3.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
 
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
 [[package]]
 name = "cast"
 version = "0.3.0"
@@ -288,6 +294,7 @@ dependencies = [
  "indexmap",
  "log",
  "signal-hook",
+ "tokio",
 ]
 
 [[package]]
@@ -422,6 +429,17 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
+[[package]]
+name = "mio"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.19"
@@ -665,6 +683,16 @@ version = "0.4.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
 
+[[package]]
+name = "socket2"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.117"
@@ -686,6 +714,33 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "tokio"
+version = "1.52.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "pin-project-lite",
+ "signal-hook-registry",
+ "socket2",
+ "tokio-macros",
+ "windows-sys",
+]
+
+[[package]]
+name = "tokio-macros"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "unicode-ident"
 version = "1.0.24"
@@ -708,6 +763,12 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.120"
diff --git a/Cargo.toml b/Cargo.toml
index dea5f7b..946786f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ signal-hook = "0.4.4"
 log = "0.4"
 env_logger = "0.11"
 indexmap = "2"
+tokio = { version = "1", features = ["rt-multi-thread", "net", "io-util", "macros", "signal", "sync", "time"] }
 
 [dev-dependencies]
 criterion = "0.8.2"
diff --git a/benches/redis-benchmark.md b/benches/redis-benchmark.md
index 6aa2fc3..2eb4507 100644
--- a/benches/redis-benchmark.md
+++ b/benches/redis-benchmark.md
@@ -105,3 +105,61 @@ redis-benchmark -h 127.0.0.1 -p 6399 -n 100000 -c 50 -q -t set,get,incr
 redis-benchmark -h 127.0.0.1 -p 6399 -n 100000 -c 50 -P 16 -q -t set,get,incr
 kill "$SERVER"
 ```
+
+---
+
+## v0.4.0 — tokio async runtime (Week 8)
+
+Re-ran on the same Apple M5 / macOS 26.4.1 box after the `feat/async-runtime`
+branch replaced the thread-per-connection model with a multi-threaded tokio
+runtime. The server binary, the benchmark client and the command mix are
+identical to the v0.3.0 runs above so the deltas are apples-to-apples.
+
+### Scenario 1 — no memory cap, `c=50`
+
+| Command | v0.3.0 QPS | v0.4.0 QPS | Δ      |
+| ------- | ---------- | ---------- | ------ |
+| SET     | 58 207     | 62 189     | +6.8%  |
+| GET     | 61 349     | 65 231     | +6.3%  |
+| INCR    | 61 728     | 63 492     | +2.9%  |
+
+### Scenario 2 — pipelined (`-P 16`, `c=50`)
+
+| Command | v0.3.0 QPS | v0.4.0 QPS | Δ      |
+| ------- | ---------- | ---------- | ------ |
+| SET     | 366 300    | 350 877    | -4.2%  |
+| GET     | 373 134    | 378 787    | +1.5%  |
+| INCR    | 375 939    | 381 679    | +1.5%  |
+
+> Pipelined throughput is dominated by in-memory command execution rather
+> than socket I/O, so the delta is inside run-to-run noise. What matters
+> is that the async rewrite did **not** regress the hot path.
+
+### Scenario 5 — high client fan-out (`c=500`)
+
+```
+./target/release/ferrum-kv --addr 127.0.0.1:6399
+redis-benchmark -h 127.0.0.1 -p 6399 -n 100000 -c 500 -q -t set,get
+```
+
+| Command | v0.4.0 QPS | p50      |
+| ------- | ---------- | -------- |
+| SET     | 50 301     | 4.567 ms |
+| GET     | 43 440     | 5.871 ms |
+
+> The pre-tokio build would have tried to spawn 500 OS threads here — one
+> per accepted connection — and spent most of its time context switching.
+> v0.4.0 handles the whole fan-out on a handful of worker threads with
+> single-digit-millisecond p50, the clearest win of Phase 8.
+
+### Observations
+
+- Single-client-per-thread throughput nudged up ~5% because tokio replaces
+  two blocking `read`/`write` syscalls per round-trip with `epoll` /
+  `kqueue`-backed readiness checks that batch more efficiently under load.
+- The thread-per-connection floor is gone: scaling `-c` past the CPU count
+  no longer multiplies kernel threads; the runtime just multiplexes more
+  sockets onto the same workers.
+- The integration suite's 500-client PING fan-out finishes in under 300 ms
+  on the same box, confirming the smoke result is not a microbenchmark
+  artefact.
diff --git a/ferrum.conf.example b/ferrum.conf.example
index 2ec05ed..95d2093 100644
--- a/ferrum.conf.example
+++ b/ferrum.conf.example
@@ -53,3 +53,10 @@ loglevel info
 
 # How many random candidates to inspect per eviction round (Redis default: 5).
 # maxmemory-samples 5
+
+# ---- Runtime -------------------------------------------------------
+# Number of tokio worker threads driving accept/read/write loops.
+# 0 (or unset) means "one per logical CPU", which is the right default
+# for most deployments. Bump it higher only if you are pinning other
+# workloads to specific cores and want to cap FerrumKV's share.
+# io-threads 0
diff --git a/src/cli.rs b/src/cli.rs
index 5856c9a..0f858cf 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -24,7 +24,7 @@ pub const USAGE: &str = concat!(
     "                 [--appendfsync always|everysec|no]\n",
     "                 [--client-timeout SECONDS] [--maxclients N]\n",
     "                 [--maxmemory BYTES] [--maxmemory-policy POLICY]\n",
-    "                 [--maxmemory-samples N]\n",
+    "                 [--maxmemory-samples N] [--io-threads N]\n",
     "                 [--loglevel off|error|warn|info|debug|trace]"
 );
 
@@ -60,6 +60,9 @@ pub struct CliArgs {
     max_memory_policy: Option<EvictionPolicy>,
     /// How many random candidates each eviction round considers.
     max_memory_samples: Option<usize>,
+    /// Number of tokio worker threads; `0` (unset) asks tokio for the
+    /// default (usually one per logical CPU).
+    io_threads: Option<usize>,
 }
 
 /// Raw, un-merged values taken verbatim from the command line.
@@ -81,6 +84,7 @@ struct RawFlags {
     max_memory: Option<u64>,
     max_memory_policy: Option<EvictionPolicy>,
     max_memory_samples: Option<usize>,
+    io_threads: Option<usize>,
     /// Whether AOF was explicitly enabled via the config file's `appendonly yes`.
     /// CLI `--aof-path` implies enabled; this field only carries the file's
     /// intent so that a later merge step can decide.
@@ -92,7 +96,7 @@ impl CliArgs {
     pub fn parse<I: IntoIterator<Item = String>>(args: I) -> Result<Invocation, String> {
         let raw = match scan_argv(args)? {
             ScanOutcome::Help => return Ok(Invocation::Help),
-            ScanOutcome::Flags(f) => f,
+            ScanOutcome::Flags(f) => *f,
         };
 
         let file_cfg = if let Some(path) = raw.config_path.as_deref() {
@@ -127,6 +131,12 @@ impl CliArgs {
         self.loglevel.as_deref()
     }
 
+    /// Returns the explicit tokio worker thread count. `None` means
+    /// "let tokio pick", which is what most deployments want.
+    pub fn io_threads(&self) -> Option<usize> {
+        self.io_threads
+    }
+
     /// Returns the resolved eviction configuration. Defaults (unlimited
     /// memory, `noeviction`, 5 samples) apply when neither CLI nor config
     /// file specifies a value.
@@ -142,7 +152,7 @@ impl CliArgs {
 
 enum ScanOutcome {
     Help,
-    Flags(RawFlags),
+    Flags(Box<RawFlags>),
 }
 
 /// First pass: walk `argv` and record every flag we recognise without
@@ -210,12 +220,18 @@ fn scan_argv<I: IntoIterator<Item = String>>(args: I) -> Result<ScanOutcome, Str
                     format!("invalid --maxmemory-samples: '{value}' is not a non-negative integer")
                 })?);
             }
+            "--io-threads" => {
+                let value = take_value(&mut iter, "--io-threads")?;
+                raw.io_threads = Some(value.parse().map_err(|_| {
+                    format!("invalid --io-threads: '{value}' is not a non-negative integer")
+                })?);
+            }
             "-h" | "--help" => return Ok(ScanOutcome::Help),
             other => return Err(format!("unrecognised argument: '{other}'")),
         }
     }
 
-    Ok(ScanOutcome::Flags(raw))
+    Ok(ScanOutcome::Flags(Box::new(raw)))
 }
 
 /// Merges `raw` flags with an optional [`FileConfig`], applying defaults for
@@ -278,6 +294,7 @@ fn merge(raw: RawFlags, file: Option<&FileConfig>) -> Result<CliArgs, String> {
     let max_memory_samples = raw
         .max_memory_samples
         .or_else(|| file.and_then(|f| f.max_memory_samples));
+    let io_threads = raw.io_threads.or_else(|| file.and_then(|f| f.io_threads));
 
     Ok(CliArgs {
         addr,
@@ -289,6 +306,7 @@ fn merge(raw: RawFlags, file: Option<&FileConfig>) -> Result<CliArgs, String> {
         max_memory,
         max_memory_policy,
         max_memory_samples,
+        io_threads,
     })
 }
 
@@ -649,4 +667,35 @@ mod tests {
         assert_eq!(cfg.policy, EvictionPolicy::AllKeysLru);
         assert_eq!(cfg.samples, 3);
     }
+
+    #[test]
+    fn io_threads_defaults_to_none() {
+        assert!(parse_run(&[]).io_threads().is_none());
+    }
+
+    #[test]
+    fn io_threads_flag_is_parsed() {
+        assert_eq!(parse_run(&["--io-threads", "4"]).io_threads(), Some(4));
+        assert_eq!(parse_run(&["--io-threads", "0"]).io_threads(), Some(0));
+    }
+
+    #[test]
+    fn io_threads_rejects_non_integer() {
+        let err = parse(&["--io-threads", "lots"]).unwrap_err();
+        assert!(err.contains("--io-threads"));
+    }
+
+    #[test]
+    fn io_threads_from_config_file() {
+        let conf = TempConf::new("io-threads", "io-threads 8\n");
+        let args = parse_run(&["--config", conf.path.to_str().unwrap()]);
+        assert_eq!(args.io_threads(), Some(8));
+    }
+
+    #[test]
+    fn cli_io_threads_overrides_config_file() {
+        let conf = TempConf::new("io-threads-override", "io-threads 8\n");
+        let args = parse_run(&["--config", conf.path.to_str().unwrap(), "--io-threads", "2"]);
+        assert_eq!(args.io_threads(), Some(2));
+    }
 }
diff --git a/src/config/file.rs b/src/config/file.rs
index 9e8a2b9..11203c3 100644
--- a/src/config/file.rs
+++ b/src/config/file.rs
@@ -42,6 +42,9 @@ pub struct FileConfig {
     pub max_memory_policy: Option<EvictionPolicy>,
     /// Number of keys inspected per eviction round.
     pub max_memory_samples: Option<usize>,
+    /// Number of tokio worker threads. `0` (or unset) lets the runtime
+    /// pick a sensible default (usually the logical CPU count).
+    pub io_threads: Option<usize>,
 }
 
 /// Error type for configuration file parsing.
@@ -180,6 +183,9 @@ fn apply_directive(cfg: &mut FileConfig, key: &str, value: &str) -> Result<(), S
         "maxmemory-samples" => {
             cfg.max_memory_samples = Some(parse_usize(value, "maxmemory-samples")?);
         }
+        "io-threads" => {
+            cfg.io_threads = Some(parse_usize(value, "io-threads")?);
+        }
         other => return Err(format!("unknown directive '{other}'")),
     }
     Ok(())
@@ -389,4 +395,11 @@ mod tests {
             Some(12),
         );
     }
+
+    #[test]
+    fn io_threads_is_parsed() {
+        assert_eq!(parse("io-threads 4\n").unwrap().io_threads, Some(4));
+        assert_eq!(parse("io-threads 0\n").unwrap().io_threads, Some(0));
+        assert!(parse("io-threads nope\n").is_err());
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index 97818b3..d448bc9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -89,6 +89,7 @@ fn main() -> ExitCode {
         max_clients: args
             .max_clients()
             .unwrap_or_else(|| ServerConfig::default().max_clients),
+        worker_threads: args.io_threads().unwrap_or(0),
     };
     match server_config.client_timeout {
         Some(d) => info!("client idle timeout: {}s", d.as_secs()),
@@ -99,6 +100,11 @@ fn main() -> ExitCode {
     } else {
         info!("maxclients: {}", server_config.max_clients);
     }
+    if server_config.worker_threads == 0 {
+        info!("io-threads: auto (one per logical CPU)");
+    } else {
+        info!("io-threads: {}", server_config.worker_threads);
+    }
 
     let expire_handle = expire::spawn(engine.clone(), shutdown.clone());
 
@@ -176,9 +182,11 @@ fn build_engine(args: &CliArgs) -> Result<KvEngine, ExitCode> {
     }
 }
 
-/// Installs SIGINT/SIGTERM handlers that flip the shared shutdown flag and
-/// self-connect to the listener so the blocked `accept` returns immediately.
-fn install_signal_handlers(shutdown: Shutdown, wake_addr: SocketAddr) -> std::io::Result<()> {
+/// Installs SIGINT/SIGTERM handlers that flip the shared shutdown flag. The
+/// async accept loop observes the flag via `Shutdown::notified` and exits on
+/// the next scheduler poll, so we no longer need the self-connect wake-up
+/// trick that the blocking listener required.
+fn install_signal_handlers(shutdown: Shutdown, _wake_addr: SocketAddr) -> std::io::Result<()> {
     use signal_hook::consts::{SIGINT, SIGTERM};
     use signal_hook::iterator::Signals;
 
@@ -197,7 +205,6 @@ fn install_signal_handlers(shutdown: Shutdown, wake_addr: SocketAddr) -> std::io
                 };
                 warn!("received {name}, initiating graceful shutdown");
                 shutdown.trigger();
-                Shutdown::wake_listener(wake_addr);
             }
         })?;
     Ok(())
diff --git a/src/network/server.rs b/src/network/server.rs
index 6ba8c58..f685be0 100644
--- a/src/network/server.rs
+++ b/src/network/server.rs
@@ -1,11 +1,13 @@
-use std::io::{ErrorKind, Read, Write};
-use std::net::{TcpListener, TcpStream};
+use std::net::TcpListener as StdTcpListener;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, Ordering};
-use std::thread;
 use std::time::Duration;
 
 use log::{debug, error, info, warn};
+use tokio::io::{AsyncReadExt, AsyncWriteExt};
+use tokio::net::{TcpListener, TcpStream};
+use tokio::runtime::Builder;
+use tokio::time::timeout;
 
 use crate::error::FerrumError;
 use crate::network::shutdown::Shutdown;
@@ -43,6 +45,10 @@ pub struct ServerConfig {
     ///
     /// `0` disables the limit entirely. Matches Redis' `maxclients` default.
     pub max_clients: usize,
+    /// Number of worker threads backing the tokio runtime when the server
+    /// spins one up internally. `0` asks tokio to pick a sensible default
+    /// (usually the CPU core count).
+    pub worker_threads: usize,
 }
 
 impl Default for ServerConfig {
@@ -50,6 +56,7 @@ impl Default for ServerConfig {
         Self {
             client_timeout: None,
             max_clients: 10_000,
+            worker_threads: 0,
         }
     }
 }
@@ -64,8 +71,8 @@ impl ServerConfig {
 /// RAII counter that tracks the number of in-flight client connections.
 ///
 /// The accept loop calls [`ConnCounter::try_acquire`] for every newly accepted
-/// socket; on success it hands the returned guard to the worker thread. When
-/// the worker finishes (normally or via panic) the guard is dropped and the
+/// socket; on success it hands the returned guard to the worker task. When
+/// the task finishes (normally or via panic) the guard is dropped and the
 /// count is decremented — so even a panicking handler cannot leak a slot.
 #[derive(Clone, Default)]
 struct ConnCounter {
@@ -127,7 +134,7 @@ impl Drop for ConnGuard {
 /// its reply is written back using the RESP2 encoders. Partial frames remain
 /// in the buffer until the next `read` fills them in, so requests that span
 /// multiple packets are handled transparently.
-fn handle_client(
+async fn handle_client(
     mut stream: TcpStream,
     engine: KvEngine,
     shutdown: Shutdown,
@@ -136,17 +143,6 @@ fn handle_client(
     let peer = stream.peer_addr()?;
     debug!("client connected: {peer}");
 
-    // Apply idle timeouts, if configured. A failure here is logged but not
-    // fatal: the connection can still function, just without the timeout.
-    if let Some(timeout) = config.client_timeout {
-        if let Err(e) = stream.set_read_timeout(Some(timeout)) {
-            warn!("set_read_timeout failed for {peer}: {e}");
-        }
-        if let Err(e) = stream.set_write_timeout(Some(timeout)) {
-            warn!("set_write_timeout failed for {peer}: {e}");
-        }
-    }
-
     let mut inbuf: Vec<u8> = Vec::with_capacity(READ_BUF_INITIAL);
     let mut chunk = [0u8; READ_CHUNK];
     let mut outbuf: Vec<u8> = Vec::with_capacity(256);
@@ -155,14 +151,25 @@ fn handle_client(
         if shutdown.is_triggered() {
             break;
         }
-        let n = match stream.read(&mut chunk) {
-            Ok(0) => break, // Orderly EOF: client closed the connection.
-            Ok(n) => n,
-            Err(e) if is_timeout(&e) => {
+
+        // Read the next chunk, racing three concurrent wake-up sources:
+        // 1. bytes arriving from the peer,
+        // 2. the idle timeout (only when configured),
+        // 3. the shared shutdown signal.
+        let read_result = tokio::select! {
+            biased;
+            _ = shutdown.notified() => break,
+            r = read_with_optional_timeout(&mut stream, &mut chunk, config.client_timeout) => r,
+        };
+
+        let n = match read_result {
+            ReadOutcome::Bytes(n) => n,
+            ReadOutcome::Eof => break,
+            ReadOutcome::IdleTimeout => {
                 info!("{peer} idle timeout, closing connection");
                 return Ok(());
             }
-            Err(e) => {
+            ReadOutcome::Err(e) => {
                 error!("read failed for {peer}: {e}");
                 return Err(e.into());
             }
@@ -175,7 +182,7 @@ fn handle_client(
                 Ok(FrameParse::Complete { command, consumed }) => {
                     outbuf.clear();
                     execute_command(command, &engine, &mut outbuf);
-                    if let Err(e) = stream.write_all(&outbuf) {
+                    if let Err(e) = stream.write_all(&outbuf).await {
                         error!("write failed for {peer}: {e}");
                         return Err(e.into());
                     }
@@ -187,7 +194,7 @@ fn handle_client(
                     // and keep the connection open so the client can retry.
                     outbuf.clear();
                     write_ferrum_error(&mut outbuf, &error);
-                    if let Err(e) = stream.write_all(&outbuf) {
+                    if let Err(e) = stream.write_all(&outbuf).await {
                         error!("write failed for {peer}: {e}");
                         return Err(e.into());
                     }
@@ -199,7 +206,7 @@ fn handle_client(
                     // resynchronised, so reply with -ERR and close.
                     outbuf.clear();
                     write_ferrum_error(&mut outbuf, &e);
-                    let _ = stream.write_all(&outbuf);
+                    let _ = stream.write_all(&outbuf).await;
                     warn!("protocol error from {peer}: {e}");
                     return Ok(());
                 }
@@ -209,7 +216,7 @@ fn handle_client(
         if inbuf.len() > READ_BUF_MAX {
             outbuf.clear();
             encoder::encode_error(&mut outbuf, "ERR request too large");
-            let _ = stream.write_all(&outbuf);
+            let _ = stream.write_all(&outbuf).await;
             warn!("request buffer overflow from {peer}");
             return Ok(());
         }
@@ -219,14 +226,35 @@ fn handle_client(
     Ok(())
 }
 
-/// Returns `true` when a `read`/`write` I/O error is the kind produced by an
-/// expired [`TcpStream`] timeout.
-///
-/// Platforms disagree on which `ErrorKind` a blocking-socket timeout surfaces
-/// as: Linux reports `WouldBlock`, macOS and Windows prefer `TimedOut`. We
-/// treat both as the same signal so callers do not have to care.
-fn is_timeout(err: &std::io::Error) -> bool {
-    matches!(err.kind(), ErrorKind::WouldBlock | ErrorKind::TimedOut)
+/// Result of a single async read attempt, flattened so the caller can match
+/// on it without juggling nested `Option<Result<...>>`.
+enum ReadOutcome {
+    Bytes(usize),
+    Eof,
+    IdleTimeout,
+    Err(std::io::Error),
+}
+
+/// Reads into `chunk`, optionally enforcing a per-read idle timeout that
+/// mirrors the synchronous `SO_RCVTIMEO` behaviour from the pre-tokio era.
+async fn read_with_optional_timeout(
+    stream: &mut TcpStream,
+    chunk: &mut [u8],
+    idle: Option<Duration>,
+) -> ReadOutcome {
+    let fut = stream.read(chunk);
+    let read_result = match idle {
+        Some(d) => match timeout(d, fut).await {
+            Ok(r) => r,
+            Err(_) => return ReadOutcome::IdleTimeout,
+        },
+        None => fut.await,
+    };
+    match read_result {
+        Ok(0) => ReadOutcome::Eof,
+        Ok(n) => ReadOutcome::Bytes(n),
+        Err(e) => ReadOutcome::Err(e),
+    }
 }
 
 /// Executes a parsed command against the engine and appends its RESP2 reply
@@ -463,26 +491,60 @@ fn write_ferrum_error(out: &mut Vec<u8>, err: &FerrumError) {
 
 /// Starts the TCP server and listens for incoming connections.
 ///
-/// Each accepted connection is handled on a separate thread that shares the
-/// same [`KvEngine`] instance.
+/// Binds a fresh [`std::net::TcpListener`] on `addr` and hands control over
+/// to [`run_listener`]. The call blocks until the shared [`Shutdown`] flag
+/// flips.
 pub fn start(
     addr: &str,
     engine: KvEngine,
     shutdown: Shutdown,
     config: ServerConfig,
 ) -> Result<(), FerrumError> {
-    let listener = TcpListener::bind(addr)?;
+    let listener = StdTcpListener::bind(addr)?;
     let local = listener.local_addr()?;
     info!("FerrumKV listening on {local}");
     run_listener(listener, engine, shutdown, config)
 }
 
-/// Runs the accept loop on an already-bound [`TcpListener`].
+/// Runs the accept loop on an already-bound [`std::net::TcpListener`].
 ///
-/// Split out from [`start`] so that tests (and future embeddings) can bind
-/// their own listener — for example to port `0` to obtain an OS-assigned
-/// ephemeral port — and drive the server from there.
+/// The function is intentionally synchronous so every existing integration
+/// test — which binds its own ephemeral listener and then calls into this
+/// function from a dedicated thread — keeps working unchanged. Internally
+/// we build a multi-threaded tokio runtime, register the listener with it,
+/// and drive the async accept loop until [`Shutdown::trigger`] fires.
 pub fn run_listener(
+    listener: StdTcpListener,
+    engine: KvEngine,
+    shutdown: Shutdown,
+    config: ServerConfig,
+) -> Result<(), FerrumError> {
+    listener.set_nonblocking(true)?;
+
+    let mut builder = Builder::new_multi_thread();
+    builder.enable_all().thread_name("ferrum-worker");
+    if config.worker_threads > 0 {
+        builder.worker_threads(config.worker_threads);
+    }
+    let runtime = builder
+        .build()
+        .map_err(|e| FerrumError::Internal(format!("failed to build tokio runtime: {e}")))?;
+
+    let result = runtime.block_on(async move {
+        let listener = TcpListener::from_std(listener)
+            .map_err(|e| FerrumError::Internal(format!("TcpListener::from_std failed: {e}")))?;
+        accept_loop(listener, engine, shutdown, config).await
+    });
+
+    // Drop the runtime in a tight window: any handler still holding a socket
+    // gets a brief grace period before forced teardown. `shutdown_timeout`
+    // bounds the wait so a stuck task cannot block process exit indefinitely.
+    runtime.shutdown_timeout(Duration::from_millis(500));
+    result
+}
+
+/// Async accept loop running on the tokio runtime.
+async fn accept_loop(
     listener: TcpListener,
     engine: KvEngine,
     shutdown: Shutdown,
@@ -490,41 +552,47 @@ pub fn run_listener(
 ) -> Result<(), FerrumError> {
     let counter = ConnCounter::new();
 
-    for stream in listener.incoming() {
-        if shutdown.is_triggered() {
-            break;
-        }
-        match stream {
-            Ok(mut stream) => {
-                if shutdown.is_triggered() {
-                    break;
-                }
-                let Some(guard) = counter.try_acquire(config.max_clients) else {
-                    let peer = stream
-                        .peer_addr()
-                        .map(|a| a.to_string())
-                        .unwrap_or_else(|_| "<unknown>".into());
-                    warn!(
-                        "rejecting {peer}: max_clients={} reached",
-                        config.max_clients
-                    );
-                    let mut out = Vec::with_capacity(64);
-                    encoder::encode_error(&mut out, "ERR max number of clients reached");
-                    let _ = stream.write_all(&out);
-                    continue;
-                };
-                let engine = engine.clone();
-                let shutdown = shutdown.clone();
-                let config = config.clone();
-                thread::spawn(move || {
-                    if let Err(e) = handle_client(stream, engine, shutdown, config) {
-                        error!("client handler error: {e}");
+    loop {
+        tokio::select! {
+            biased;
+            _ = shutdown.notified() => break,
+            accepted = listener.accept() => {
+                match accepted {
+                    Ok((stream, _)) => {
+                        if shutdown.is_triggered() {
+                            break;
+                        }
+                        let Some(guard) = counter.try_acquire(config.max_clients) else {
+                            let peer = stream
+                                .peer_addr()
+                                .map(|a| a.to_string())
+                                .unwrap_or_else(|_| "<unknown>".into());
+                            warn!(
+                                "rejecting {peer}: max_clients={} reached",
+                                config.max_clients
+                            );
+                            // Best-effort refusal reply; we do not wait for
+                            // the client to drain it before closing.
+                            let mut out = Vec::with_capacity(64);
+                            encoder::encode_error(&mut out, "ERR max number of clients reached");
+                            let mut stream = stream;
+                            let _ = stream.write_all(&out).await;
+                            continue;
+                        };
+                        let engine = engine.clone();
+                        let shutdown = shutdown.clone();
+                        let config = config.clone();
+                        tokio::spawn(async move {
+                            if let Err(e) = handle_client(stream, engine, shutdown, config).await {
+                                error!("client handler error: {e}");
+                            }
+                            drop(guard);
+                        });
                     }
-                    drop(guard);
-                });
-            }
-            Err(e) => {
-                error!("connection failed: {e}");
+                    Err(e) => {
+                        error!("connection failed: {e}");
+                    }
+                }
             }
         }
     }
diff --git a/src/network/shutdown.rs b/src/network/shutdown.rs
index 47a1937..efca45d 100644
--- a/src/network/shutdown.rs
+++ b/src/network/shutdown.rs
@@ -1,28 +1,24 @@
 //! Cooperative shutdown signalling for the TCP server.
 //!
-//! A [`Shutdown`] is a cheap, cloneable handle wrapping an `AtomicBool`. The
-//! accept loop and every connection handler consult it between blocking
-//! operations; when a signal handler (or a test) flips the flag, all of them
-//! wind down at the next opportunity.
-//!
-//! Because [`TcpListener::accept`] is a blocking syscall that will not return
-//! on its own when the flag is flipped, the caller also remembers the bound
-//! address and opens a short-lived local TCP connection via
-//! [`Shutdown::wake_listener`]. That stray connection unblocks `accept`; the
-//! loop then notices the flag and exits cleanly.
+//! A [`Shutdown`] is a cheap, cloneable handle wrapping an `AtomicBool` plus
+//! a [`tokio::sync::Notify`]. Synchronous observers (the background expiration
+//! sweeper, tests) consult the atomic flag between blocking operations, while
+//! async observers (the accept loop, per-connection handlers) park on
+//! [`Shutdown::notified`] and wake up instantly when the flag flips.
 
-use std::net::{SocketAddr, TcpStream};
 use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::time::Duration;
+
+use tokio::sync::Notify;
 
 /// Shared cooperative shutdown flag.
 ///
 /// Cloning a `Shutdown` is cheap: it only bumps the refcount on the inner
-/// `Arc`. All clones observe the same boolean.
+/// `Arc`. All clones observe the same boolean and share the same waker.
 #[derive(Clone, Default)]
 pub struct Shutdown {
     flag: Arc<AtomicBool>,
+    notify: Arc<Notify>,
 }
 
 impl Shutdown {
@@ -37,25 +33,34 @@ impl Shutdown {
         self.flag.load(Ordering::SeqCst)
     }
 
-    /// Flips the flag so every observer will see `is_triggered() == true`.
+    /// Flips the flag so every observer will see `is_triggered() == true`
+    /// and wakes every task currently parked on [`notified`](Self::notified).
     ///
     /// Safe to call multiple times and from multiple threads; subsequent
-    /// calls are no-ops.
+    /// calls re-notify waiters but are otherwise harmless.
     pub fn trigger(&self) {
         self.flag.store(true, Ordering::SeqCst);
+        self.notify.notify_waiters();
     }
 
-    /// Wakes a blocked [`std::net::TcpListener::accept`] bound to `addr` by
-    /// opening a single short-lived connection to it.
+    /// Async future that resolves as soon as [`trigger`](Self::trigger) is
+    /// called.
     ///
-    /// The connection is immediately dropped; the accept loop will receive an
-    /// `Ok(stream)`, observe the shutdown flag, and exit before doing any
-    /// work. Errors are swallowed because by the time we try to self-connect
-    /// the listener may already be closed — that is harmless.
-    pub fn wake_listener(addr: SocketAddr) {
-        if let Ok(stream) = TcpStream::connect_timeout(&addr, Duration::from_millis(200)) {
-            let _ = stream.shutdown(std::net::Shutdown::Both);
+    /// If the flag is already set the future returns on the next poll, so
+    /// callers that take the shutdown path after a blocking operation do not
+    /// deadlock.
+    pub async fn notified(&self) {
+        if self.is_triggered() {
+            return;
+        }
+        let waiter = self.notify.notified();
+        // Re-check after arming the waiter to plug the race where `trigger`
+        // fires between the `is_triggered` probe above and installing the
+        // waiter.
+        if self.is_triggered() {
+            return;
         }
+        waiter.await;
     }
 }
 
@@ -84,4 +89,22 @@ mod tests {
         s.trigger();
         assert!(s.is_triggered());
     }
+
+    #[tokio::test]
+    async fn notified_resolves_after_trigger() {
+        let s = Shutdown::new();
+        let s2 = s.clone();
+        let task = tokio::spawn(async move { s2.notified().await });
+        // Give the task a chance to park on `notified`.
+        tokio::task::yield_now().await;
+        s.trigger();
+        task.await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn notified_returns_immediately_if_already_triggered() {
+        let s = Shutdown::new();
+        s.trigger();
+        s.notified().await;
+    }
 }
diff --git a/tests/async_concurrency_test.rs b/tests/async_concurrency_test.rs
new file mode 100644
index 0000000..cc26e02
--- /dev/null
+++ b/tests/async_concurrency_test.rs
@@ -0,0 +1,90 @@
+//! High-concurrency smoke tests for the tokio-backed server.
+//!
+//! These tests exercise the async accept loop under workloads that used to
+//! spawn one OS thread per connection in the synchronous version. With the
+//! tokio runtime a handful of worker threads should multiplex all of them
+//! without breaking a sweat.
+//!
+//! They are deliberately lightweight (small keyspace, short values) so they
+//! stay fast enough for the default `cargo test` run while still stressing
+//! the multiplexed I/O path.
+
+use std::io::{Read, Write};
+use std::net::{TcpListener, TcpStream};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::thread;
+use std::time::{Duration, Instant};
+
+use ferrum_kv::network::server::{self, ServerConfig};
+use ferrum_kv::network::shutdown::Shutdown;
+use ferrum_kv::storage::engine::KvEngine;
+
+/// Number of concurrent clients in the PING fan-out test.
+///
+/// Chosen to clearly exceed the number of logical CPUs on typical CI boxes
+/// so we know the runtime is actually multiplexing connections rather than
+/// pinning one per worker thread.
+const FAN_OUT_CLIENTS: usize = 500;
+
+/// Drives a FerrumKV server with a fixed, tiny worker pool and asks
+/// `FAN_OUT_CLIENTS` clients to each send a PING and read back `+PONG`.
+///
+/// Passing asserts that:
+/// 1. the accept loop can absorb hundreds of concurrent incoming connections,
+/// 2. the shared engine correctly replies on each of them, and
+/// 3. the server winds down cleanly after the shared shutdown flag flips.
+#[test]
+fn handles_many_concurrent_ping_clients() {
+    let listener = TcpListener::bind("127.0.0.1:0").expect("bind ephemeral port");
+    let addr = listener.local_addr().expect("local_addr");
+    let engine = KvEngine::new();
+    let shutdown = Shutdown::new();
+
+    let shutdown_for_thread = shutdown.clone();
+    let config = ServerConfig {
+        worker_threads: 2,
+        max_clients: FAN_OUT_CLIENTS * 2,
+        ..ServerConfig::default()
+    };
+    let server = thread::spawn(move || {
+        server::run_listener(listener, engine, shutdown_for_thread, config).expect("run_listener");
+    });
+
+    // Give the accept loop a moment to come up before we stampede it.
+    thread::sleep(Duration::from_millis(100));
+
+    let ok = Arc::new(AtomicUsize::new(0));
+    let mut handles = Vec::with_capacity(FAN_OUT_CLIENTS);
+    let started = Instant::now();
+    for _ in 0..FAN_OUT_CLIENTS {
+        let ok = Arc::clone(&ok);
+        handles.push(thread::spawn(move || {
+            let mut stream = TcpStream::connect(addr).expect("connect");
+            stream
+                .set_read_timeout(Some(Duration::from_secs(5)))
+                .unwrap();
+            stream.write_all(b"*1\r\n$4\r\nPING\r\n").unwrap();
+            let mut buf = [0u8; 16];
+            let n = stream.read(&mut buf).unwrap();
+            // The reply is `+PONG\r\n` (7 bytes); we only look at the prefix
+            // so we stay resilient to any future PING payload variants.
+            if n >= 5 && &buf[..5] == b"+PONG" {
+                ok.fetch_add(1, Ordering::SeqCst);
+            }
+        }));
+    }
+    for h in handles {
+        h.join().expect("client thread panicked");
+    }
+    let elapsed = started.elapsed();
+
+    assert_eq!(
+        ok.load(Ordering::SeqCst),
+        FAN_OUT_CLIENTS,
+        "some concurrent clients did not get a PONG back (elapsed: {elapsed:?})",
+    );
+
+    shutdown.trigger();
+    server.join().expect("server thread panicked");
+}
diff --git a/tests/max_clients_test.rs b/tests/max_clients_test.rs
index 4ef290c..fd8f90e 100644
--- a/tests/max_clients_test.rs
+++ b/tests/max_clients_test.rs
@@ -22,6 +22,7 @@ fn second_client_is_rejected_when_maxclients_is_one() {
     let config = ServerConfig {
         client_timeout: None,
         max_clients: 1,
+        ..ServerConfig::default()
     };
 
     let _server = thread::spawn(move || {
diff --git a/tests/shutdown_test.rs b/tests/shutdown_test.rs
index f61ede8..75d8921 100644
--- a/tests/shutdown_test.rs
+++ b/tests/shutdown_test.rs
@@ -3,8 +3,9 @@
 //! These tests bring up a real listener, spawn the accept loop on a
 //! background thread, and verify that:
 //!
-//! 1. Triggering the [`Shutdown`] handle and waking the listener causes the
-//!    accept loop to return in bounded time.
+//! 1. Triggering the [`Shutdown`] handle causes the accept loop to return
+//!    in bounded time. With the async runtime the flag wakes waiters via
+//!    `tokio::sync::Notify`, so no external "self-connect" nudge is needed.
 //! 2. After shutdown the listener port is released, so a fresh server can
 //!    bind the same ephemeral address again without `EADDRINUSE`.
 
@@ -46,9 +47,10 @@ fn accept_loop_exits_after_shutdown_triggered() {
     assert!(n > 0, "expected a reply for PING");
     drop(stream);
 
-    // Now trigger shutdown and wake the blocked accept.
+    // Now trigger shutdown. The accept loop is parked on `shutdown.notified()`
+    // alongside `listener.accept()` inside `tokio::select!`, so flipping the
+    // flag wakes it up directly.
     shutdown.trigger();
-    Shutdown::wake_listener(addr);
 
     // The accept loop must terminate promptly.
     let start = Instant::now();