|
| 1 | +--- |
| 2 | +title: OS Threads vs. Async overhead |
| 3 | +date: 2026-03-02 |
| 4 | +override:tags: [] |
| 5 | +layout: base.html |
| 6 | +permalink: "intro_to_rust_async/overhead_experiment/index.html" |
| 7 | +--- |
| 8 | + |
| 9 | +Let's compare the overhead of OS threads and async Tokio tasks on an IO-bound workload where tasks spend most of their time waiting on something. |
| 10 | + |
| 11 | +(Note that on CPU-bound computations, async doesn't provide any advantage over OS threads.) |
| 12 | + |
| 13 | +## OS threads |
| 14 | + |
| 15 | +```rust |
| 16 | +const NUM_THREADS: u32 = 10_000; |
| 17 | +fn main() { |
| 18 | + let mut handles = Vec::new(); |
| 19 | + for _ in 0..NUM_THREADS { |
| 20 | + // spawn an OS thread |
| 21 | + let handle = std::thread::spawn(my_function); |
| 22 | + handles.push(handle); |
| 23 | + } |
| 24 | + for handle in handles { |
| 25 | + handle.join().unwrap(); |
| 26 | + } |
| 27 | +} |
| 28 | +fn my_function() { |
| 29 | + // simulate waiting on I/O, like a network call |
| 30 | + std::thread::sleep(std::time::Duration::from_secs(1)); |
| 31 | +} |
| 32 | +``` |
| 33 | +`/usr/bin/time -v ./target` says: |
| 34 | +``` |
| 35 | +User time (seconds): 0.07 |
| 36 | +System time (seconds): 0.57 |
| 37 | +Percent of CPU this job got: 53% |
| 38 | +Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.19 |
| 39 | +Maximum resident set size (kbytes): 97436 |
| 40 | +``` |
| 41 | +Not too bad! |
| 42 | + |
| 43 | +Each thread is given a 2 MiB stack of virtual memory, |
| 44 | +but the OS only maps virtual pages to physical pages when the program accesses them. |
| 45 | +That's why this program uses only 9.7 KB of physical memory per thread. |
| 46 | + |
| 47 | +Unfortunately, the CPU spent 0.57 seconds handling page faults and context switching, |
| 48 | +but only 0.07 seconds running our work. |
| 49 | + |
| 50 | +When I increase `NUM_THREADS` to 20,000, |
| 51 | +the program panics with "Resource temporarily unavailable" |
| 52 | +because thread creation fails once it exceeds Linux's `threads-max` limit. |
| 53 | +Increasing this limit allows us to run over 100,000 OS threads. |
| 54 | + |
| 55 | +## Async tasks |
| 56 | + |
| 57 | +```rust |
| 58 | +const NUM_TASKS: u32 = 10_000; |
| 59 | +#[tokio::main] |
| 60 | +async fn main() { |
| 61 | + let mut handles = Vec::new(); |
| 62 | + for _ in 0..NUM_TASKS { |
| 63 | + // spawn an async task |
| 64 | + let handle = tokio::spawn(my_function()); |
| 65 | + handles.push(handle); |
| 66 | + } |
| 67 | + for handle in handles { |
| 68 | + handle.await.unwrap(); |
| 69 | + } |
| 70 | +} |
| 71 | +async fn my_function() { |
| 72 | + // simulate waiting on I/O, like a network call |
| 73 | + tokio::time::sleep(std::time::Duration::from_secs(1)).await; |
| 74 | +} |
| 75 | +``` |
| 76 | +`/usr/bin/time -v ./target` says: |
| 77 | +``` |
| 78 | +User time (seconds): 0.02 |
| 79 | +System time (seconds): 0.01 |
| 80 | +Percent of CPU this job got: 3% |
| 81 | +Elapsed (wall clock) time (h:mm:ss or m:ss): 0:01.00 |
| 82 | +Maximum resident set size (kbytes): 6604 |
| 83 | +``` |
| 84 | + |
| 85 | +Async has even less overhead! |
| 86 | + |
| 87 | +Only about 660 bytes of memory are used per task. |
| 88 | + |
| 89 | +Now the CPU spent only 0.03 seconds in total (0.02 user + 0.01 system) running the whole program! |
0 commit comments