diff --git a/README.md b/README.md
index 836dfc7..ff54f48 100644
--- a/README.md
+++ b/README.md
@@ -34,10 +34,13 @@ The binary will be available at `target/release/quantus-miner`.
 
 | Argument | Environment Variable | Description | Default |
 |----------|---------------------|-------------|---------|
+| `--node-addr <ADDR>` | `MINER_NODE_ADDR` | Node address to connect to | `127.0.0.1:9833` |
 | `--cpu-workers <N>` | `MINER_CPU_WORKERS` | Number of CPU worker threads | Auto-detect |
-| `--gpu-devices <N>` | `MINER_GPU_DEVICES` | Number of GPU devices | 0 |
-| `--port <PORT>` | `MINER_PORT` | QUIC server port | 9833 |
-| `--metrics-port <PORT>` | `MINER_METRICS_PORT` | Prometheus metrics port | Disabled |
+| `--gpu-devices <N>` | `MINER_GPU_DEVICES` | Number of GPU devices | Auto-detect |
+| `--gpu-batch-size <N>` | `MINER_GPU_BATCH_SIZE` | GPU batch size in nonces | 1000000 |
+| `--cpu-batch-size <N>` | `MINER_CPU_BATCH_SIZE` | CPU batch size in hashes | 10000 |
+| `--gpu-throttle-ms <MS>` | `MINER_GPU_THROTTLE_MS` | Sleep duration (ms) between GPU batches (0 = no throttle) | 0 |
+| `--metrics-port <PORT>` | `MINER_METRICS_PORT` | Prometheus metrics port | 9900 |
 
 ## GPU Mining
 
@@ -74,6 +77,9 @@ cargo build -p miner-cli --release
 # Pure GPU mining
 ./target/release/quantus-miner serve --gpu-devices 1
 
+# GPU mining with throttle (reduce GPU utilization)
+./target/release/quantus-miner serve --gpu-devices 1 --gpu-throttle-ms 50
+
 # Hybrid mining: 4 CPU + 1 GPU workers
 ./target/release/quantus-miner serve --cpu-workers 4 --gpu-devices 1
 
diff --git a/crates/engine-gpu/benches/gpu_engine_bench.rs b/crates/engine-gpu/benches/gpu_engine_bench.rs
index 29e47e3..64d68eb 100644
--- a/crates/engine-gpu/benches/gpu_engine_bench.rs
+++ b/crates/engine-gpu/benches/gpu_engine_bench.rs
@@ -8,7 +8,7 @@ use std::sync::atomic::AtomicBool;
 
 fn bench_cpu_vs_gpu_small(c: &mut Criterion) {
     let cpu_engine = FastCpuEngine::new(10_000);
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
     let cancel_flag = AtomicBool::new(false);
     let cancel_check = AtomicBoolCancelCheck(&cancel_flag);
 
@@ -59,7 +59,7 @@ fn bench_cpu_vs_gpu_small(c: &mut Criterion) {
 
 fn bench_cpu_vs_gpu_medium(c: &mut Criterion) {
     let cpu_engine = FastCpuEngine::new(10_000);
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
     let cancel_flag = AtomicBool::new(false);
     let cancel_check = AtomicBoolCancelCheck(&cancel_flag);
 
@@ -110,7 +110,7 @@ fn bench_cpu_vs_gpu_medium(c: &mut Criterion) {
 
 fn bench_cpu_vs_gpu_large(c: &mut Criterion) {
     let cpu_engine = FastCpuEngine::new(10_000);
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
     let cancel_flag = AtomicBool::new(false);
     let cancel_check = AtomicBoolCancelCheck(&cancel_flag);
 
@@ -161,7 +161,7 @@ fn bench_cpu_vs_gpu_large(c: &mut Criterion) {
 
 fn bench_solution_finding(c: &mut Criterion) {
     let cpu_engine = FastCpuEngine::new(10_000);
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
     let cancel_flag = AtomicBool::new(false);
     let cancel_check = AtomicBoolCancelCheck(&cancel_flag);
 
@@ -212,7 +212,7 @@ fn bench_solution_finding(c: &mut Criterion) {
 
 fn bench_throughput_per_second(c: &mut Criterion) {
     let cpu_engine = FastCpuEngine::new(10_000);
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
     let cancel_flag = AtomicBool::new(false);
     let cancel_check = AtomicBoolCancelCheck(&cancel_flag);
 
@@ -262,7 +262,7 @@ fn bench_throughput_per_second(c: &mut Criterion) {
 }
 
 fn bench_gpu_batch_efficiency(c: &mut Criterion) {
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
     let cancel_flag = AtomicBool::new(false);
     let cancel_check = AtomicBoolCancelCheck(&cancel_flag);
 
diff --git a/crates/engine-gpu/examples/verify_nonce.rs b/crates/engine-gpu/examples/verify_nonce.rs
index d361265..fce9d22 100644
--- a/crates/engine-gpu/examples/verify_nonce.rs
+++ b/crates/engine-gpu/examples/verify_nonce.rs
@@ -26,7 +26,7 @@ fn main() {
 
     // 3. Verify with GPU engine
     log::info!("Initializing GPU engine...");
-    let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU");
+    let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU");
 
     // Search a small range around the valid nonce
     let gpu_range = Range {
diff --git a/crates/engine-gpu/src/lib.rs b/crates/engine-gpu/src/lib.rs
index 7aa9100..8604fee 100644
--- a/crates/engine-gpu/src/lib.rs
+++ b/crates/engine-gpu/src/lib.rs
@@ -36,6 +36,7 @@ pub struct GpuEngine {
     contexts: Vec<Arc<GpuContext>>,
     device_counter: AtomicUsize,
     batch_size: u64,
+    throttle_ms: u64,
 }
 
 // Thread-local storage for consistent GPU device assignment per worker thread
@@ -138,12 +139,12 @@ impl GpuContext {
 }
 
 impl GpuEngine {
-    /// Try to initialize the GPU engine with the given batch size.
-    pub fn try_new(batch_size: u64) -> Result<Self, Box<dyn std::error::Error>> {
-        block_on(Self::init(batch_size))
+    /// Try to initialize the GPU engine with a batch size and inter-batch throttle in ms (0 = off).
+    pub fn try_new(batch_size: u64, throttle_ms: u64) -> Result<Self, Box<dyn std::error::Error>> {
+        block_on(Self::init(batch_size, throttle_ms))
     }
 
-    async fn init(batch_size: u64) -> Result<Self, Box<dyn std::error::Error>> {
+    async fn init(batch_size: u64, throttle_ms: u64) -> Result<Self, Box<dyn std::error::Error>> {
         log::info!(target: "gpu_engine", "Initializing WGPU...");
         let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor {
             backends: wgpu::Backends::PRIMARY,
@@ -218,15 +219,17 @@ impl GpuEngine {
 
         log::info!(
             target: "gpu_engine",
-            "GPU engine initialized with {} devices (batch size: {} nonces)",
+            "GPU engine initialized with {} devices (batch size: {} nonces, throttle: {}ms)",
             contexts.len(),
-            batch_size
+            batch_size,
+            throttle_ms
         );
 
         Ok(Self {
             contexts,
             device_counter: AtomicUsize::new(0),
             batch_size,
+            throttle_ms,
         })
     }
 
@@ -423,6 +426,24 @@ impl MinerEngine for GpuEngine {
             current_start = current_start.saturating_add(U512::from(this_batch_size));
             batch_num += 1;
 
+            // Throttle between batches, only if throttle_ms > 0 and more of the range remains.
+            // Sleep in throttle_ms / 10 slices (min 1 ms), checking for cancellation before each.
+            if self.throttle_ms > 0 && current_start <= range.end {
+                let sleep_interval =
+                    std::time::Duration::from_millis((self.throttle_ms / 10).max(1));
+                let mut remaining = std::time::Duration::from_millis(self.throttle_ms);
+                while remaining > std::time::Duration::ZERO {
+                    if cancel.is_cancelled() {
+                        return EngineStatus::Cancelled {
+                            hash_count: total_hashes,
+                        };
+                    }
+                    let sleep_time = remaining.min(sleep_interval);
+                    std::thread::sleep(sleep_time);
+                    remaining = remaining.saturating_sub(sleep_time);
+                }
+            }
+
             // Log progress periodically (every 10 batches)
             if batch_num.is_multiple_of(10) {
                 let elapsed = search_start.elapsed();
diff --git a/crates/miner-cli/src/main.rs b/crates/miner-cli/src/main.rs
index 51778f0..0466e5d 100644
--- a/crates/miner-cli/src/main.rs
+++ b/crates/miner-cli/src/main.rs
@@ -44,6 +44,14 @@ enum Command {
         )]
         metrics_port: u16,
 
+        /// GPU throttle delay in milliseconds between batches (0 = no throttle)
+        #[arg(
+            long = "gpu-throttle-ms",
+            env = "MINER_GPU_THROTTLE_MS",
+            default_value_t = 0
+        )]
+        gpu_throttle_ms: u64,
+
         /// Enable verbose logging
         #[arg(short, long, env = "MINER_VERBOSE")]
         verbose: bool,
@@ -102,6 +110,7 @@ async fn main() {
             gpu_devices,
             gpu_batch_size,
             cpu_batch_size,
+            gpu_throttle_ms,
             metrics_port,
             verbose,
         } => {
@@ -125,6 +134,7 @@ async fn main() {
                 gpu_devices,
                 gpu_batch_size,
                 cpu_batch_size,
+                gpu_throttle_ms,
             };
 
             if let Err(e) = run(config).await {
@@ -175,9 +185,9 @@ async fn run_benchmark(
 ) {
     let effective_cpu_workers = cpu_workers.unwrap_or_else(num_cpus::get);
 
-    // Initialize GPU engine
+    // Initialize GPU engine (no throttle for benchmark)
     let (gpu_engine, effective_gpu_devices) =
-        match miner_service::resolve_gpu_configuration(gpu_devices, gpu_batch_size) {
+        match miner_service::resolve_gpu_configuration(gpu_devices, gpu_batch_size, 0) {
             Ok((engine, count)) => (engine, count),
             Err(e) => {
                 eprintln!("❌ ERROR: {}", e);
diff --git a/crates/miner-service/src/lib.rs b/crates/miner-service/src/lib.rs
index a815fae..c3be608 100644
--- a/crates/miner-service/src/lib.rs
+++ b/crates/miner-service/src/lib.rs
@@ -31,6 +31,8 @@ pub struct ServiceConfig {
     pub gpu_batch_size: u64,
     /// CPU batch size in hashes
     pub cpu_batch_size: u64,
+    /// GPU throttle delay in milliseconds between batches (0 = no throttle)
+    pub gpu_throttle_ms: u64,
 }
 
 /// Engine type for tracking metrics per compute type.
@@ -414,6 +416,7 @@ fn worker_loop(
 pub fn resolve_gpu_configuration(
     requested_devices: Option<usize>,
     batch_size: u64,
+    throttle_ms: u64,
 ) -> anyhow::Result<(Option<Arc<dyn MinerEngine>>, usize)> {
     // Explicit 0 means no GPU
     if requested_devices == Some(0) {
@@ -421,7 +424,7 @@ pub fn resolve_gpu_configuration(
     }
 
     // Try to initialize GPU engine
-    let engine = engine_gpu::GpuEngine::try_new(batch_size);
+    let engine = engine_gpu::GpuEngine::try_new(batch_size, throttle_ms);
     let engine = match engine {
         Ok(e) => e,
         Err(e) => {
@@ -462,8 +465,11 @@ pub async fn run(config: ServiceConfig) -> anyhow::Result<()> {
     let effective_cpus = num_cpus::get().max(1);
 
     // Resolve GPU configuration
-    let (gpu_engine, gpu_devices) =
-        resolve_gpu_configuration(config.gpu_devices, config.gpu_batch_size)?;
+    let (gpu_engine, gpu_devices) = resolve_gpu_configuration(
+        config.gpu_devices,
+        config.gpu_batch_size,
+        config.gpu_throttle_ms,
+    )?;
 
     // Resolve CPU workers
     let cpu_workers = config.cpu_workers.unwrap_or_else(|| {
@@ -504,6 +510,12 @@ pub async fn run(config: ServiceConfig) -> anyhow::Result<()> {
     if let Some(ref engine) = gpu_engine {
         let name = engine.name();
         log::info!("🎮 GPU engine: {name}");
+        if config.gpu_throttle_ms > 0 {
+            log::info!(
+                "⏳ GPU throttle: {}ms between batches",
+                config.gpu_throttle_ms
+            );
+        }
     }
 
     let total_workers = cpu_workers + gpu_devices;