diff --git a/README.md b/README.md index 836dfc7..ff54f48 100644 --- a/README.md +++ b/README.md @@ -34,10 +34,13 @@ The binary will be available at `target/release/quantus-miner`. | Argument | Environment Variable | Description | Default | |----------|---------------------|-------------|---------| +| `--node-addr ` | `MINER_NODE_ADDR` | Node address to connect to | `127.0.0.1:9833` | | `--cpu-workers ` | `MINER_CPU_WORKERS` | Number of CPU worker threads | Auto-detect | -| `--gpu-devices ` | `MINER_GPU_DEVICES` | Number of GPU devices | 0 | -| `--port ` | `MINER_PORT` | QUIC server port | 9833 | -| `--metrics-port ` | `MINER_METRICS_PORT` | Prometheus metrics port | Disabled | +| `--gpu-devices ` | `MINER_GPU_DEVICES` | Number of GPU devices | Auto-detect | +| `--gpu-batch-size ` | `MINER_GPU_BATCH_SIZE` | GPU batch size in nonces | 1000000 | +| `--cpu-batch-size ` | `MINER_CPU_BATCH_SIZE` | CPU batch size in hashes | 10000 | +| `--gpu-throttle-ms ` | `MINER_GPU_THROTTLE_MS` | Sleep duration (ms) between GPU batches | 0 | +| `--metrics-port ` | `MINER_METRICS_PORT` | Prometheus metrics port | 9900 | ## GPU Mining @@ -74,6 +77,9 @@ cargo build -p miner-cli --release # Pure GPU mining ./target/release/quantus-miner serve --gpu-devices 1 +# GPU mining with throttle (reduce GPU utilization) +./target/release/quantus-miner serve --gpu-devices 1 --gpu-throttle-ms 50 + # Hybrid mining: 4 CPU + 1 GPU workers ./target/release/quantus-miner serve --cpu-workers 4 --gpu-devices 1 diff --git a/crates/engine-gpu/benches/gpu_engine_bench.rs b/crates/engine-gpu/benches/gpu_engine_bench.rs index 29e47e3..64d68eb 100644 --- a/crates/engine-gpu/benches/gpu_engine_bench.rs +++ b/crates/engine-gpu/benches/gpu_engine_bench.rs @@ -8,7 +8,7 @@ use std::sync::atomic::AtomicBool; fn bench_cpu_vs_gpu_small(c: &mut Criterion) { let cpu_engine = FastCpuEngine::new(10_000); - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); let cancel_flag = AtomicBool::new(false); let cancel_check = AtomicBoolCancelCheck(&cancel_flag); @@ -59,7 +59,7 @@ fn bench_cpu_vs_gpu_small(c: &mut Criterion) { fn bench_cpu_vs_gpu_medium(c: &mut Criterion) { let cpu_engine = FastCpuEngine::new(10_000); - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); let cancel_flag = AtomicBool::new(false); let cancel_check = AtomicBoolCancelCheck(&cancel_flag); @@ -110,7 +110,7 @@ fn bench_cpu_vs_gpu_medium(c: &mut Criterion) { fn bench_cpu_vs_gpu_large(c: &mut Criterion) { let cpu_engine = FastCpuEngine::new(10_000); - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); let cancel_flag = AtomicBool::new(false); let cancel_check = AtomicBoolCancelCheck(&cancel_flag); @@ -161,7 +161,7 @@ fn bench_cpu_vs_gpu_large(c: &mut Criterion) { fn bench_solution_finding(c: &mut Criterion) { let cpu_engine = FastCpuEngine::new(10_000); - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); let cancel_flag = AtomicBool::new(false); let cancel_check = AtomicBoolCancelCheck(&cancel_flag); @@ -212,7 +212,7 @@ fn bench_solution_finding(c: &mut Criterion) { fn bench_throughput_per_second(c: &mut Criterion) { let cpu_engine = FastCpuEngine::new(10_000); - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); let cancel_flag = AtomicBool::new(false); let cancel_check = AtomicBoolCancelCheck(&cancel_flag); @@ -262,7 +262,7 @@ fn bench_throughput_per_second(c: &mut Criterion) { } fn bench_gpu_batch_efficiency(c: &mut Criterion) { - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); let cancel_flag = AtomicBool::new(false); let cancel_check = AtomicBoolCancelCheck(&cancel_flag); diff --git a/crates/engine-gpu/examples/verify_nonce.rs b/crates/engine-gpu/examples/verify_nonce.rs index d361265..fce9d22 100644 --- a/crates/engine-gpu/examples/verify_nonce.rs +++ b/crates/engine-gpu/examples/verify_nonce.rs @@ -26,7 +26,7 @@ fn main() { // 3. Verify with GPU engine log::info!("Initializing GPU engine..."); - let gpu_engine = GpuEngine::try_new(10_000_000).expect("Failed to init GPU"); + let gpu_engine = GpuEngine::try_new(10_000_000, 0).expect("Failed to init GPU"); // Search a small range around the valid nonce let gpu_range = Range { diff --git a/crates/engine-gpu/src/lib.rs b/crates/engine-gpu/src/lib.rs index 7aa9100..8604fee 100644 --- a/crates/engine-gpu/src/lib.rs +++ b/crates/engine-gpu/src/lib.rs @@ -36,6 +36,7 @@ pub struct GpuEngine { contexts: Vec>, device_counter: AtomicUsize, batch_size: u64, + throttle_ms: u64, } // Thread-local storage for consistent GPU device assignment per worker thread @@ -138,12 +139,12 @@ impl GpuContext { } impl GpuEngine { - /// Try to initialize the GPU engine with the given batch size. - pub fn try_new(batch_size: u64) -> Result> { - block_on(Self::init(batch_size)) + /// Try to initialize the GPU engine with the given batch size and throttle (ms between batches). + pub fn try_new(batch_size: u64, throttle_ms: u64) -> Result> { + block_on(Self::init(batch_size, throttle_ms)) } - async fn init(batch_size: u64) -> Result> { + async fn init(batch_size: u64, throttle_ms: u64) -> Result> { log::info!(target: "gpu_engine", "Initializing WGPU..."); let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor { backends: wgpu::Backends::PRIMARY, @@ -218,15 +219,17 @@ impl GpuEngine { log::info!( target: "gpu_engine", - "GPU engine initialized with {} devices (batch size: {} nonces)", + "GPU engine initialized with {} devices (batch size: {} nonces, throttle: {}ms)", contexts.len(), - batch_size + batch_size, + throttle_ms ); Ok(Self { contexts, device_counter: AtomicUsize::new(0), batch_size, + throttle_ms, }) } @@ -423,6 +426,24 @@ impl MinerEngine for GpuEngine { current_start = current_start.saturating_add(U512::from(this_batch_size)); batch_num += 1; + // Apply throttle delay between batches (if configured and more batches remain) + // Sleep in small increments to remain responsive to cancellation + if self.throttle_ms > 0 && current_start <= range.end { + let sleep_interval = + std::time::Duration::from_millis((self.throttle_ms / 10).max(1)); + let mut remaining = std::time::Duration::from_millis(self.throttle_ms); + while remaining > std::time::Duration::ZERO { + if cancel.is_cancelled() { + return EngineStatus::Cancelled { + hash_count: total_hashes, + }; + } + let sleep_time = remaining.min(sleep_interval); + std::thread::sleep(sleep_time); + remaining = remaining.saturating_sub(sleep_time); + } + } + // Log progress periodically (every 10 batches) if batch_num.is_multiple_of(10) { let elapsed = search_start.elapsed(); diff --git a/crates/miner-cli/src/main.rs b/crates/miner-cli/src/main.rs index 51778f0..0466e5d 100644 --- a/crates/miner-cli/src/main.rs +++ b/crates/miner-cli/src/main.rs @@ -44,6 +44,14 @@ enum Command { )] metrics_port: u16, + /// GPU throttle delay in milliseconds between batches (0 = no throttle) + #[arg( + long = "gpu-throttle-ms", + env = "MINER_GPU_THROTTLE_MS", + default_value_t = 0 + )] + gpu_throttle_ms: u64, + /// Enable verbose logging #[arg(short, long, env = "MINER_VERBOSE")] verbose: bool, @@ -102,6 +110,7 @@ async fn main() { gpu_devices, gpu_batch_size, cpu_batch_size, + gpu_throttle_ms, metrics_port, verbose, } => { @@ -125,6 +134,7 @@ async fn main() { gpu_devices, gpu_batch_size, cpu_batch_size, + gpu_throttle_ms, }; if let Err(e) = run(config).await { @@ -175,9 +185,9 @@ async fn run_benchmark( ) { let effective_cpu_workers = cpu_workers.unwrap_or_else(num_cpus::get); - // Initialize GPU engine + // Initialize GPU engine (no throttle for benchmark) let (gpu_engine, effective_gpu_devices) = - match miner_service::resolve_gpu_configuration(gpu_devices, gpu_batch_size) { + match miner_service::resolve_gpu_configuration(gpu_devices, gpu_batch_size, 0) { Ok((engine, count)) => (engine, count), Err(e) => { eprintln!("❌ ERROR: {}", e); diff --git a/crates/miner-service/src/lib.rs b/crates/miner-service/src/lib.rs index a815fae..c3be608 100644 --- a/crates/miner-service/src/lib.rs +++ b/crates/miner-service/src/lib.rs @@ -31,6 +31,8 @@ pub struct ServiceConfig { pub gpu_batch_size: u64, /// CPU batch size in hashes pub cpu_batch_size: u64, + /// GPU throttle delay in milliseconds between batches (0 = no throttle) + pub gpu_throttle_ms: u64, } /// Engine type for tracking metrics per compute type. @@ -414,6 +416,7 @@ fn worker_loop( pub fn resolve_gpu_configuration( requested_devices: Option, batch_size: u64, + throttle_ms: u64, ) -> anyhow::Result<(Option>, usize)> { // Explicit 0 means no GPU if requested_devices == Some(0) { @@ -421,7 +424,7 @@ pub fn resolve_gpu_configuration( } // Try to initialize GPU engine - let engine = engine_gpu::GpuEngine::try_new(batch_size); + let engine = engine_gpu::GpuEngine::try_new(batch_size, throttle_ms); let engine = match engine { Ok(e) => e, Err(e) => { @@ -462,8 +465,11 @@ pub async fn run(config: ServiceConfig) -> anyhow::Result<()> { let effective_cpus = num_cpus::get().max(1); // Resolve GPU configuration - let (gpu_engine, gpu_devices) = - resolve_gpu_configuration(config.gpu_devices, config.gpu_batch_size)?; + let (gpu_engine, gpu_devices) = resolve_gpu_configuration( + config.gpu_devices, + config.gpu_batch_size, + config.gpu_throttle_ms, + )?; // Resolve CPU workers let cpu_workers = config.cpu_workers.unwrap_or_else(|| { @@ -504,6 +510,12 @@ pub async fn run(config: ServiceConfig) -> anyhow::Result<()> { if let Some(ref engine) = gpu_engine { let name = engine.name(); log::info!("🎮 GPU engine: {name}"); + if config.gpu_throttle_ms > 0 { + log::info!( + "⏳ GPU throttle: {}ms between batches", + config.gpu_throttle_ms + ); + } } let total_workers = cpu_workers + gpu_devices;