diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..e29a4af --- /dev/null +++ b/readme.txt @@ -0,0 +1,202 @@ +================================================================================ +GPU KERNEL PERFORMANCE PREDICTION - QUICK USER MANUAL +================================================================================ + +This project has two main components: +1. gpu-perf: GPU performance data collection using CUDA benchmarks +2. predict-kernel-perf: Performance prediction models (analytical & ML) + +================================================================================ +PART 1: DATA COLLECTION (gpu-perf) +================================================================================ + +REQUIREMENTS: +- NVIDIA GPU with CUDA support +- CUDA Toolkit (nvcc compiler) +- cuBLAS library +- Python 3.6+ +- Bash shell + +CUDA VERSION COMPATIBILITY: +---------------------------- +IMPORTANT: Different GPU architectures require different CUDA versions for +optimal performance. Using an incompatible CUDA version can produce +nonsensical performance numbers. + +If using a module system (e.g. on HPC clusters): + +# For RTX 4070 (Ada Lovelace, sm_89): +module load cuda-13.0 + +# For RTX 2080 Ti (Turing, sm_75) and Titan X: +module load cuda-12.6 + +# For Titan V (Volta, sm_70): +module load cuda-12.6 +# AVOID CUDA 13.0+ for sm_70 - causes performance degradation + +Verify CUDA version: +nvcc --version + +STEP 1: GPU CALIBRATION (one-time setup per GPU) +------------------------------------------------- +cd gpu-perf/calibration + +# Compile and run calibration programs +# Replace "2080ti" with your GPU: 2080ti, 4070, titanv, or titanx + +nvcc -O3 -o ../bin/props props.cu +../bin/props > ../data/props_2080ti.out + +nvcc -O3 -o ../bin/stream_like stream_like.cu +../bin/stream_like > ../data/stream_like_2080ti.out + +nvcc -O3 -lcublas -o ../bin/gemm_cublas gemm_cublas.cu +../bin/gemm_cublas > ../data/gemm_cublas_2080ti.out + +cd .. 
+ +STEP 2: GENERATE DATASET +------------------------- +# For RTX 2080 Ti: +./scripts/gen_trials_2080ti.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__2080ti.csv" \ + data/props_2080ti.out \ + data/stream_like_2080ti.out \ + data/gemm_cublas_2080ti.out \ + data/runs_2080ti_final.csv + +# For RTX 4070: +./scripts/gen_trials_4070.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__4070.csv" \ + data/props_4070.out \ + data/stream_like_4070.out \ + data/gemm_cublas_4070.out \ + data/runs_4070_final.csv + +# For Titan V: +./scripts/gen_trials_titanv.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__titanv.csv" \ + data/props_titanv.out \ + data/stream_like_titanv.out \ + data/gemm_cublas_titanv.out \ + data/runs_titanv_final.csv + +# For Titan X: +./scripts/gen_trials_titanx.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__titanx.csv" \ + data/props_titanx.out \ + data/stream_like_titanx.out \ + data/gemm_cublas_titanx.out \ + data/runs_titanx_final.csv + +OUTPUT: data/runs_<gpu>_final.csv, where <gpu> is 2080ti, 4070, titanv, or titanx (65+ kernel configurations × 40 metrics) + +NOTE: You can only generate data for ONE GPU at a time (the one in your system) + +================================================================================ +PART 2: PERFORMANCE PREDICTION (predict-kernel-perf) +================================================================================ + +REQUIREMENTS: +- Python 3.6+ +- pandas, numpy +- scikit-learn (for ML models) + +PREREQUISITES: +Place dataset files in predict-kernel-perf/data/: +- runs_2080ti_final.csv +- runs_4070_final.csv +- runs_titanv_final.csv +- runs_titanx_final.csv +- gpu_metrics.json + +(These files are generated by the gpu-perf pipeline above) + +RUN ANALYTICAL MODEL: +--------------------- +cd predict-kernel-perf/data +python3 ../scripts/analytical_model_occupancy.py + +OUTPUT: predict-kernel-perf/data/analytic_model_outputs/*.csv +- cross_gpu_predictions.csv +- exp1_same_config_new_gpu.csv +- 
exp2_new_configs_same_gpus.csv +- exp3a_new_kernels_*.csv + +RUN MACHINE LEARNING MODELS: +----------------------------- +cd predict-kernel-perf/data +python3 ../scripts/ml_baseline.py + +OUTPUT: predict-kernel-perf/data/ml_outputs/*.csv +- exp1_*_[model]_predictions.csv +- exp2_*_[model]_predictions.csv +- exp3_*_[model]_predictions.csv +- Per-kernel error analysis files + +================================================================================ +VISUALIZATION AND ANALYSIS +================================================================================ + +GENERATE PLOTS: +--------------- +cd predict-kernel-perf/data +python3 ../scripts/plots.py + +OUTPUT: Visualization plots comparing model predictions + +CREATE SUMMARY TABLES: +---------------------- +cd predict-kernel-perf/data +python3 ../scripts/create_tables.py + +OUTPUT: Summary tables of model performance metrics + +================================================================================ +QUICK START EXAMPLE (RTX 2080 Ti) +================================================================================ + +# 1. Calibrate GPU +cd gpu-perf/calibration +nvcc -O3 -o ../bin/props props.cu && ../bin/props > ../data/props_2080ti.out +nvcc -O3 -o ../bin/stream_like stream_like.cu && ../bin/stream_like > ../data/stream_like_2080ti.out +nvcc -O3 -lcublas -o ../bin/gemm_cublas gemm_cublas.cu && ../bin/gemm_cublas > ../data/gemm_cublas_2080ti.out +cd .. + +# 2. Generate dataset +./scripts/gen_trials_2080ti.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__2080ti.csv" \ + data/props_2080ti.out \ + data/stream_like_2080ti.out \ + data/gemm_cublas_2080ti.out \ + data/runs_2080ti_final.csv + +# 3. Copy data files to prediction directory +cp data/runs_*.csv ../predict-kernel-perf/data/ + +# 4. 
Run prediction models +cd ../predict-kernel-perf/data +python3 ../scripts/analytical_model_occupancy.py +python3 ../scripts/ml_baseline.py + +================================================================================ +TROUBLESHOOTING +================================================================================ + +- If nvcc not found: Ensure CUDA Toolkit is installed and in PATH +- If permission denied on scripts: Run "chmod +x scripts/*.sh" +- If Python modules missing: Run "pip3 install pandas numpy scikit-learn" +- If cuBLAS errors: Ensure cuBLAS library is installed with CUDA + +For detailed documentation, see: +- README.md (main documentation) +- gpu-perf/README.md (data collection details) +- predict-kernel-perf/README.md (modeling details) + +================================================================================