From 7e448af9dd6f9bcab9852e319b22ad1c576e42be Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 20 Nov 2025 01:36:38 +0000 Subject: [PATCH 1/4] Add readme.txt with compilation and execution instructions --- readme.txt | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 readme.txt diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..3b0085a --- /dev/null +++ b/readme.txt @@ -0,0 +1,181 @@ +================================================================================ +GPU KERNEL PERFORMANCE PREDICTION - QUICK USER MANUAL +================================================================================ + +This project has two main components: +1. gpu-perf: GPU performance data collection using CUDA benchmarks +2. predict-kernel-perf: Performance prediction models (analytical & ML) + +================================================================================ +PART 1: DATA COLLECTION (gpu-perf) +================================================================================ + +REQUIREMENTS: +- NVIDIA GPU with CUDA support +- CUDA Toolkit (nvcc compiler) +- cuBLAS library +- Python 3.6+ +- Bash shell + +STEP 1: GPU CALIBRATION (one-time setup per GPU) +------------------------------------------------- +cd gpu-perf/calibration + +# Compile and run calibration programs +# Replace "2080ti" with your GPU: 2080ti, 4070, titanv, or titanx + +nvcc -O3 -o ../bin/props props.cu +../bin/props > ../data/props_2080ti.out + +nvcc -O3 -o ../bin/stream_like stream_like.cu +../bin/stream_like > ../data/stream_like_2080ti.out + +nvcc -O3 -lcublas -o ../bin/gemm_cublas gemm_cublas.cu +../bin/gemm_cublas > ../data/gemm_cublas_2080ti.out + +cd .. 
+ +STEP 2: GENERATE DATASET +------------------------- +# For RTX 2080 Ti: +./scripts/gen_trials_2080ti.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__2080ti.csv" \ + data/props_2080ti.out \ + data/stream_like_2080ti.out \ + data/gemm_cublas_2080ti.out \ + data/runs_2080ti_final.csv + +# For RTX 4070: +./scripts/gen_trials_4070.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__4070.csv" \ + data/props_4070.out \ + data/stream_like_4070.out \ + data/gemm_cublas_4070.out \ + data/runs_4070_final.csv + +# For Titan V: +./scripts/gen_trials_titanv.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__titanv.csv" \ + data/props_titanv.out \ + data/stream_like_titanv.out \ + data/gemm_cublas_titanv.out \ + data/runs_titanv_final.csv + +# For Titan X: +./scripts/gen_trials_titanx.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__titanx.csv" \ + data/props_titanx.out \ + data/stream_like_titanx.out \ + data/gemm_cublas_titanx.out \ + data/runs_titanx_final.csv + +OUTPUT: data/runs_[gpu]_final.csv (65+ kernel configurations × 40 metrics) + +NOTE: You can only generate data for ONE GPU at a time (the one in your system) + +================================================================================ +PART 2: PERFORMANCE PREDICTION (predict-kernel-perf) +================================================================================ + +REQUIREMENTS: +- Python 3.6+ +- pandas, numpy +- scikit-learn (for ML models) + +PREREQUISITES: +Place dataset files in predict-kernel-perf/data/: +- runs_2080ti_final.csv +- runs_4070_final.csv +- runs_titanv_final.csv +- runs_titanx_final.csv +- gpu_metrics.json + +(These files are generated by the gpu-perf pipeline above) + +RUN ANALYTICAL MODEL: +--------------------- +cd predict-kernel-perf/scripts +python3 analytical_model_occupancy.py + +OUTPUT: analytic_model_outputs/*.csv +- cross_gpu_predictions.csv +- exp1_same_config_new_gpu.csv +- exp2_new_configs_same_gpus.csv +- 
exp3a_new_kernels_*.csv + +RUN MACHINE LEARNING MODELS: +----------------------------- +cd predict-kernel-perf/scripts +python3 ml_baseline.py + +OUTPUT: ml_outputs/*.csv +- exp1_*_[model]_predictions.csv +- exp2_*_[model]_predictions.csv +- exp3_*_[model]_predictions.csv +- Per-kernel error analysis files + +================================================================================ +VISUALIZATION AND ANALYSIS +================================================================================ + +GENERATE PLOTS: +--------------- +cd predict-kernel-perf/scripts +python3 plots.py + +OUTPUT: Visualization plots comparing model predictions + +CREATE SUMMARY TABLES: +---------------------- +cd predict-kernel-perf/scripts +python3 create_tables.py + +OUTPUT: Summary tables of model performance metrics + +================================================================================ +QUICK START EXAMPLE (RTX 2080 Ti) +================================================================================ + +# 1. Calibrate GPU +cd gpu-perf/calibration +nvcc -O3 -o ../bin/props props.cu && ../bin/props > ../data/props_2080ti.out +nvcc -O3 -o ../bin/stream_like stream_like.cu && ../bin/stream_like > ../data/stream_like_2080ti.out +nvcc -O3 -lcublas -o ../bin/gemm_cublas gemm_cublas.cu && ../bin/gemm_cublas > ../data/gemm_cublas_2080ti.out +cd .. + +# 2. Generate dataset +./scripts/gen_trials_2080ti.sh +python3 scripts/build_final_dataset.py \ + "data/trials_*__2080ti.csv" \ + data/props_2080ti.out \ + data/stream_like_2080ti.out \ + data/gemm_cublas_2080ti.out \ + data/runs_2080ti_final.csv + +# 3. Copy data files to prediction directory +cp data/runs_*.csv ../predict-kernel-perf/data/ + +# 4. 
Run prediction models +cd ../predict-kernel-perf/scripts +python3 analytical_model_occupancy.py +python3 ml_baseline.py + +================================================================================ +TROUBLESHOOTING +================================================================================ + +- If nvcc not found: Ensure CUDA Toolkit is installed and in PATH +- If permission denied on scripts: Run "chmod +x scripts/*.sh" +- If Python modules missing: Run "pip3 install pandas numpy scikit-learn" +- If cuBLAS errors: Ensure cuBLAS library is installed with CUDA + +For detailed documentation, see: +- README.md (main documentation) +- gpu-perf/README.md (data collection details) +- predict-kernel-perf/README.md (modeling details) + +================================================================================ From 645b834e6a2deeb967ba8e611beaba454693c15a Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 20 Nov 2025 01:43:11 +0000 Subject: [PATCH 2/4] Fix paths for predict-kernel-perf scripts - run from data/ directory --- readme.txt | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/readme.txt b/readme.txt index 3b0085a..a88432a 100644 --- a/readme.txt +++ b/readme.txt @@ -98,10 +98,10 @@ Place dataset files in predict-kernel-perf/data/: RUN ANALYTICAL MODEL: --------------------- -cd predict-kernel-perf/scripts -python3 analytical_model_occupancy.py +cd predict-kernel-perf/data +python3 ../scripts/analytical_model_occupancy.py -OUTPUT: analytic_model_outputs/*.csv +OUTPUT: predict-kernel-perf/data/analytic_model_outputs/*.csv - cross_gpu_predictions.csv - exp1_same_config_new_gpu.csv - exp2_new_configs_same_gpus.csv @@ -109,10 +109,10 @@ OUTPUT: analytic_model_outputs/*.csv RUN MACHINE LEARNING MODELS: ----------------------------- -cd predict-kernel-perf/scripts -python3 ml_baseline.py +cd predict-kernel-perf/data +python3 ../scripts/ml_baseline.py -OUTPUT: ml_outputs/*.csv +OUTPUT: 
predict-kernel-perf/data/ml_outputs/*.csv - exp1_*_[model]_predictions.csv - exp2_*_[model]_predictions.csv - exp3_*_[model]_predictions.csv @@ -124,15 +124,15 @@ VISUALIZATION AND ANALYSIS GENERATE PLOTS: --------------- -cd predict-kernel-perf/scripts -python3 plots.py +cd predict-kernel-perf/data +python3 ../scripts/plots.py OUTPUT: Visualization plots comparing model predictions CREATE SUMMARY TABLES: ---------------------- -cd predict-kernel-perf/scripts -python3 create_tables.py +cd predict-kernel-perf/data +python3 ../scripts/create_tables.py OUTPUT: Summary tables of model performance metrics @@ -160,9 +160,9 @@ python3 scripts/build_final_dataset.py \ cp data/runs_*.csv ../predict-kernel-perf/data/ # 4. Run prediction models -cd ../predict-kernel-perf/scripts -python3 analytical_model_occupancy.py -python3 ml_baseline.py +cd ../predict-kernel-perf/data +python3 ../scripts/analytical_model_occupancy.py +python3 ../scripts/ml_baseline.py ================================================================================ TROUBLESHOOTING From 8439f99d1bbd537245809f30bae5e51eff97531f Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 20 Nov 2025 02:45:18 +0000 Subject: [PATCH 3/4] Add CUDA version compatibility section for different GPU architectures --- readme.txt | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/readme.txt b/readme.txt index a88432a..55df807 100644 --- a/readme.txt +++ b/readme.txt @@ -17,6 +17,25 @@ REQUIREMENTS: - Python 3.6+ - Bash shell +CUDA VERSION COMPATIBILITY: +---------------------------- +IMPORTANT: Different GPU architectures require different CUDA versions for +optimal performance. Using incompatible CUDA versions can result in absurd +performance numbers. 
+ +If using a module system (HPC clusters): + +# For Titan V (Volta, sm_70): +module load cuda-12.6 +# AVOID CUDA 13.0+ for sm_70 - causes performance degradation + +# For RTX 2080 Ti, RTX 4070, Titan X (Turing/Ada, sm_75/sm_89): +module load cuda-12.6 +# or module load cuda-13.0 for newer architectures + +Verify CUDA version: +nvcc --version + STEP 1: GPU CALIBRATION (one-time setup per GPU) ------------------------------------------------- cd gpu-perf/calibration From 1b044397b129da209dea9dd783c8200e7598fa28 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 20 Nov 2025 02:53:05 +0000 Subject: [PATCH 4/4] Update CUDA compatibility: only RTX 4070 uses CUDA 13.0, others use 12.6 --- readme.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/readme.txt b/readme.txt index 55df807..e29a4af 100644 --- a/readme.txt +++ b/readme.txt @@ -25,13 +25,15 @@ performance numbers. If using a module system (HPC clusters): -# For Titan V (Volta, sm_70): +# For RTX 4070 (Ada Lovelace, sm_89): +module load cuda-13.0 + +# For RTX 2080 Ti (Turing, sm_75) and Titan X (pre-Turing: Maxwell sm_52 or Pascal sm_61): module load cuda-12.6 -# AVOID CUDA 13.0+ for sm_70 - causes performance degradation -# For RTX 2080 Ti, RTX 4070, Titan X (Turing/Ada, sm_75/sm_89): +# For Titan V (Volta, sm_70): module load cuda-12.6 -# or module load cuda-13.0 for newer architectures +# AVOID CUDA 13.0+ for sm_70 - causes performance degradation Verify CUDA version: nvcc --version