From 2101b6dd4f3484b7616b4ff810d8a9cea67ae0ef Mon Sep 17 00:00:00 2001 From: 36000 Date: Wed, 19 Feb 2025 20:42:21 -0800 Subject: [PATCH 01/10] WIP tweaking PTT params --- cuslines/ptt.cu | 2 +- cuslines/ptt.cuh | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cuslines/ptt.cu b/cuslines/ptt.cu index b36e747..7bb0763 100644 --- a/cuslines/ptt.cu +++ b/cuslines/ptt.cu @@ -213,7 +213,7 @@ __device__ int get_direction_ptt_d( REAL_T *__first_val_sh = first_val_sh + tidy; const REAL_T max_curvature = SIN(max_angle / 2) / step_size; // bigger numbers means wiggle more - const REAL_T probe_step_size = ((step_size / 2) / (PROBE_QUALITY - 1)); + const REAL_T probe_step_size = ((step_size / PROBE_FRAC) / (PROBE_QUALITY - 1)); REAL_T __tmp; diff --git a/cuslines/ptt.cuh b/cuslines/ptt.cuh index d8986b5..a8222fc 100644 --- a/cuslines/ptt.cuh +++ b/cuslines/ptt.cuh @@ -6,16 +6,16 @@ #define STEP_FRAC 20 // divides output step size (usually 0.5) into this many internal steps #define PROBE_FRAC 2 // divides output step size (usually 0.5) to find probe length -#define PROBE_QUALITY 4 +#define PROBE_QUALITY 4 // Number of probing steps #define SAMPLING_QUALITY 4 // can be 2-7 -#define PROBABILISTIC_BIAS 1 // 1 looks good. can be 0-log_2(N_WARPS) (typically 0-5). 0 is fully probabilistic, 4 is close to deterministic. -#define ALLOW_WEAK_LINK 1 +#define DETERMINISTIC_BIAS 0 // Should be 0, higher values bias more towards higher fODF values when tracking +#define ALLOW_WEAK_LINK 0 #define TRIES_PER_REJECTION_SAMPLING 1024 -#define DEFAULT_PTT_MINDATASUPPORT 0.05 +#define DEFAULT_PTT_MINDATASUPPORT 0.0 // 0.01 #define K_SMALL 0.0001 #define NORM_MIN_SUPPORT (DEFAULT_PTT_MINDATASUPPORT * PROBE_QUALITY) -#define PROBABILISTIC_GROUP_SZ POW2(PROBABILISTIC_BIAS) +#define PROBABILISTIC_GROUP_SZ POW2(DETERMINISTIC_BIAS) #if SAMPLING_QUALITY == 2 #define DISC_VERT_CNT DISC_2_VERT_CNT From 61fb586902b1d76556a4ba3f958a83097a382ea0 Mon Sep 17 00:00:00 2001 From: 36000 Date: Thu, 10 Jul 2025 12:26:40 -0700 Subject: [PATCH 02/10] put this back --- cuslines/ptt.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuslines/ptt.cuh b/cuslines/ptt.cuh index a8222fc..e3317ff 100644 --- a/cuslines/ptt.cuh +++ b/cuslines/ptt.cuh @@ -11,7 +11,7 @@ #define DETERMINISTIC_BIAS 0 // Should be 0, higher values bias more towards higher fODF values when tracking #define ALLOW_WEAK_LINK 0 #define TRIES_PER_REJECTION_SAMPLING 1024 -#define DEFAULT_PTT_MINDATASUPPORT 0.0 // 0.01 +#define DEFAULT_PTT_MINDATASUPPORT 0.01 // 0.01 #define K_SMALL 0.0001 #define NORM_MIN_SUPPORT (DEFAULT_PTT_MINDATASUPPORT * PROBE_QUALITY) From d257ad9cf699422eddd7f29409b1e86b2c9f51d0 Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 10:52:39 -0700 Subject: [PATCH 03/10] update for CUDA 13 compatibility --- cuslines/cuslines.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cuslines/cuslines.cpp b/cuslines/cuslines.cpp index 4e8bc30..45163ad 100644 --- a/cuslines/cuslines.cpp +++ b/cuslines/cuslines.cpp @@ -146,9 +146,12 @@ class GPUTracker { //#pragma omp parallel for for (int n = 0; n < ngpus_; ++n) { + cudaMemLocation location = {}; + location.type = cudaMemLocationTypeDevice; + location.id = n; CHECK_CUDA(cudaSetDevice(n)); CHECK_CUDA(cudaMallocManaged(&dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size)); - CHECK_CUDA(cudaMemAdvise(dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size, cudaMemAdviseSetPreferredLocation, n)); + CHECK_CUDA(cudaMemAdvise(dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size, cudaMemAdviseSetPreferredLocation, location)); CHECK_CUDA(cudaMalloc(&H_d[n], sizeof(*H_d[n]) * H_info.size)); CHECK_CUDA(cudaMalloc(&R_d[n], sizeof(*R_d[n]) * R_info.size)); CHECK_CUDA(cudaMalloc(&delta_b_d[n], sizeof(*delta_b_d[n]) * delta_b_info.size)); From 173e48da04196cbb1d92e97d1e2e43fcc65c2266 Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 10:54:17 -0700 Subject: [PATCH 04/10] formatting --- cuslines/cuslines.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuslines/cuslines.cpp b/cuslines/cuslines.cpp index 45163ad..ceb7002 100644 --- a/cuslines/cuslines.cpp +++ b/cuslines/cuslines.cpp @@ -146,7 +146,7 @@ class GPUTracker { //#pragma omp parallel for for (int n = 0; n < ngpus_; ++n) { - cudaMemLocation location = {}; + cudaMemLocation location = {}; location.type = cudaMemLocationTypeDevice; location.id = n; CHECK_CUDA(cudaSetDevice(n)); From bea03ceddfdbec14eaf7b1597e74091c7d791234 Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 12:10:27 -0700 Subject: [PATCH 05/10] try this --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 06e9de9..15a0e1e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,7 @@ ENV PATH /opt/anaconda/bin:${PATH} ENV LD_LIBRARY_PATH /opt/anaconda/lib:${LD_LIBRARY_PATH} # python prereqs +RUN conda tos accept --override-channels --channel conda-forge RUN conda install -c conda-forge git RUN pip install numpy>=2.0.0 RUN pip install scipy>=1.13.0 cython nibabel dipy tqdm From 00a22c8671551b60594f3b4570472e6ce21b51f3 Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 12:38:51 -0700 Subject: [PATCH 06/10] accept lots of TOS --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 15a0e1e..3a2cbdc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,8 @@ ENV LD_LIBRARY_PATH /opt/anaconda/lib:${LD_LIBRARY_PATH} # python prereqs RUN conda tos accept --override-channels --channel conda-forge +RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main +RUN conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r RUN conda install -c conda-forge git RUN pip install numpy>=2.0.0 RUN pip install scipy>=1.13.0 cython nibabel dipy tqdm From 03397f602b0181b981a563b4830b04313a575737 Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 12:55:52 -0700 Subject: [PATCH 07/10] handle both 12/13 --- cuslines/cudamacro.h | 14 ++++++++++++++ cuslines/cuslines.cpp | 5 +---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cuslines/cudamacro.h b/cuslines/cudamacro.h index 49ac24c..45a8fc3 100644 --- a/cuslines/cudamacro.h +++ b/cuslines/cudamacro.h @@ -45,6 +45,20 @@ exit(EXIT_FAILURE); \ }} +#if CUDART_VERSION >= 13000 +#define CUDA_MEM_ADVISE(devPtr, count, advice, device) \ + do { \ + cudaMemLocation loc; \ + loc.type = cudaMemLocationTypeDevice; \ + loc.id = (device); \ + CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), loc)); \ + } while (0) +#else +#define CUDA_MEM_ADVISE(devPtr, count, advice, device) \ + CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), (device))) +#endif + + #ifdef USE_NVTX #include "nvToolsExt.h" diff --git a/cuslines/cuslines.cpp b/cuslines/cuslines.cpp index ceb7002..a1ada94 100644 --- a/cuslines/cuslines.cpp +++ b/cuslines/cuslines.cpp @@ -146,12 +146,9 @@ class GPUTracker { //#pragma omp parallel for for (int n = 0; n < ngpus_; ++n) { - cudaMemLocation location = {}; - location.type = cudaMemLocationTypeDevice; - location.id = n; CHECK_CUDA(cudaSetDevice(n)); CHECK_CUDA(cudaMallocManaged(&dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size)); - CHECK_CUDA(cudaMemAdvise(dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size, cudaMemAdviseSetPreferredLocation, location)); + CHECK_CUDA(CUDA_MEM_ADVISE(dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size, cudaMemAdviseSetPreferredLocation, n)); CHECK_CUDA(cudaMalloc(&H_d[n], sizeof(*H_d[n]) * H_info.size)); CHECK_CUDA(cudaMalloc(&R_d[n], sizeof(*R_d[n]) * R_info.size)); CHECK_CUDA(cudaMalloc(&delta_b_d[n], sizeof(*delta_b_d[n]) * delta_b_info.size)); From 3057e5ba9207da13204ffcf8c45b745950739134 Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 13:04:29 -0700 Subject: [PATCH 08/10] bf --- cuslines/cudamacro.h | 12 +++++------- cuslines/cuslines.cpp | 2 +- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/cuslines/cudamacro.h b/cuslines/cudamacro.h index 45a8fc3..e9b2e1e 100644 --- a/cuslines/cudamacro.h +++ b/cuslines/cudamacro.h @@ -46,13 +46,11 @@ }} #if CUDART_VERSION >= 13000 -#define CUDA_MEM_ADVISE(devPtr, count, advice, device) \ - do { \ - cudaMemLocation loc; \ - loc.type = cudaMemLocationTypeDevice; \ - loc.id = (device); \ - CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), loc)); \ - } while (0) +#define CUDA_MEM_ADVISE(devPtr, count, advice, device) \ + cudaMemLocation loc; \ + loc.type = cudaMemLocationTypeDevice; \ + loc.id = (device); \ + CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), loc)); \ #else #define CUDA_MEM_ADVISE(devPtr, count, advice, device) \ CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), (device))) diff --git a/cuslines/cuslines.cpp b/cuslines/cuslines.cpp index a1ada94..1363705 100644 --- a/cuslines/cuslines.cpp +++ b/cuslines/cuslines.cpp @@ -148,7 +148,7 @@ class GPUTracker { for (int n = 0; n < ngpus_; ++n) { CHECK_CUDA(cudaSetDevice(n)); CHECK_CUDA(cudaMallocManaged(&dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size)); - CHECK_CUDA(CUDA_MEM_ADVISE(dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size, cudaMemAdviseSetPreferredLocation, n)); + CUDA_MEM_ADVISE(dataf_d[n], sizeof(*dataf_d[n]) * dataf_info.size, cudaMemAdviseSetPreferredLocation, n); CHECK_CUDA(cudaMalloc(&H_d[n], sizeof(*H_d[n]) * H_info.size)); CHECK_CUDA(cudaMalloc(&R_d[n], sizeof(*R_d[n]) * R_info.size)); CHECK_CUDA(cudaMalloc(&delta_b_d[n], sizeof(*delta_b_d[n]) * delta_b_info.size)); From 80c35be576c1224702c26513c00938dd8c9ef2fb Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 13:10:44 -0700 Subject: [PATCH 09/10] typo --- cuslines/cudamacro.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuslines/cudamacro.h b/cuslines/cudamacro.h index e9b2e1e..7f03c6c 100644 --- a/cuslines/cudamacro.h +++ b/cuslines/cudamacro.h @@ -50,7 +50,7 @@ cudaMemLocation loc; \ loc.type = cudaMemLocationTypeDevice; \ loc.id = (device); \ - CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), loc)); \ + CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), loc)); #else #define CUDA_MEM_ADVISE(devPtr, count, advice, device) \ CHECK_CUDA(cudaMemAdvise((devPtr), (count), (advice), (device))) From 38250f0b34d61e5fa77e59703d3343b32397644c Mon Sep 17 00:00:00 2001 From: 36000 Date: Mon, 25 Aug 2025 13:17:06 -0700 Subject: [PATCH 10/10] update ENV in dockerfile --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3a2cbdc..889371d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,15 +16,15 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.0/cmake-3.24.0 && mkdir /opt/cmake \ && /tmp/cmake-install.sh --skip-license --prefix=/opt/cmake \ && rm /tmp/cmake-install.sh -ENV PATH /opt/cmake/bin:${PATH} +ENV PATH=/opt/cmake/bin:${PATH} RUN curl -L "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" \ -o "/tmp/Miniconda3.sh" RUN bash /tmp/Miniconda3.sh -b -p /opt/anaconda RUN rm -rf /tmp/Miniconda3.sh RUN cd /opt && eval "$(/opt/anaconda/bin/conda shell.bash hook)" -ENV PATH /opt/anaconda/bin:${PATH} -ENV LD_LIBRARY_PATH /opt/anaconda/lib:${LD_LIBRARY_PATH} +ENV PATH=/opt/anaconda/bin:${PATH} +ENV LD_LIBRARY_PATH=/opt/anaconda/lib:${LD_LIBRARY_PATH} # python prereqs RUN conda tos accept --override-channels --channel conda-forge