From a523d71a13022861c845ef6bbf1854e36a03984e Mon Sep 17 00:00:00 2001 From: TheRealGioviok <64363733+TheRealGioviok@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:28:50 +0200 Subject: [PATCH 1/5] feat: branchless get_inverse_implication --- src/BooleanNet.cu | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/BooleanNet.cu b/src/BooleanNet.cu index 090d307..0ccd3a0 100644 --- a/src/BooleanNet.cu +++ b/src/BooleanNet.cu @@ -1,18 +1,9 @@ #include "BooleanNet.cuh" +// NOTE: strict mapping of this function for impl_type in [0,3]. __device__ char get_inverse_implication(char impl_type){ - if (impl_type == 0){ - return 3; - } - else if (impl_type == 1){ - return 1; - } - else if (impl_type == 2){ - return 2; - } - else if (impl_type == 3){ - return 0; - } + // Not using swith case because of problems with some gpu's and compiler, but still better than 4 if-else + return (3 - impl_type) - (impl_type == 1) + (impl_type == 2); } __host__ void BooleanNet::get_all_implications(std::vector genes, char* expr_values, int nsamples, float statThresh, float pvalThresh, float * implication_matrix){ From e533f557bfb59601cb88e88964288b507f5d48fb Mon Sep 17 00:00:00 2001 From: TheRealGioviok <64363733+TheRealGioviok@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:37:03 +0200 Subject: [PATCH 2/5] feat: branchless getQuadrantCounts --- src/BooleanNet.cu | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/src/BooleanNet.cu b/src/BooleanNet.cu index 0ccd3a0..e863ef5 100644 --- a/src/BooleanNet.cu +++ b/src/BooleanNet.cu @@ -50,34 +50,18 @@ __host__ void BooleanNet::get_all_implications(std::vector genes, c } __host__ __device__ void BooleanNet::getQuadrantCounts(int gene1, int gene2, char* expr_values, int nsamples, int* quadrant_counts){ - for (int i = 0; i < 4; i++){ - quadrant_counts[i] = 0; - } - // for (int i = 0; i < nsamples; i++){ - // printf("%d\t", expr_values[gene1 * nsamples + i]); - // } - // printf("\n"); - // for (int i = 0; i < nsamples; i++){ - // printf("%d\t", expr_values[gene2 * nsamples + i]); - // } - // printf("\n"); + quadrant_counts[0] = quadrant_counts[1] = quadrant_counts[2] = quadrant_counts[3] = 0; + int g1_ns = gene1 * nsamples; + int g2_ns = gene2 * nsamples; for (int i = 0; i < nsamples; i++){ - if (expr_values[gene1 * nsamples + i] == -1){ - if (expr_values[gene2 * nsamples + i] == -1){ - quadrant_counts[0]++; - } - else if (expr_values[gene2 * nsamples + i] == 1){ - quadrant_counts[1]++; - } - } - else if (expr_values[gene1 * nsamples + i] == 1){ - if (expr_values[gene2 * nsamples + i] == -1){ - quadrant_counts[2]++; - } - else if (expr_values[gene2 * nsamples + i] == 1){ - quadrant_counts[3]++; - } - } + bool k1p = g1_ns + i == 1; + bool k1n = g1_ns + i == -1; + bool k2p = g2_ns + i == 1; + bool k2n = g2_ns + i == -1; + quadrant_counts[0] += k2n && k1n; + quadrant_counts[1] += k2p && k1n; + quadrant_counts[2] += k2n && k1p; + quadrant_counts[3] += k2p && k1p; } } From 3b3adefdd60ce72b9d0d2ed96a1e9df5c30fdd95 Mon Sep 17 00:00:00 2001 From: TheRealGioviok <64363733+TheRealGioviok@users.noreply.github.com> Date: Wed, 29 Mar 2023 22:56:11 +0200 Subject: [PATCH 3/5] feat: branchless getSingleImplication --- src/BooleanNet.cu | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/src/BooleanNet.cu b/src/BooleanNet.cu index e863ef5..12091fd 100644 --- a/src/BooleanNet.cu +++ b/src/BooleanNet.cu @@ -72,26 +72,14 @@ __host__ __device__ void BooleanNet::getSingleImplication(int* quadrant_counts, return; } - if (impl_type == 0){ - double n_expected = (double)(n_first_low * n_second_high) / n_total; - *statistic = (n_expected - quadrant_counts[1]) / sqrt(n_expected); - *pval = ((((double)quadrant_counts[1] / n_first_low) + ((double)quadrant_counts[1] / n_second_high)) / 2); - } - else if (impl_type == 1){ - double n_expected = (double)(n_first_low * n_second_low) / n_total; - *statistic = (n_expected - quadrant_counts[0]) / sqrt(n_expected); - *pval = ((((double)quadrant_counts[0] / n_first_low) + ((double)quadrant_counts[0] / n_second_low)) / 2); - } - else if (impl_type == 2){ - double n_expected = (double)(n_first_high * n_second_high) / n_total; - *statistic = (n_expected - quadrant_counts[3]) / sqrt(n_expected); - *pval = ((((double)quadrant_counts[3] / n_first_high) + ((double)quadrant_counts[3] / n_second_high)) / 2); - } - else if (impl_type == 3){ - double n_expected = (double)(n_first_high * n_second_low) / n_total; - *statistic = (n_expected - quadrant_counts[2]) / sqrt(n_expected); - *pval = ((((double)quadrant_counts[2] / n_first_high) + ((double)quadrant_counts[2] / n_second_low)) / 2); - } + int n1 = (impl_type > 1) * n_first_high + (impl_type <= 1) * n_first_low; + int n2 = (impl_type & 1) * n_second_low + (1 - (impl_type & 1)) * n_second_high; + int np = n1 * n2; + int q_index = impl_type ^ 1; + double n_expected = (double)np / n_total; + *statistic = (n_expected - quadrant_counts[q_index] / sqrt(n_expected)); + *pval = ((double)(n1 + n2)) / (2 * np) * quadrant_counts[q_index]; + } __host__ __device__ char BooleanNet::is_zero(int n_first_low, int n_first_high, int n_second_low, int n_second_high, char impl_type){ if (impl_type == 0){ From 129229666b89aec88e269003a179555d57e9f9b8 Mon Sep 17 00:00:00 2001 From: TheRealGioviok <64363733+TheRealGioviok@users.noreply.github.com> Date: Thu, 30 Mar 2023 01:35:03 +0200 Subject: [PATCH 4/5] Feat: branchless is_zero check --- src/BooleanNet.cu | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/BooleanNet.cu b/src/BooleanNet.cu index 12091fd..2a1c608 100644 --- a/src/BooleanNet.cu +++ b/src/BooleanNet.cu @@ -82,26 +82,25 @@ __host__ __device__ void BooleanNet::getSingleImplication(int* quadrant_counts, } __host__ __device__ char BooleanNet::is_zero(int n_first_low, int n_first_high, int n_second_low, int n_second_high, char impl_type){ - if (impl_type == 0){ - if (n_first_low > 0 && n_second_high > 0) - return 0; - } - else if (impl_type == 1){ - if (n_first_low > 0 && n_second_low > 0) - return 0; - } - else if (impl_type == 2){ - if (n_first_high > 0 && n_second_high > 0) - return 0; - } - else if (impl_type == 3){ - if (n_first_high > 0 && n_second_low > 0) - return 0; - } - else { - printf("Invalid impl_type in is_zero\n"); - } - return 1; +#ifdef DEBUG + if (impl_type & 0xfffffffc) printf("Invalid impl_type in is_zero\n"); +#endif + + // For an explanation of the below method, see also: + // https://en.wikipedia.org/wiki/Karnaugh_map + // https://en.wikipedia.org/wiki/Quine%E2%80%93McCluskey_algorithm + + bool ih = impl_type & 2; // a + bool il = impl_type & 1; // b + bool n1l = n_first_low > 0; // c + bool n1h = n_first_high > 0; // d + bool n2l = n_second_low > 0; // e + bool n2h = n_second_high > 0; // f + + // SOP form + // Calculated at: http://www.32x8.com/qmm6_____A-B-C-D-E-F_____m_0-1-2-3-4-5-6-7-8-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-27-28-29-30-31-32-33-34-35-36-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-55-56-57-58-59-60-61-62-63___________option-4_____899788965371824592779 + return (!ih && !n1l) || (!il && !n2h) || (n2l && n2h) || + (n1l && n1h) || (il && !n2h) || (ih && !n1h); } __global__ void getImplication(char * expr_values, uint64_t ngenes, int nsamples, BooleanNet * net, float statThresh, float pvalThresh, uint32_t * impl_len, impl * d_implications, uint32_t * d_symm_impl_len, symm_impl * d_symm_implications){ From 5033e2a3a53c2f8752266aa0962d920cd3127fa6 Mon Sep 17 00:00:00 2001 From: TheRealGioviok <64363733+TheRealGioviok@users.noreply.github.com> Date: Thu, 30 Mar 2023 01:58:49 +0200 Subject: [PATCH 5/5] chore: Comments and tidying up. Removed dead comments and unused variables --- src/BooleanNet.cu | 71 ++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/src/BooleanNet.cu b/src/BooleanNet.cu index 2a1c608..735c4d8 100644 --- a/src/BooleanNet.cu +++ b/src/BooleanNet.cu @@ -7,57 +7,55 @@ __device__ char get_inverse_implication(char impl_type){ } __host__ void BooleanNet::get_all_implications(std::vector genes, char* expr_values, int nsamples, float statThresh, float pvalThresh, float * implication_matrix){ -#if 1 int gene1, gene2; int n_first_low, n_first_high, n_second_high, n_second_low, n_total; float statistic, pval; - int i = 0; + // Iterate over the two genes for (gene1 = 0; gene1 < genes.size(); gene1++){ for (gene2 = 0; gene2 < genes.size(); gene2++){ - if (gene1 != gene2){ + if (gene1 != gene2) { // Skip duplicates int quadrant_counts[4]; - getQuadrantCounts(gene1, gene2, expr_values, nsamples, quadrant_counts); - // for (int i = 0; i < 4; i++){ - // if (i == 2) printf("\n"); - // printf("%d\t", quadrant_counts[i]); - // } - // printf("\n"); + // Get count + getQuadrantCounts(gene1, gene2, expr_values, nsamples, quadrant_counts); - n_first_low = quadrant_counts[0] + quadrant_counts[1]; - n_first_high = quadrant_counts[2] + quadrant_counts[3]; - n_second_high = quadrant_counts[1] + quadrant_counts[3]; - n_second_low = quadrant_counts[0] + quadrant_counts[2]; + // Calculate n1l, n1h, n2l, n2h from the quadrants count + n_first_low = quadrant_counts[0] + quadrant_counts[1]; + n_first_high = quadrant_counts[2] + quadrant_counts[3]; + n_second_high = quadrant_counts[1] + quadrant_counts[3]; + n_second_low = quadrant_counts[0] + quadrant_counts[2]; + // Total count n_total = n_first_low + n_first_high; for (char impl_type = 0; impl_type < 4; impl_type++){ + // Get implication getSingleImplication(quadrant_counts, n_total, n_first_low, n_first_high, n_second_low, n_second_high, impl_type, &statistic, &pval); - if (statistic > statThresh && pval < pvalThresh){ - // implication_matrix[i] = gene1; - // implication_matrix[i+1] = gene2; - // implication_matrix[i+2] = impl_type; - // implication_matrix[i+3] = statistic; - // implication_matrix[i+4] = pval; - printf("%s\t%s\t%d\t%f\t%f\t\n", genes[gene1].c_str(), genes[gene2].c_str(), impl_type, statistic, pval); - } - i += 5; + // Check against thresholds + if (statistic > statThresh && pval < pvalThresh) printf("%s\t%s\t%d\t%f\t%f\t\n", genes[gene1].c_str(), genes[gene2].c_str(), impl_type, statistic, pval); } } } } -#endif } __host__ __device__ void BooleanNet::getQuadrantCounts(int gene1, int gene2, char* expr_values, int nsamples, int* quadrant_counts){ + // Init quadrant count quadrant_counts[0] = quadrant_counts[1] = quadrant_counts[2] = quadrant_counts[3] = 0; + + // Precalculate products, as they will stay constant in the for loop int g1_ns = gene1 * nsamples; int g2_ns = gene2 * nsamples; + + // Iterate over samples for (int i = 0; i < nsamples; i++){ + // Get conditions bool k1p = g1_ns + i == 1; bool k1n = g1_ns + i == -1; bool k2p = g2_ns + i == 1; bool k2n = g2_ns + i == -1; + + // Update count when appropriate quadrant_counts[0] += k2n && k1n; quadrant_counts[1] += k2p && k1n; quadrant_counts[2] += k2n && k1p; @@ -71,16 +69,26 @@ __host__ __device__ void BooleanNet::getSingleImplication(int* quadrant_counts, *pval = 1.0; return; } - + // Get the two operands int n1 = (impl_type > 1) * n_first_high + (impl_type <= 1) * n_first_low; int n2 = (impl_type & 1) * n_second_low + (1 - (impl_type & 1)) * n_second_high; + + // Calculate product int np = n1 * n2; - int q_index = impl_type ^ 1; + + // Get check index based on the implication's type + int q_index = impl_type ^ 1; // 0->1, 1->0, 2->3, 3->2 + + // Calculate expected n double n_expected = (double)np / n_total; + + // Update statistic *statistic = (n_expected - quadrant_counts[q_index] / sqrt(n_expected)); - *pval = ((double)(n1 + n2)) / (2 * np) * quadrant_counts[q_index]; + // Update pval + *pval = ((double)(n1 + n2)) / (2 * np) * quadrant_counts[q_index]; } + __host__ __device__ char BooleanNet::is_zero(int n_first_low, int n_first_high, int n_second_low, int n_second_high, char impl_type){ #ifdef DEBUG if (impl_type & 0xfffffffc) printf("Invalid impl_type in is_zero\n"); @@ -97,6 +105,8 @@ __host__ __device__ char BooleanNet::is_zero(int n_first_low, int n_first_high, bool n2l = n_second_low > 0; // e bool n2h = n_second_high > 0; // f + // Note that this method considers the implication's type ([0-3]) as two booleans. There may be a better method considering impl_type as a number, but I haven't found it. + // SOP form // Calculated at: http://www.32x8.com/qmm6_____A-B-C-D-E-F_____m_0-1-2-3-4-5-6-7-8-10-11-12-13-14-15-16-17-18-19-20-21-22-23-24-25-27-28-29-30-31-32-33-34-35-36-38-39-40-41-42-43-44-45-46-47-48-49-50-51-52-53-55-56-57-58-59-60-61-62-63___________option-4_____899788965371824592779 return (!ih && !n1l) || (!il && !n2h) || (n2l && n2h) || @@ -106,20 +116,13 @@ __host__ __device__ char BooleanNet::is_zero(int n_first_low, int n_first_high, __global__ void getImplication(char * expr_values, uint64_t ngenes, int nsamples, BooleanNet * net, float statThresh, float pvalThresh, uint32_t * impl_len, impl * d_implications, uint32_t * d_symm_impl_len, symm_impl * d_symm_implications){ uint64_t gi = (uint64_t) blockIdx.x * (uint64_t) blockDim.x + (uint64_t) threadIdx.x; - // uint64_t gene1 = gi / ngenes; - // uint64_t gene2 = gi % ngenes; - uint64_t gene1 = ngenes - 2 - floor(sqrt((double)-8*gi + 4*ngenes*(ngenes-1)-7)/2.0 - 0.5); uint64_t gene2 = gi + gene1 + 1 - ngenes*(ngenes-1)/2 + (ngenes-gene1)*((ngenes-gene1)-1)/2; uint64_t nels = (ngenes * (ngenes - 1)) / 2; - // if (gi % (nels / 100) == 0) - // printf("Processed %ld%% of %ld total genes, index: %ld\n", gi / (nels / 100), nels, gi); + if (gene1 == gene2 || gi >= nels) return; - if (gene1 == gene2 || gi >= nels){ - return; - } int n_first_low, n_first_high, n_second_high, n_second_low, n_total; float all_statistic[4], all_pval[4];