Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
0420005
set back to cpu mode when finishing test-PROPD-lrm-lrv.R
suzannejin Feb 11, 2026
1a0085d
remove line in test for replacing nan values
suzannejin Feb 11, 2026
71eb178
remove unnecesary files
suzannejin Feb 11, 2026
b2d0e2e
add singularity definition files
suzannejin Feb 11, 2026
5a49556
create propdGenewise function placeholders
suzannejin Feb 12, 2026
992cfef
first genewise implementation. needs to test
suzannejin Feb 12, 2026
532b324
add tests for genewise propd
suzannejin Feb 12, 2026
2906611
update docs for genewise propd
suzannejin Feb 12, 2026
de26273
change genewise fdr to average across all pairs for each gene
suzannejin Feb 12, 2026
3a38f78
Merge branch 'dev' into add_genewise_r_function
suzannejin Feb 13, 2026
a127dca
modify propdGenewise to return all metrics always
suzannejin Feb 13, 2026
e6ba41d
update genewise propd docs
Feb 13, 2026
5bd5ed8
Merge branch 'dev' into dev-cuda
zalbanob Feb 5, 2026
b47dd20
Merge remote-tracking branch 'origin/dev-cuda-fix-tests' into dev-cuda
zalbanob Feb 16, 2026
29d03a5
removed my old singularity files
zalbanob Feb 16, 2026
6d8636e
Merge remote-tracking branch 'remotes/origin/add_genewise_r_function'…
zalbanob Feb 16, 2026
8537dd8
Integrating the GPU backend with the CPU
zalbanob Feb 17, 2026
79ca926
Moved cpu into its own dispatcher
zalbanob Feb 18, 2026
d9b9aca
Faster LRM
zalbanob Mar 20, 2026
7c28d18
improved radix select
zalbanob Mar 23, 2026
f631ae8
better comments, and renaming for clarity
zalbanob Mar 23, 2026
d9e383a
removed fast math
zalbanob Mar 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
[submodule "bench/nvbench"]
path = bench/nvbench
url = https://github.com/NVIDIA/nvbench.git
[submodule "thirdparty/Catch2"]
path = thirdparty/Catch2
url = https://github.com/catchorg/Catch2.git
71 changes: 46 additions & 25 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ cmake_policy(SET CMP0048 NEW)

project(PROPR CXX)

# Resolved CUDA state for this configure run. Starts OFF; it is switched ON
# further down once a CUDA compiler has actually been found.
set(PROPR_HAS_CUDA OFF)

# User-facing switch for the CUDA backend, stored in the cache (default AUTO).
set(PROPR_ENABLE_CUDA
    "AUTO"
    CACHE STRING
    "Enable CUDA backend: AUTO, ON, or OFF")

# Advertise the three accepted values so cmake-gui / ccmake show a drop-down.
set_property(
    CACHE PROPR_ENABLE_CUDA
    PROPERTY STRINGS
    "AUTO" "ON" "OFF")

if ("${CMAKE_BUILD_TYPE}" STREQUAL "" OR ${CMAKE_BUILD_TYPE} STREQUAL "NOMODE")
message("WORKING ON NO MODE")
Expand Down Expand Up @@ -31,31 +34,48 @@ else ()
message("R : NOT FOUND")
endif ()

check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
set(USE_CUDA ON)
message("Build CUDA Support")
set(PROPERTIES CUDA_ARCHITECTURES 90)
enable_language(CUDA)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
set(USE_CUDA ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr -lineinfo -O3 --use_fast_math --extra-device-vectorization")
message("CUDA : FOUND")
find_package(CUDAToolkit REQUIRED)
execute_process(
COMMAND nvidia-smi --query-gpu=compute_cap --format=csv
COMMAND tail -n 1
COMMAND tr -d .
OUTPUT_VARIABLE NV_CC
OUTPUT_STRIP_TRAILING_WHITESPACE
)
message("Detected Compute Capability : " ${NV_CC})
set(PROPERTIES CUDA_ARCHITECTURES ${NV_CC})
set(CMAKE_CUDA_ARCHITECTURES ${NV_CC})
# Reject any PROPR_ENABLE_CUDA value other than the three supported modes.
# The comparison is exact (case-sensitive), so e.g. "on" is refused as well.
if (NOT PROPR_ENABLE_CUDA STREQUAL "AUTO"
        AND NOT PROPR_ENABLE_CUDA STREQUAL "ON"
        AND NOT PROPR_ENABLE_CUDA STREQUAL "OFF")
    message(FATAL_ERROR "PROPR_ENABLE_CUDA must be one of AUTO, ON, OFF")
endif ()

if (PROPR_ENABLE_CUDA STREQUAL "ON" OR PROPR_ENABLE_CUDA STREQUAL "AUTO")
check_language(CUDA)
if (CMAKE_CUDA_COMPILER)
set(USE_CUDA ON)
set(PROPR_HAS_CUDA ON)
message("Build CUDA Support")
enable_language(CUDA)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --extended-lambda --expt-relaxed-constexpr -lineinfo -O3 --extra-device-vectorization") #--use_fast_math
message("CUDA : FOUND")
find_package(CUDAToolkit REQUIRED)

# Detect the compute capability of the first GPU reported by nvidia-smi and
# use it as the CUDA architecture; fall back to 75 when no usable value can
# be obtained.
#
# nvidia-smi is run on its own rather than piped through `head` / `tr`:
#   * with several COMMANDs, execute_process() reports only the exit status of
#     the LAST child, so a failing nvidia-smi used to look like success;
#   * nvidia-smi prints some failure diagnostics on stdout, which previously
#     ended up verbatim in CMAKE_CUDA_ARCHITECTURES;
#   * `head` and `tr` are not available on every host.
execute_process(
    COMMAND nvidia-smi --query-gpu=compute_cap --format=csv,noheader
    OUTPUT_VARIABLE NV_CC_RAW
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_QUIET
    RESULT_VARIABLE NVSIMI_RC
)

# Accept only a leading "<major>.<minor>" token (the first GPU's line) and
# turn it into the dot-less form CMake expects, e.g. "8.6" -> "86".
# NV_CC stays empty when nvidia-smi failed or printed anything else.
set(NV_CC "")
if (NVSIMI_RC EQUAL 0 AND "${NV_CC_RAW}" MATCHES "^([0-9]+)\\.([0-9]+)")
    set(NV_CC "${CMAKE_MATCH_1}${CMAKE_MATCH_2}")
endif ()

if (NOT "${NV_CC}" STREQUAL "")
    set(CMAKE_CUDA_ARCHITECTURES ${NV_CC})
    message("Detected Compute Capability : " ${NV_CC})
else ()
    # No GPU / driver visible at configure time (e.g. container or CI build).
    set(CMAKE_CUDA_ARCHITECTURES 75)
    message("Detected Compute Capability : unavailable, defaulting to 75")
endif ()
elseif (PROPR_ENABLE_CUDA STREQUAL "ON")
message(FATAL_ERROR "CUDA requested (PROPR_ENABLE_CUDA=ON) but CUDA toolkit/compiler not found.")
else ()
message("CUDA : NOT FOUND (building CPU-only backend)")
endif ()
else ()
message(FATAL_ERROR "CUDA : NOT FOUND")
message("CUDA : DISABLED (PROPR_ENABLE_CUDA=OFF)")
endif ()

if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
Expand All @@ -73,9 +93,10 @@ option(PROPR_BUILD_TESTS "Option to enable building tests" OFF)
add_subdirectory(src)
message("Install Path : ${CMAKE_INSTALL_PREFIX}")
message("PROPR Tests : ${PROPR_BUILD_TESTS}")
message("PROPR CUDA : ${PROPR_HAS_CUDA}")

install(DIRECTORY inst/include DESTINATION ${CMAKE_INSTALL_PREFIX}/PROPR
FILES_MATCHING
PATTERN "*.hpp"
PATTERN "*.h"
PATTERN "*.cuh")
PATTERN "*.cuh")
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export(logratio_with_alpha)
export(logratio_without_alpha)
export(pcor.bshrink)
export(propd)
export(propdGenewise)
export(propr)
export(ratios)
export(results_to_matrix)
Expand Down
24 changes: 23 additions & 1 deletion OLDCMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ set(PROPR_LIB_SOURCES

set(PROPR_BENCHMARK_SOURCES bench/src/main_benchmark.cu )

set(PROPR_TEST_SOURCES
tests/cpp_test/test_lrm.cpp
tests/cpp_test/test_lrv.cpp
tests/cpp_test/test_main.cpp
tests/cpp_test/test_omega.cpp
)


add_library(propr SHARED ${PROPR_LIB_SOURCES})

target_compile_features(propr PUBLIC cxx_std_14)
Expand All @@ -91,8 +99,9 @@ target_include_directories(propr PUBLIC
${CUDAToolkit_INCLUDE_DIRS}
# Local project headers (e.g., src/include/interface, src/include/kernels etc.)
inst/include
# Prereq headers (if they are part of the main library)
# Prereq headers (e.g., Catch2 for tests, if they are part of the main library)
# The original CMakeLists.txt had `include_directories(prereq)`.
# Assuming `prereq` includes `prereq/catch2` and other necessary headers.
${CMAKE_CURRENT_SOURCE_DIR}/prereq
)

Expand Down Expand Up @@ -120,3 +129,16 @@ set_target_properties(propr_benchmark PROPERTIES
CUDA_STANDARD 14
CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}"
)

add_executable(propr_tests ${PROPR_TEST_SOURCES})
target_link_libraries(propr_tests PRIVATE propr)
target_include_directories(propr_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/prereq/catch2
inst/include
)

target_compile_features(propr_tests PUBLIC cxx_std_14)
set_target_properties(propr_tests PROPERTIES CUDA_STANDARD 14 CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")

enable_testing()
add_test(NAME run_propr_cpp_tests COMMAND propr_tests)
112 changes: 112 additions & 0 deletions R/2-propd-genewise.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#' Convert pairwise propd results into genewise
#'
#' This function converts pairwise propd results into genewise results. The resulting
#' genewise results are direct indicators of genes being differentially expressed.
#'
#' @param propd A \code{\link{propd}} object, with FDR values from updateF. Note:
#' for the moment, only theta results with F-stats are supported. Later we will look
#' into the option to get FDR values based on permutations.
#' @param pairwise_fdr FDR threshold to consider a pairwise relationship as significant.
#' Default is 0.05.
#' @param backend Backend to use for connectivity statistics (`"auto"`, `"cpu"`, or `"cuda"`).
#' @return A data frame with genewise results that can be used to identify differentially
#' expressed genes. It contains the following columns:
#' - "id": gene identifier
#' - "lfc": Log Fold Change of the gene, using the geometric mean of all genes as reference.
#' - "lrmD": Log Ratio Mean Difference of the gene. Equivalent to the LFC, but using a subset
#' of genes as reference (only the ones that are significantly connected to the gene).
#' - "connectivity": number of significant pairwise relationships the gene has.
#' - "wconnectivity": weighted connectivity, which sums the strength of significant pairwise
#' relationships (defined as 1/theta) for the gene.
#' - "FDR_mean": average FDR value across all pairwise comparisons for the gene, which can be
#' used as a genewise significance statistic. Note that this is a simple average and may
#' not be the most robust method for determining genewise significance, but it provides a
#' starting point for identifying genes of interest based on their pairwise relationships.
#' Future updates may include more sophisticated methods for calculating genewise p-values
#' or FDR values based on the pairwise results.
#'
#' @rdname propdGenewise
#' @export
propdGenewise <- function(propd, pairwise_fdr = 0.05, backend='auto') {

  ## ---- Preconditions ----
  # Only theta results carrying F-stat based FDR values are handled for now;
  # permutation-based FDR is not supported yet.
  has_fdr <- "FDR" %in% colnames(propd@results)
  if (!has_fdr) {
    stop("Please run updateF on the propd object to get FDR values before running propdGenewise.")
  }

  # alpha-transformed results are not supported
  alpha_is_na <- is.na(propd@alpha)
  if (!alpha_is_na) {
    stop("propdGenewise currently only works for alpha = NA. Future updates may include support for other alpha values.")
  }

  # neither are designs with more than two groups
  n_groups <- length(unique(propd@group))
  if (n_groups > 2) {
    stop("propdGenewise currently only works for 2 groups. Future updates may include support for more than 2 groups.")
  }

  ## ---- Feature names and pair indices ----
  features <- colnames(propd@counts)
  n_features <- length(features)

  pair_idx <- propd@results$Pair
  partner_idx <- propd@results$Partner

  # Pair/Partner that are not numeric are looked up by name among the
  # column names of the counts matrix.
  if (!(is.numeric(pair_idx) && is.numeric(partner_idx))) {
    pair_idx <- match(pair_idx, features)
    partner_idx <- match(partner_idx, features)
    if (anyNA(pair_idx) || anyNA(partner_idx)) {
      stop("Some features in Pair/Partner are not present in propd@counts.")
    }
  }

  ## ---- Connectivity, weighted connectivity and mean FDR (CPU/CUDA dispatch) ----
  stats <- genewiseConnectivityRcpp(
    partner = as.integer(partner_idx),
    pair = as.integer(pair_idx),
    theta = as.numeric(propd@results$theta),
    fdr = as.numeric(propd@results$FDR),
    num_genes = n_features,
    pairwise_fdr = pairwise_fdr,
    backend = backend
  )

  gene_connectivity <- as.integer(stats$connectivity)
  gene_wconnectivity <- as.numeric(stats$wconnectivity)
  gene_fdr_mean <- as.numeric(stats$FDR_mean)

  ## ---- Significance mask used for lrmD ----
  fdr_mat <- results_to_matrix(propd@results, what = "FDR", features = features)
  significant <- (fdr_mat > 0) & (fdr_mat < pairwise_fdr)

  ## ---- Directed lrm matrices ----
  # results_to_matrix returns symmetric matrices, but lrm values are directed:
  #   lrm(Partner, Pair) = mean(log(x_Partner / x_Pair)), with Partner > Pair.
  # Flipping the sign of the upper triangle makes mat[g, j] equal to
  # mean(log(x_g / x_j)) for every g, j.
  negate_upper <- function(mat) {
    above_diag <- upper.tri(mat)
    mat[above_diag] <- -mat[above_diag]
    mat
  }
  lrm1_all <- negate_upper(
    results_to_matrix(propd@results, what = "lrm1", features = features)
  )
  lrm2_all <- negate_upper(
    results_to_matrix(propd@results, what = "lrm2", features = features)
  )

  ## ---- LFC (CLR-based log fold change) ----
  # The per-pair lrm difference is a log fold change; averaging it over all
  # genes as reference amounts to using the geometric mean (CLR) as reference.
  # Dividing by log(2) converts natural log to log2.
  pairwise_lfc <- lrm1_all - lrm2_all
  lfc <- rowMeans(pairwise_lfc, na.rm = TRUE) / log(2)

  ## ---- lrmD (same idea, restricted to significant partners) ----
  significant_lfc <- ifelse(significant, pairwise_lfc, NA)
  lrmD <- apply(
    significant_lfc, 1,
    function(gene_row) median(gene_row, na.rm = TRUE)
  ) / log(2)

  ## ---- Assemble the genewise table ----
  data.frame(
    id = features,
    lfc = lfc,
    lrmD = lrmD,
    connectivity = gene_connectivity,
    wconnectivity = gene_wconnectivity,
    FDR_mean = gene_fdr_mean,
    stringsAsFactors = FALSE,
    row.names = NULL
  )
}
Loading