From e04022f6500f44c055613c85febf42d9d4b33fc1 Mon Sep 17 00:00:00 2001 From: Maarten van Kessel <112690347+mvankessel-EMC@users.noreply.github.com> Date: Thu, 6 Nov 2025 13:05:33 +0100 Subject: [PATCH 01/33] Updated colapse algorithm (#339) * Updated colapse algorithm * updated setup * added optional skips * added if to check if cdmconnector is installed * updated setup * updated test * dep update fixes * Updated setup * fixed eunomia download? * made tests faster * updated tests * revamped tests * updated checks * removed dfer * updated eunomia_data_folder * updated setup * updated path * fixed reproduced error from actions * comment out CG to see if DatabaseConnector overrides some CDMConnector functions * added stats for setNames * explicitly load stats * updated tests * updated tests --- .../workflows/R-CMD-check-depends-only.yaml | 1 - .github/workflows/R-CMD-check.yaml | 1 - DESCRIPTION | 3 +- R/constructPathways.R | 70 +- man/doEraCollapseNew.Rd | 19 + tests/testthat/helper-ableToRun.R | 14 +- .../testthat/helper-generateCohortTableCDMC.R | 2 +- tests/testthat/helper-generateCohortTableCG.R | 5 +- tests/testthat/setup.R | 52 +- tests/testthat/test-CDMInterface.R | 180 +++-- tests/testthat/test-CDMInterfaceCDMC.R | 1 - tests/testthat/test-CDMInterfaceDBC.R | 264 ++++---- tests/testthat/test-CRAN.R | 10 +- .../testthat/test-TreatmentPatternsResults.R | 13 +- tests/testthat/test-computePathways.R | 382 +++++------ .../testthat/test-executeTreatmentPatterns.R | 42 +- tests/testthat/test-export.R | 636 +++++++++--------- tests/testthat/test-exportPatientLevel.R | 2 +- tests/testthat/test-ggSunburst.R | 2 +- tests/testthat/test-multipleCohortTables.R | 2 - 20 files changed, 872 insertions(+), 829 deletions(-) create mode 100644 man/doEraCollapseNew.Rd diff --git a/.github/workflows/R-CMD-check-depends-only.yaml b/.github/workflows/R-CMD-check-depends-only.yaml index c872cf1e..816be23f 100644 --- a/.github/workflows/R-CMD-check-depends-only.yaml +++ b/.github/workflows/R-CMD-check-depends-only.yaml @@ -42,7 +42,6 @@ jobs: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - extra-repositories: 'https://OHDSI.github.io/drat' - uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ea346767..e84503da 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -40,7 +40,6 @@ jobs: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - extra-repositories: 'https://OHDSI.github.io/drat' - uses: r-lib/actions/setup-r-dependencies@v2 with: diff --git a/DESCRIPTION b/DESCRIPTION index 4eb59c0b..ff817a50 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -48,7 +48,8 @@ Suggests: DBI, withr, plotly, - PaRe + PaRe, + stats License: Apache License (>= 2) Encoding: UTF-8 LazyData: true diff --git a/R/constructPathways.R b/R/constructPathways.R index cbc55b73..76a420cb 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -78,11 +78,16 @@ constructPathways <- function(settings, andromeda) { splitTime = settings$splitTime ) - doEraCollapse( + doEraCollapseNew( andromeda = andromeda, eraCollapseSize = settings$eraCollapseSize ) + # doEraCollapse( + # andromeda = andromeda, + # eraCollapseSize = settings$eraCollapseSize + # ) + doCombinationWindow( andromeda = andromeda, combinationWindow = settings$combinationWindow, @@ -366,6 +371,69 @@ doSplitEventCohorts <- function( return(invisible(NULL)) } + +#' doEraCollapseNew +#' +#' @param andromeda (`Andromeda::andromeda()`) +#' @param eraCollapseSize (`integer(1)`) +#' +#' @returns `NULL` +doEraCollapseNew <- function(andromeda, eraCollapseSize) { + andromeda$treatmentHistory <- andromeda$treatmentHistory |> + dplyr::group_by(.data$personId, .data$eventCohortId, .data$n_target) %>% + dbplyr::window_order(.data$eventStartDate, .data$eventEndDate) |> + dplyr::mutate( + diff = .data$eventStartDate - dplyr::lag(.data$eventEndDate), + flag = dplyr::case_when( + .data$diff <= eraCollapseSize ~ 1, + .default = 0 + ), + flag = dplyr::case_when( + dplyr::lead(.data$flag) == 1 + | .data$flag == 1 + ~ 1, + .default = 0 + ), + row = dplyr::case_when( + .data$flag == 1 & .data$diff <= eraCollapseSize ~ dplyr::row_number(), + .default = 0 + ), + end_date = dplyr::case_when( + .data$row == max(.data$row, na.rm = TRUE) ~ .data$eventEndDate + ) + ) |> + dplyr::mutate( + eventEndDate_old = .data$eventEndDate, + eventEndDate = dplyr::case_when( + .data$flag == 1 ~ max(.data$end_date, na.rm = TRUE), + .default = .data$eventEndDate_old + ) + ) |> + dplyr::mutate( + keep = dplyr::case_when( + .data$flag == 1 & .data$row == min(.data$row, na.rm = TRUE) ~ TRUE, + .data$flag == 0 ~ TRUE, + .default = FALSE + ) + ) |> + dplyr::ungroup() |> + dplyr::filter(.data$keep) |> + dplyr::select(-"flag", -"eventEndDate_old", -"end_date", -"row") + + attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") + appendAttrition( + toAdd = data.frame( + number_records = attrCounts$nRecords, + number_subjects = attrCounts$nSubjects, + reason_id = 5, + reason = sprintf("Collapsing eras, eraCollapse (%s)", eraCollapseSize) + ), + andromeda = andromeda + ) + + return(invisible(NULL)) +} + #' doEraCollapse #' #' Updates the treatmentHistory data.frame where if gapSame is smaller than the diff --git a/man/doEraCollapseNew.Rd b/man/doEraCollapseNew.Rd new file mode 100644 index 00000000..e04d73a8 --- /dev/null +++ b/man/doEraCollapseNew.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/constructPathways.R +\name{doEraCollapseNew} +\alias{doEraCollapseNew} +\title{doEraCollapseNew} +\usage{ +doEraCollapseNew(andromeda, eraCollapseSize) +} +\arguments{ +\item{andromeda}{(\code{Andromeda::andromeda()})} + +\item{eraCollapseSize}{(\code{integer(1)})} +} +\value{ +\code{NULL} +} +\description{ +doEraCollapseNew +} diff --git a/tests/testthat/helper-ableToRun.R b/tests/testthat/helper-ableToRun.R index 715fdd54..c899feae 100644 --- a/tests/testthat/helper-ableToRun.R +++ b/tests/testthat/helper-ableToRun.R @@ -7,13 +7,13 @@ ableToRun <- function() { require("duckdb", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE) ), - CG = all( - require("CirceR", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - require("CohortGenerator", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - require("DatabaseConnector", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - require("SqlRender", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - require("Eunomia", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE) - ), + # CG = all( + # require("CirceR", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), + # require("CohortGenerator", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), + # require("DatabaseConnector", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), + # require("SqlRender", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), + # require("Eunomia", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE) + # ), plotting = all( require("ggplot2", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), diff --git a/tests/testthat/helper-generateCohortTableCDMC.R b/tests/testthat/helper-generateCohortTableCDMC.R index 8f191c53..bad5f185 100644 --- a/tests/testthat/helper-generateCohortTableCDMC.R +++ b/tests/testthat/helper-generateCohortTableCDMC.R @@ -6,7 +6,7 @@ generateCohortTableCDMC <- function() { duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir() ) - + cdm <- CDMConnector::cdmFromCon( con = con, cdmSchema = "main", diff --git a/tests/testthat/helper-generateCohortTableCG.R b/tests/testthat/helper-generateCohortTableCG.R index 51d27362..fc791fe0 100644 --- a/tests/testthat/helper-generateCohortTableCG.R +++ b/tests/testthat/helper-generateCohortTableCG.R @@ -1,6 +1,9 @@ generateCohortTableCG <- function() { if (ableToRun()$CG) { - connectionDetails <- Eunomia::getEunomiaConnectionDetails() + connectionDetails <- Eunomia::getEunomiaConnectionDetails( + databaseFile = file.path(Sys.getenv("EUNOMIA_DATA_FOLDER_CG"), "GiBleed_5.3.sqlite") + ) + cohortTableName <- "cohort_table" resultSchema <- "main" cdmSchema <- "main" diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index 7dedb02d..9218386c 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -1,11 +1,43 @@ -if (Sys.getenv("EUNOMIA_DATA_FOLDER", "") == "") { - Sys.setenv("EUNOMIA_DATA_FOLDER" = tempfile("eunomiaData")) - dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER")) - - withr::defer( - { - unlink(Sys.getenv("EUNOMIA_DATA_FOLDER"), recursive = TRUE, force = TRUE) - }, - testthat::teardown_env() - ) +require("withr", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE) +require("stats", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE) + +# if (Sys.getenv("EUNOMIA_DATA_FOLDER_CG") == "") { +# Sys.setenv("EUNOMIA_DATA_FOLDER_CG" = tempfile("eunomiaData_CG")) +# dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER_CG")) +# +# if ( +# require("Eunomia", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE) +# & require("DatabaseConnector", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE) +# & require("SqlRender", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE) +# & require("CirceR", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE) +# ) { +# Eunomia::getDatabaseFile( +# datasetName = "GiBleed", +# pathToData = Sys.getenv("EUNOMIA_DATA_FOLDER_CG"), +# databaseFile = file.path(Sys.getenv("EUNOMIA_DATA_FOLDER_CG"), "GiBleed_5.3.sqlite") +# ) +# .CG <- generateCohortTableCG() +# } +# +# withr::defer( +# { +# unlink(Sys.getenv("EUNOMIA_DATA_FOLDER_CG"), recursive = TRUE, force = TRUE) +# } +# ) +# } else { +# .CG <- generateCohortTableCG() +# } + +Sys.setenv("EUNOMIA_DATA_FOLDER" = file.path(tempdir(), "eunomiaData")) +dir.create(Sys.getenv("EUNOMIA_DATA_FOLDER")) + +if (require("CDMConnector", quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE)) { + CDMConnector::downloadEunomiaData(overwrite = TRUE) + .CM <- generateCohortTableCDMC() } + +# withr::defer( +# { +# unlink(Sys.getenv("EUNOMIA_DATA_FOLDER"), recursive = TRUE, force = TRUE) +# } +# ) diff --git a/tests/testthat/test-CDMInterface.R b/tests/testthat/test-CDMInterface.R index 9f0221eb..a728ea9e 100644 --- a/tests/testthat/test-CDMInterface.R +++ b/tests/testthat/test-CDMInterface.R @@ -1,92 +1,88 @@ -library(testthat) -library(TreatmentPatterns) -library(dplyr) - -test_that("fetchCohortTable", { - skip_on_cran() - skip_on_ci() - cg <- generateCohortTableCG() - cdmc <- generateCohortTableCDMC() - - aCG <- Andromeda::andromeda() - aCDMC <- Andromeda::andromeda() - - dbcInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = cg$connectionDetails, - cdmSchema = "main", - resultSchema = "main" - ) - - cdmcInterface <- TreatmentPatterns:::CDMInterface$new( - cdm = cdmc$cdm - ) - - minEraDuration <- 120 - - x <- dbcInterface$fetchCohortTable( - cohorts = cg$cohorts, - cohortTableName = cg$cohortTableName, - andromeda = aCG, - andromedaTableName = cg$cohortTableName, - minEraDuration = minEraDuration - ) - - x <- cdmcInterface$fetchCohortTable( - cohorts = cdmc$cohorts, - cohortTableName = cdmc$cohortTableName, - andromeda = aCDMC, - andromedaTableName = cdmc$cohortTableName, - minEraDuration = minEraDuration - ) - - # Check nRows - expect_identical( - aCG$cohort_table %>% collect() %>% nrow(), - aCDMC$cohort_table %>% collect() %>% nrow() - ) - - # check n > 1 treatments - expect_identical( - aCG$cohort_table %>% - group_by(.data$subject_id) %>% - summarize(n = n()) %>% - filter(n > 1) %>% - collect() %>% - mutate(subject_id = as.numeric(subject_id)) %>% - pull(n) %>% - sum(), - aCDMC$cohort_table %>% - group_by(.data$subject_id) %>% - summarize(n = n()) %>% - filter(n > 1) %>% - collect() %>% - mutate(subject_id = as.numeric(subject_id)) %>% - pull(n) %>% - sum() - ) - - # check n == 1 treatments - expect_identical( - aCG$cohort_table %>% - group_by(.data$subject_id) %>% - summarize(n = n()) %>% - filter(n == 1) %>% - collect() %>% - mutate(subject_id = as.numeric(subject_id)) %>% - pull(n) %>% - sum(), - aCDMC$cohort_table %>% - group_by(.data$subject_id) %>% - summarize(n = n()) %>% - filter(n == 1) %>% - collect() %>% - mutate(subject_id = as.numeric(subject_id)) %>% - pull(n) %>% - sum() - ) - - dbcInterface$disconnect() - Andromeda::close(aCG) - Andromeda::close(aCDMC) - DBI::dbDisconnect(cdmc$con, shutdown = TRUE) -}) +# library(testthat) +# library(TreatmentPatterns) +# library(dplyr) +# +# test_that("fetchCohortTable", { +# skip_on_cran() +# skip_on_ci() +# +# aCG <- Andromeda::andromeda() +# aCDMC <- Andromeda::andromeda() +# +# dbcInterface <- TreatmentPatterns:::CDMInterface$new( +# connectionDetails = .CG$connectionDetails, +# cdmSchema = "main", +# resultSchema = "main" +# ) +# +# cdmcInterface <- TreatmentPatterns:::CDMInterface$new( +# cdm = .CM$cdm +# ) +# +# minEraDuration <- 120 +# +# x <- dbcInterface$fetchCohortTable( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# andromeda = aCG, +# andromedaTableName = .CG$cohortTableName, +# minEraDuration = minEraDuration +# ) +# +# x <- cdmcInterface$fetchCohortTable( +# cohorts = .CM$cohorts, +# cohortTableName = .CM$cohortTableName, +# andromeda = aCDMC, +# andromedaTableName = .CM$cohortTableName, +# minEraDuration = minEraDuration +# ) +# +# # Check nRows +# expect_identical( +# aCG$cohort_table %>% collect() %>% nrow(), +# aCDMC$cohort_table %>% collect() %>% nrow() +# ) +# +# # check n > 1 treatments +# expect_identical( +# aCG$cohort_table %>% +# group_by(.data$subject_id) %>% +# summarize(n = n()) %>% +# filter(n > 1) %>% +# collect() %>% +# mutate(subject_id = as.numeric(subject_id)) %>% +# pull(n) %>% +# sum(), +# aCDMC$cohort_table %>% +# group_by(.data$subject_id) %>% +# summarize(n = n()) %>% +# filter(n > 1) %>% +# collect() %>% +# mutate(subject_id = as.numeric(subject_id)) %>% +# pull(n) %>% +# sum() +# ) +# +# # check n == 1 treatments +# expect_identical( +# aCG$cohort_table %>% +# group_by(.data$subject_id) %>% +# summarize(n = n()) %>% +# filter(n == 1) %>% +# collect() %>% +# mutate(subject_id = as.numeric(subject_id)) %>% +# pull(n) %>% +# sum(), +# aCDMC$cohort_table %>% +# group_by(.data$subject_id) %>% +# summarize(n = n()) %>% +# filter(n == 1) %>% +# collect() %>% +# mutate(subject_id = as.numeric(subject_id)) %>% +# pull(n) %>% +# sum() +# ) +# +# Andromeda::close(aCG) +# Andromeda::close(aCDMC) +# }) diff --git a/tests/testthat/test-CDMInterfaceCDMC.R b/tests/testthat/test-CDMInterfaceCDMC.R index 9e43dd27..adf34c14 100644 --- a/tests/testthat/test-CDMInterfaceCDMC.R +++ b/tests/testthat/test-CDMInterfaceCDMC.R @@ -9,7 +9,6 @@ if (ableToRun()$CDMC) { withr::defer({ Andromeda::close(andromeda) - DBI::dbDisconnect(con, shutdown = TRUE) }) cohorts <- data.frame( diff --git a/tests/testthat/test-CDMInterfaceDBC.R b/tests/testthat/test-CDMInterfaceDBC.R index 7c9ec2be..8d4b1202 100644 --- a/tests/testthat/test-CDMInterfaceDBC.R +++ b/tests/testthat/test-CDMInterfaceDBC.R @@ -1,137 +1,127 @@ -library(testthat) -library(TreatmentPatterns) -library(dplyr) - -test_that("Method: new", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - connectionDetails <- Eunomia::getEunomiaConnectionDetails() - - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = connectionDetails, - cdmSchema = "main", - resultSchema = "main" - ) - - expect_true(R6::is.R6( - cdmInterface - )) - - cdmInterface$disconnect() -}) - -test_that("Method: fetchMetadata", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - connectionDetails <- Eunomia::getEunomiaConnectionDetails() - - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = connectionDetails, - cdmSchema = "main", - resultSchema = "main" - ) - - andromeda <- Andromeda::andromeda() - - cdmInterface$fetchMetadata(andromeda) - - metadata <- andromeda$metadata %>% collect() - - expect_in( - c("cdmSourceName", "cdmSourceAbbreviation", "cdmReleaseDate", "vocabularyVersion"), - names(metadata) - ) - - expect_identical(metadata$rVersion, base::version$version.string) - expect_identical(metadata$platform, base::version$platform) - expect_identical(nrow(metadata), 1L) - expect_identical(ncol(metadata), 8L) - cdmInterface$disconnect() -}) - -test_that("Method: fetchCohortTable", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - globals <- generateCohortTableCG() - - andromeda <- Andromeda::andromeda() - andromedaTableName <- "cohortTable" - - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - cdmInterface$fetchCohortTable( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - andromeda = andromeda, - andromedaTableName = andromedaTableName, - minEraDuration = 0 - ) - - cdmInterface$disconnect() - - expect_equal(names(andromeda), andromedaTableName) -}) - -test_that("fetchCohortTable: empty", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - globals <- generateCohortTableCG() - - andromeda <- Andromeda::andromeda() - andromedaTableName <- "cohortTable" - - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - cohorts <- data.frame( - cohortId = numeric(), - cohortName = character(), - type = character() - ) - - # Empty - cdmInterface$fetchCohortTable( - cohorts = cohorts, - cohortTableName = globals$cohortTableName, - andromeda = andromeda, - andromedaTableName = andromedaTableName, - minEraDuration = 5 - ) - - res <- andromeda[[andromedaTableName]] %>% dplyr::collect() - - cdmInterface$disconnect() - - expect_identical(ncol(res), 6L) - expect_identical(nrow(res), 0L) -}) - -test_that("Method: disconnect", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - connectionDetails <- Eunomia::getEunomiaConnectionDetails() - - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = connectionDetails, - cdmSchema = "main", - resultSchema = "main" - ) - - andromeda <- Andromeda::andromeda() - - cdmInterface$disconnect() - - expect_error(cdmInterface$fetchMetadata(andromeda)) -}) +# library(testthat) +# library(TreatmentPatterns) +# library(dplyr) +# +# test_that("Method: new", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# connectionDetails <- Eunomia::getEunomiaConnectionDetails() +# +# cdmInterface <- TreatmentPatterns:::CDMInterface$new( +# connectionDetails = connectionDetails, +# cdmSchema = "main", +# resultSchema = "main" +# ) +# +# expect_true(R6::is.R6( +# cdmInterface +# )) +# }) +# +# test_that("Method: fetchMetadata", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# connectionDetails <- Eunomia::getEunomiaConnectionDetails() +# +# cdmInterface <- TreatmentPatterns:::CDMInterface$new( +# connectionDetails = connectionDetails, +# cdmSchema = "main", +# resultSchema = "main" +# ) +# +# andromeda <- Andromeda::andromeda() +# +# cdmInterface$fetchMetadata(andromeda) +# +# metadata <- andromeda$metadata %>% collect() +# +# expect_in( +# c("cdmSourceName", "cdmSourceAbbreviation", "cdmReleaseDate", "vocabularyVersion"), +# names(metadata) +# ) +# +# expect_identical(metadata$rVersion, base::version$version.string) +# expect_identical(metadata$platform, base::version$platform) +# expect_identical(nrow(metadata), 1L) +# expect_identical(ncol(metadata), 8L) +# cdmInterface$disconnect() +# }) +# +# test_that("Method: fetchCohortTable", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# +# andromeda <- Andromeda::andromeda() +# andromedaTableName <- "cohortTable" +# +# cdmInterface <- TreatmentPatterns:::CDMInterface$new( +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# cdmInterface$fetchCohortTable( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# andromeda = andromeda, +# andromedaTableName = andromedaTableName, +# minEraDuration = 0 +# ) +# +# expect_equal(names(andromeda), andromedaTableName) +# }) +# +# test_that("fetchCohortTable: empty", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# +# andromeda <- Andromeda::andromeda() +# andromedaTableName <- "cohortTable" +# +# cdmInterface <- TreatmentPatterns:::CDMInterface$new( +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# cohorts <- data.frame( +# cohortId = numeric(), +# cohortName = character(), +# type = character() +# ) +# +# # Empty +# cdmInterface$fetchCohortTable( +# cohorts = cohorts, +# cohortTableName = .CG$cohortTableName, +# andromeda = andromeda, +# andromedaTableName = andromedaTableName, +# minEraDuration = 5 +# ) +# +# res <- andromeda[[andromedaTableName]] %>% dplyr::collect() +# +# expect_identical(ncol(res), 6L) +# expect_identical(nrow(res), 0L) +# }) +# +# test_that("Method: disconnect", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# connectionDetails <- Eunomia::getEunomiaConnectionDetails() +# +# cdmInterface <- TreatmentPatterns:::CDMInterface$new( +# connectionDetails = connectionDetails, +# cdmSchema = "main", +# resultSchema = "main" +# ) +# +# andromeda <- Andromeda::andromeda() +# +# expect_error(cdmInterface$fetchMetadata(andromeda)) +# }) diff --git a/tests/testthat/test-CRAN.R b/tests/testthat/test-CRAN.R index 944f84bf..24acf20f 100644 --- a/tests/testthat/test-CRAN.R +++ b/tests/testthat/test-CRAN.R @@ -18,12 +18,10 @@ if (interactive()) { test_that("CRAN Tests", { run_on_cran() - globals <- generateCohortTableCDMC() - outputEnv <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cdm = globals$cdm, - globals$cohortTableName + cohorts = .CM$cohorts, + cdm = .CM$cdm, + .CM$cohortTableName ) expect_s4_class(outputEnv, class = "Andromeda") @@ -43,8 +41,6 @@ test_that("CRAN Tests", { expect_true("treatmentHistory" %in% names(outputEnv)) expect_true("treatmentHistoryFinal" %in% names(outputEnv)) - DBI::dbDisconnect(globals$con, shutdown = TRUE) - tpRes <- TreatmentPatterns::export(outputEnv) expect_true(R6::is.R6(tpRes)) diff --git a/tests/testthat/test-TreatmentPatternsResults.R b/tests/testthat/test-TreatmentPatternsResults.R index 5a5d21d4..080ae227 100644 --- a/tests/testthat/test-TreatmentPatternsResults.R +++ b/tests/testthat/test-TreatmentPatternsResults.R @@ -1,12 +1,11 @@ test_that("Method: new(data.frame)", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- suppressWarnings(generateCohortTableCDMC()) result <- TreatmentPatterns::executeTreatmentPatterns( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) expect_s3_class(result$analyses, class = c("tbl_df", "tbl", "data.frame")) @@ -84,6 +83,8 @@ test_that("Method: new(wrongFile)", { test_that("Method: plotEventDuration()", { skip_on_cran() + skip_if_not_installed("ggplot2") + results <- TreatmentPatternsResults$new() results$load(filePath = system.file(package = "TreatmentPatterns", "DummyOutput")) @@ -94,6 +95,8 @@ test_that("Method: plotEventDuration()", { test_that("Method: plotSankey()", { skip_on_cran() + skip_if_not_installed("networkD3") + results <- TreatmentPatternsResults$new() results$load(filePath = system.file(package = "TreatmentPatterns", "DummyOutput")) @@ -106,6 +109,8 @@ test_that("Method: plotSankey()", { test_that("Method: plotSunburst()", { skip_on_cran() + skip_if_not_installed("sunburstR") + results <- TreatmentPatternsResults$new() results$load(filePath = system.file(package = "TreatmentPatterns", "DummyOutput")) diff --git a/tests/testthat/test-computePathways.R b/tests/testthat/test-computePathways.R index 90c6e44b..8307342d 100644 --- a/tests/testthat/test-computePathways.R +++ b/tests/testthat/test-computePathways.R @@ -4,42 +4,40 @@ library(TreatmentPatterns) library(dplyr) library(stringr) -test_that("computePathways DatabaseConnector", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - globals <- generateCohortTableCG() - - expect_message( - expect_message( - expect_message( - computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - connectionDetails = globals$connectionDetails, - cdmSchema = "main", - resultSchema = "main" - ), - "After maxPathLength: 553" - ), - "After combinationWindow: 554" - ), - "Original number of rows: 8366" - ) -}) +# test_that("computePathways DatabaseConnector", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# +# expect_message( +# expect_message( +# expect_message( +# computePathways( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = "main", +# resultSchema = "main" +# ), +# "After maxPathLength: 554" +# ), +# "After combinationWindow: 554" +# ), +# "Original number of rows: 8366" +# ) +# }) test_that("computePathways CDMConnector", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() expect_message( expect_message( expect_message( computePathways( - cohorts = globals$cohorts, - cdm = globals$cdm, - globals$cohortTableName + cohorts = .CM$cohorts, + cdm = .CM$cdm, + .CM$cohortTableName ), ">> Starting on" ), @@ -47,45 +45,41 @@ test_that("computePathways CDMConnector", { ), "-- treatment construction done" ) - - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) -test_that("nrow exitCohorts > 0", { - skip("Eunomia [2.0.0] bug") - skip_on_cran() - skip_if_not(ableToRun()$CG) - globals <- generateCohortTableCG() - - cohorts <- globals$cohorts %>% - mutate(type = case_when( - .data$cohortName == "Acetaminophen" ~ "exit", - .default = .data$type - )) - - expect_message( - computePathways( - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema, - cohorts = cohorts, - cohortTableName = globals$cohortTableName - ), - "Records: 2117" - ) -}) +# test_that("nrow exitCohorts > 0", { +# skip("Eunomia [2.0.0] bug") +# skip_on_cran() +# skip_if_not(ableToRun()$CG) +# +# cohorts <- .CG$cohorts %>% +# mutate(type = case_when( +# .data$cohortName == "Acetaminophen" ~ "exit", +# .default = .data$type +# )) +# +# expect_message( +# computePathways( +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema, +# cohorts = cohorts, +# cohortTableName = .CG$cohortTableName +# ), +# "Records: 2117" +# ) +# }) # Parameter sweep ---- test_that("windowStart", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, windowStart = "0" ), "Must be of type.+'integerish'" @@ -93,9 +87,9 @@ test_that("windowStart", { expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, windowStart = Inf ), "Must be of type.+'integerish'" @@ -103,9 +97,9 @@ test_that("windowStart", { expect_message( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, windowStart = 0 ), "Records: 8366" @@ -113,9 +107,9 @@ test_that("windowStart", { expect_message( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, windowStart = -30 ), "Records: 8366" @@ -123,9 +117,9 @@ test_that("windowStart", { expect_message( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, windowStart = 30 ), "Records: 6267" @@ -135,13 +129,12 @@ test_that("windowStart", { test_that("minEraDuration", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, minEraDuration = "0" ), "Must be of type.+'numeric'" @@ -151,19 +144,18 @@ test_that("minEraDuration", { test_that("splitEventCohorts", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() andromeda_empty <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, splitEventCohorts = NULL ) andromeda_Clavulanate <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, splitEventCohorts = 4, splitTime = 30 ) @@ -175,9 +167,9 @@ test_that("splitEventCohorts", { expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, splitEventCohorts = "1" ), "Must be of type.+'integerish'" @@ -190,13 +182,12 @@ test_that("splitEventCohorts", { test_that("splitTime", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, splitTime = "1" ), "Must be of type.+'integerish'" @@ -206,27 +197,26 @@ test_that("splitTime", { test_that("eraCollapseSize", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() andromeda_0 <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, eraCollapseSize = 0 ) andromeda_10000 <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, eraCollapseSize = 10000 ) expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, eraCollapseSize = "" ), " Must be of type.+'numeric'" @@ -239,14 +229,13 @@ test_that("eraCollapseSize", { test_that("combinationWindow", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() expect_error( suppressWarnings( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, combinationWindow = "" ) ), @@ -357,43 +346,40 @@ test_that("minPostCombinationDuration: 30", { dplyr::pull(.data$pathway) expect_identical(pathway, "A-A+B-B") - - DBI::dbDisconnect(con) }) test_that("filterTreatments", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() expect_error( computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, filterTreatments = "" ), "Must be a subset of" ) first <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, filterTreatments = "First" ) changes <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, filterTreatments = "Changes" ) all <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm, + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm, filterTreatments = "All" ) @@ -439,25 +425,31 @@ test_that("filterTreatments", { object = c("numeric", "integer") ) - expect_identical( - "numeric", - class(firstTH$eventStartDate), - class(changesTH$eventStartDate), - class(allTH$eventStartDate) + expect_contains( + expected = c( + class(firstTH$eventStartDate), + class(changesTH$eventStartDate), + class(allTH$eventStartDate) + ), + object = c("numeric", "integer") ) - expect_identical( - "numeric", - class(firstTH$eventEndDate), - class(changesTH$eventStartDate), - class(allTH$eventEndDate) + expect_contains( + expected = c( + class(firstTH$eventEndDate), + class(changesTH$eventStartDate), + class(allTH$eventEndDate) + ), + object = c("numeric", "integer") ) - expect_identical( - "numeric", - class(firstTH$age), - class(changesTH$age), - class(allTH$age) + expect_contains( + expected = c( + class(firstTH$age), + class(changesTH$age), + class(allTH$age) + ), + object = c("numeric", "integer") ) expect_identical( @@ -467,18 +459,22 @@ test_that("filterTreatments", { class(allTH$sex) ) - expect_identical( - "numeric", - class(firstTH$durationEra), - class(changesTH$durationEra), - class(allTH$durationEra) + expect_contains( + expected = c( + class(firstTH$durationEra), + class(changesTH$durationEra), + class(allTH$durationEra) + ), + object = c("numeric", "integer") ) - expect_identical( - "numeric", - class(firstTH$sortOrder), - class(changesTH$sortOrder), - class(allTH$sortOrder) + expect_contains( + expected = c( + class(firstTH$sortOrder), + class(changesTH$sortOrder), + class(allTH$sortOrder) + ), + object = c("numeric", "integer") ) expect_true( @@ -556,8 +552,6 @@ test_that("FRFS combination", { expect_equal(nFRFS, 1) expect_equal(nLRFS, 0) - - DBI::dbDisconnect(con) }) test_that("LRFS combination", { @@ -607,104 +601,88 @@ test_that("LRFS combination", { expect_equal(nFRFS, 0) expect_equal(nLRFS, 1) - - DBI::dbDisconnect(con) }) test_that("No target records", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - params <- suppressWarnings(generateCohortTableCDMC()) - - params$cohorts$cohortId[8] <- 9 + .CM$cohorts$cohortId[8] <- 9 expect_warning({ outputEnv <- computePathways( - cohorts = params$cohorts, - cohortTableName = params$cohortTableName, - cdm = params$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) }) expect_true(nrow(outputEnv$treatmentHistory %>% collect()) == 0) - - DBI::dbDisconnect(params$con, shutdown = TRUE) }) test_that("Empty cohort table", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - params <- suppressWarnings(generateCohortTableCDMC()) - - params$cdm$cohort_table <- params$cdm$cohort_table %>% + .CM$cdm$cohort_table <- .CM$cdm$cohort_table %>% filter(.data$cohort_definition_id <= 0) %>% compute() expect_warning({ outputEnv <- computePathways( - cohorts = params$cohorts, - cohortTableName = params$cohortTableName, - cdm = params$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) }) expect_true(nrow(outputEnv$treatmentHistory %>% collect()) == 0) - - DBI::dbDisconnect(params$con, shutdown = TRUE) }) test_that("No target defined", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - params <- suppressWarnings(generateCohortTableCDMC()) - - params$cohorts$type <- rep("event", 8) + .CM$cohorts$type <- rep("event", 8) expect_error({ outputEnv <- computePathways( - cohorts = params$cohorts, - cohortTableName = params$cohortTableName, - cdm = params$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) }) - - DBI::dbDisconnect(params$con, shutdown = TRUE) }) -test_that("Attrition", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CDMC) - skip_if_not(ableToRun()$CG) - - params <- suppressWarnings(generateCohortTableCDMC()) - outputEnvCDMC <- computePathways( - cohorts = params$cohorts, - cohortTableName = params$cohortTableName, - cdm = params$cdm - ) - - params <- suppressWarnings(generateCohortTableCG()) - outputEnvCG <- computePathways( - cohorts = params$cohorts, - cohortTableName = params$cohortTableName, - connectionDetails = params$connectionDetails, - cdmSchema = params$cdmSchema, - resultSchema = params$resultSchema - ) - - expect_identical( - outputEnvCDMC$attrition %>% - collect() %>% - select(-"time_stamp"), - outputEnvCG$attrition %>% - collect() %>% - select(-"time_stamp") - ) - - Andromeda::close(outputEnvCG) - Andromeda::close(outputEnvCDMC) -}) +# test_that("Attrition", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CDMC) +# skip_if_not(ableToRun()$CG) +# +# outputEnvCDMC <- computePathways( +# cohorts = .CM$cohorts, +# cohortTableName = .CM$cohortTableName, +# cdm = .CM$cdm +# ) +# +# outputEnvCG <- computePathways( +# cohorts = .CM$cohorts, +# cohortTableName = .CM$cohortTableName, +# connectionDetails = .CM$connectionDetails, +# cdmSchema = .CM$cdmSchema, +# resultSchema = .CM$resultSchema +# ) +# +# expect_identical( +# outputEnvCDMC$attrition %>% +# collect() %>% +# select(-"time_stamp"), +# outputEnvCG$attrition %>% +# collect() %>% +# select(-"time_stamp") +# ) +# +# Andromeda::close(outputEnvCG) +# Andromeda::close(outputEnvCDMC) +# }) diff --git a/tests/testthat/test-executeTreatmentPatterns.R b/tests/testthat/test-executeTreatmentPatterns.R index 076f91c4..824de197 100644 --- a/tests/testthat/test-executeTreatmentPatterns.R +++ b/tests/testthat/test-executeTreatmentPatterns.R @@ -6,37 +6,31 @@ test_that("void", { expect_error(TreatmentPatterns::executeTreatmentPatterns()) }) -test_that("CohortGenerator", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CG) - - global <- generateCohortTableCG() - - result <- TreatmentPatterns::executeTreatmentPatterns( - cohorts = global$cohorts, - cohortTableName = global$cohortTableName, - connectionDetails = global$connectionDetails, - cdmSchema = global$cdmSchema, - resultSchema = global$resultSchema - ) - - expect_true("TreatmentPatternsResults" %in% class(result)) -}) +# test_that("CohortGenerator", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CG) +# +# result <- TreatmentPatterns::executeTreatmentPatterns( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# expect_true("TreatmentPatternsResults" %in% class(result)) +# }) test_that("CDMConnector", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - result <- TreatmentPatterns::executeTreatmentPatterns( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) expect_true("TreatmentPatternsResults" %in% class(result)) - - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) diff --git a/tests/testthat/test-export.R b/tests/testthat/test-export.R index 90444941..491bf0ae 100644 --- a/tests/testthat/test-export.R +++ b/tests/testthat/test-export.R @@ -26,305 +26,293 @@ test_that("empty treatmentHistory table", { ) }) -# CohortGenerator ---- -test_that("outputPath", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CG) - - globals <- generateCohortTableCG() - - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - ## file.path(tempDirCG) ---- - tempDirLocal <- file.path(tempdir(), "output") - - result <- export(andromeda, outputPath = tempDirLocal) - - expect_true( - file.exists(file.path(tempDirLocal, "treatment_pathways.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "summary_event_duration.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "counts_year.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "counts_age.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "counts_sex.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "attrition.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "cdm_source_info.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "analyses.csv")) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "metadata.csv")) - ) - - ## 3 ---- - expect_error( - TreatmentPatterns::export( - andromeda, - outputPath = 3, - nonePaths = TRUE, - stratify = TRUE - ), - "Variable 'outputPath':" - ) - - Andromeda::close(andromeda) -}) - -test_that("ageWindow", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CG) - - globals <- generateCohortTableCG() - - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - ## 10 ---- - expect_message( - result <- export( - andromeda = andromeda, - ageWindow = 10, - nonePaths = TRUE, - stratify = TRUE - ) - ) - - treatmentPathways <- result$treatment_pathways - - expect_true( - all(c("0-10", "10-20", "20-30", "30-40", "40-50", "all") %in% treatmentPathways$age) - ) - - ## c(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 150) ---- - expect_message( - result <- export( - andromeda = andromeda, - ageWindow = c(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 150), - nonePaths = TRUE, - stratify = TRUE - ) - ) - - treatmentPathways <- result$treatment_pathways - - expect_true(all( - c( - "0-2", "2-4", "4-6", "6-8", "8-10", "10-12", - "12-14", "14-16", "16-18", "18-150", "all" - ) %in% treatmentPathways$age - )) - - Andromeda::close(andromeda) -}) - -test_that("minCellCount", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CG) - - globals <- generateCohortTableCG() - - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - ## 10 ---- - expect_message( - result <- export( - andromeda = andromeda, - minCellCount = 10, - censorType = "remove", - nonePaths = TRUE, - stratify = TRUE - ), - "Removing \\d+ pathways with a frequency <10." - ) - - treatmentPathways <- result$treatment_pathways - - expect_equal(min(treatmentPathways$freq), 10) - - ## "10" ---- - expect_error( - export( - andromeda = andromeda, - minCellCount = "10", - nonePaths = TRUE, - stratify = TRUE - ) - ) - - Andromeda::close(andromeda) -}) - -test_that("archiveName", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CG) - - globals <- generateCohortTableCG() - - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - tempDirLocal <- file.path(tempdir(), "output") - - ## "output.zip" ---- - expect_message( - export( - andromeda = andromeda, - outputPath = tempDirLocal, - archiveName = "output.zip" - ) - ) - - expect_true( - file.exists(file.path(tempDirLocal, "output.zip")) - ) - - ## 3 ---- - expect_error( - export( - andromeda = andromeda, - outputPath = tempDirLocal, - archiveName = 3, - nonePaths = TRUE, - stratify = TRUE - ) - ) - - Andromeda::close(andromeda) -}) - -test_that("censorType", { - skip_on_cran() - skip_on_os(os = "linux") - skip_if_not(ableToRun()$CG) - - globals <- generateCohortTableCG() - - andromeda <- computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - connectionDetails = globals$connectionDetails, - cdmSchema = globals$cdmSchema, - resultSchema = globals$resultSchema - ) - - ## "remove" ---- - expect_message( - result <- TreatmentPatterns::export( - andromeda = andromeda, - minCellCount = 10, - censorType = "remove", - nonePaths = TRUE, - stratify = TRUE - ), - "Removing \\d+ pathways with a frequency <10." - ) - - treatmentPathways <- result$treatment_pathways - - expect_equal(min(treatmentPathways$freq), 10) - - ## "minCellCount" ---- - expect_message( - TreatmentPatterns::export( - andromeda = andromeda, - minCellCount = 10, - censorType = "minCellCount", - nonePaths = TRUE, - stratify = TRUE - ), - "Censoring \\d+ pathways with a frequency <10 to 10." - ) - - treatmentPathways <- result$treatment_pathways - - expect_equal(min(treatmentPathways$freq), 10) - - ## "mean" ---- - expect_message( - result <- TreatmentPatterns::export( - andromeda = andromeda, - minCellCount = 10, - censorType = "mean", - nonePaths = TRUE, - stratify = TRUE - ), - "Censoring \\d+ pathways with a frequency <10 to mean." - ) - - treatmentPathways <- result$treatment_pathways - - expect_equal(min(treatmentPathways$freq), 2) - - ## "stuff" ---- - expect_error( - export( - andromeda = andromeda, - censorType = "Stuff", - nonePaths = TRUE, - stratify = TRUE - ) - ) - - Andromeda::close(andromeda) -}) +# # CohortGenerator ---- +# test_that("outputPath", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CG) +# +# andromeda <- TreatmentPatterns::computePathways( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# ## file.path(tempDirCG) ---- +# tempDirLocal <- file.path(tempdir(), "output") +# +# result <- export(andromeda, outputPath = tempDirLocal) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "treatment_pathways.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "summary_event_duration.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "counts_year.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "counts_age.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "counts_sex.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "attrition.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "cdm_source_info.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "analyses.csv")) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "metadata.csv")) +# ) +# +# ## 3 ---- +# expect_error( +# TreatmentPatterns::export( +# andromeda, +# outputPath = 3, +# nonePaths = TRUE, +# stratify = TRUE +# ), +# "Variable 'outputPath':" +# ) +# +# Andromeda::close(andromeda) +# }) +# +# test_that("ageWindow", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CG) +# +# andromeda <- TreatmentPatterns::computePathways( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# ## 10 ---- +# expect_message( +# result <- export( +# andromeda = andromeda, +# ageWindow = 10, +# nonePaths = TRUE, +# stratify = TRUE +# ) +# ) +# +# treatmentPathways <- result$treatment_pathways +# +# expect_true( +# all(c("0-10", "10-20", "20-30", "30-40", "40-50", "all") %in% treatmentPathways$age) +# ) +# +# ## c(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 150) ---- +# expect_message( +# result <- export( +# andromeda = andromeda, +# ageWindow = c(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 150), +# nonePaths = TRUE, +# stratify = TRUE +# ) +# ) +# +# treatmentPathways <- result$treatment_pathways +# +# expect_true(all( +# c( +# "0-2", "2-4", "4-6", "6-8", "8-10", "10-12", +# "12-14", "14-16", "16-18", "18-150", "all" +# ) %in% treatmentPathways$age +# )) +# +# Andromeda::close(andromeda) +# }) +# +# test_that("minCellCount", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CG) +# +# andromeda <- TreatmentPatterns::computePathways( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# ## 10 ---- +# expect_message( +# result <- export( +# andromeda = andromeda, +# minCellCount = 10, +# censorType = "remove", +# nonePaths = TRUE, +# stratify = TRUE +# ), +# "Removing \\d+ pathways with a frequency <10." +# ) +# +# treatmentPathways <- result$treatment_pathways +# +# expect_equal(min(treatmentPathways$freq), 10) +# +# ## "10" ---- +# expect_error( +# export( +# andromeda = andromeda, +# minCellCount = "10", +# nonePaths = TRUE, +# stratify = TRUE +# ) +# ) +# +# Andromeda::close(andromeda) +# }) +# +# test_that("archiveName", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CG) +# +# andromeda <- TreatmentPatterns::computePathways( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# tempDirLocal <- file.path(tempdir(), "output") +# +# ## "output.zip" ---- +# expect_message( +# export( +# andromeda = andromeda, +# outputPath = tempDirLocal, +# archiveName = "output.zip" +# ) +# ) +# +# expect_true( +# file.exists(file.path(tempDirLocal, "output.zip")) +# ) +# +# ## 3 ---- +# expect_error( +# export( +# andromeda = andromeda, +# outputPath = tempDirLocal, +# archiveName = 3, +# nonePaths = TRUE, +# stratify = TRUE +# ) +# ) +# +# Andromeda::close(andromeda) +# }) +# +# test_that("censorType", { +# skip_on_cran() +# skip_on_os(os = "linux") +# skip_if_not(ableToRun()$CG) +# +# andromeda <- computePathways( +# cohorts = .CG$cohorts, +# cohortTableName = .CG$cohortTableName, +# connectionDetails = .CG$connectionDetails, +# cdmSchema = .CG$cdmSchema, +# resultSchema = .CG$resultSchema +# ) +# +# ## "remove" ---- +# expect_message( +# result <- TreatmentPatterns::export( +# andromeda = andromeda, +# minCellCount = 10, +# censorType = "remove", +# nonePaths = TRUE, +# stratify = TRUE +# ), +# "Removing \\d+ pathways with a frequency <10." +# ) +# +# treatmentPathways <- result$treatment_pathways +# +# expect_equal(min(treatmentPathways$freq), 10) +# +# ## "minCellCount" ---- +# expect_message( +# TreatmentPatterns::export( +# andromeda = andromeda, +# minCellCount = 10, +# censorType = "minCellCount", +# nonePaths = TRUE, +# stratify = TRUE +# ), +# "Censoring \\d+ pathways with a frequency <10 to 10." +# ) +# +# treatmentPathways <- result$treatment_pathways +# +# expect_equal(min(treatmentPathways$freq), 10) +# +# ## "mean" ---- +# expect_message( +# result <- TreatmentPatterns::export( +# andromeda = andromeda, +# minCellCount = 10, +# censorType = "mean", +# nonePaths = TRUE, +# stratify = TRUE +# ), +# "Censoring \\d+ pathways with a frequency <10 to mean." +# ) +# +# treatmentPathways <- result$treatment_pathways +# +# expect_equal(min(treatmentPathways$freq), 2) +# +# ## "stuff" ---- +# expect_error( +# export( +# andromeda = andromeda, +# censorType = "Stuff", +# nonePaths = TRUE, +# stratify = TRUE +# ) +# ) +# +# Andromeda::close(andromeda) +# }) # CDMConnector ---- test_that("outputPath", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) tempDirLocal <- file.path(tempdir(), "output") @@ -379,19 +367,16 @@ test_that("outputPath", { ) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("ageWindow", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) ## 10 ---- @@ -428,19 +413,16 @@ test_that("ageWindow", { )) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("minCellCount", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) ## 10 ---- @@ -470,19 +452,16 @@ test_that("minCellCount", { ) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("archiveName", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) tempDirLocal <- file.path(tempdir(), "output") @@ -514,19 +493,16 @@ test_that("archiveName", { ) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("censorType", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) ## "remove" ---- @@ -589,19 +565,16 @@ test_that("censorType", { ) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("counts", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) ## "remove" ---- @@ -663,19 +636,16 @@ test_that("counts", { expect_identical(totalAll, totalYears) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("attrition", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- generateCohortTableCDMC() - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) tempDirLocal <- file.path(tempdir(), "output") @@ -691,19 +661,16 @@ test_that("attrition", { expect_true(file.exists(file.path(tempDirLocal, "attrition.csv"))) Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) test_that("stratify, none paths", { skip_on_cran() skip_if_not(ableToRun()$CDMC) - globals <- suppressWarnings(generateCohortTableCDMC()) - andromeda <- TreatmentPatterns::computePathways( - cohorts = globals$cohorts, - cohortTableName = globals$cohortTableName, - cdm = globals$cdm + cohorts = .CM$cohorts, + cohortTableName = .CM$cohortTableName, + cdm = .CM$cdm ) result <- export( @@ -781,5 +748,4 @@ test_that("stratify, none paths", { } Andromeda::close(andromeda) - DBI::dbDisconnect(globals$con, shutdown = TRUE) }) diff --git a/tests/testthat/test-exportPatientLevel.R b/tests/testthat/test-exportPatientLevel.R index f55035ea..5524776a 100644 --- a/tests/testthat/test-exportPatientLevel.R +++ b/tests/testthat/test-exportPatientLevel.R @@ -39,7 +39,7 @@ test_that("exportPatientLevel", { cdm_source_info <- read.csv(file.path(tempdir(), "cdm_source_info.csv")) expect_equal(ncol(treatment_history), 13) - expect_equal(nrow(treatment_history), 553) + expect_equal(nrow(treatment_history), 554) expect_equal(ncol(metadata), 5) expect_equal(nrow(metadata), 1) diff --git a/tests/testthat/test-ggSunburst.R b/tests/testthat/test-ggSunburst.R index fadad304..ac784ee7 100644 --- a/tests/testthat/test-ggSunburst.R +++ b/tests/testthat/test-ggSunburst.R @@ -17,7 +17,7 @@ test_that("ggSunburst", { ) expect_true(all(unique(gg$data$frac) %in% unique(df$frac))) - expect_true(all(class(gg) %in% c("gg", "ggplot"))) + expect_true(any(class(gg) %in% c("gg", "ggplot"))) }) test_that("ggSunburst: groupCombinations", { diff --git a/tests/testthat/test-multipleCohortTables.R b/tests/testthat/test-multipleCohortTables.R index b1030831..4aef4b28 100644 --- a/tests/testthat/test-multipleCohortTables.R +++ b/tests/testthat/test-multipleCohortTables.R @@ -61,8 +61,6 @@ test_that("multiple cohort_tables", { result <- TreatmentPatterns::export(andromeda, minCellCount = 1) expect_identical(result$treatment_pathways$pathway, "A") - - DBI::dbDisconnect(con, shutdown = TRUE) }) test_that("multiple cohort_tables", { From 6ebf3df846d5707afee24d0e735b2222e2c59255 Mon Sep 17 00:00:00 2001 From: Maarten van Kessel <112690347+mvankessel-EMC@users.noreply.github.com> Date: Fri, 7 Nov 2025 11:05:05 +0100 Subject: [PATCH 02/33] Prune dep (#333) * Moved dependencies to suggests * Updated setup * download through CDMConnector * download both * Updated setup * Replaced stringr with stringi * Fixed tests and updated setup * Fixed warnings * removed png * removed stray stringr call * updated action * updated arg * ignore vignettes * moved args * updated example --- .../workflows/R-CMD-check-depends-only.yaml | 3 +- DESCRIPTION | 27 ++++++++------- NAMESPACE | 6 +--- R/TreatmentPatterns-package.R | 6 +--- R/constructPathways.R | 2 +- R/createSankeyDiagram.R | 25 ++++++++------ R/createSunburstPlot.R | 8 ++++- R/export.R | 5 ++- R/ggSunburst.R | 10 ++++-- R/plotEventDuration.R | 7 ++-- R/utils.R | 5 +++ man/createSankeyDiagram.Rd | 4 ++- man/createSunburstPlot.Rd | 4 ++- man/ggSunburst.Rd | 4 ++- tests/testthat/test-computePathways.R | 1 - tests/testthat/test-createSankeyDiagram.R | 34 ++++++++++++------- tests/testthat/test-createSunburstPlot.R | 13 +++++++ tests/testthat/test-ggSunburst.R | 6 ++++ tests/testthat/test-plotEventDuration.R | 8 +++++ 19 files changed, 121 insertions(+), 57 deletions(-) create mode 100644 R/utils.R diff --git a/.github/workflows/R-CMD-check-depends-only.yaml b/.github/workflows/R-CMD-check-depends-only.yaml index 816be23f..4a7e511a 100644 --- a/.github/workflows/R-CMD-check-depends-only.yaml +++ b/.github/workflows/R-CMD-check-depends-only.yaml @@ -51,4 +51,5 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: - upload-snapshots: true \ No newline at end of file + upload-snapshots: true + args: 'c("--no-manual", "--no-build-vignettes", "--ignore-vignettes")' \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index ff817a50..81d97203 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,20 +16,16 @@ Language: en-US Depends: R (>= 4.2) Imports: - checkmate, - dplyr, - stringr, utils, stats, - Andromeda, - tidyr, R6, - sunburstR, - networkD3, - htmlwidgets, - ggplot2, + stringi, + jsonlite, + checkmate, + dplyr, + tidyr, dbplyr, - jsonlite + Andromeda Suggests: knitr, rmarkdown, @@ -48,12 +44,16 @@ Suggests: DBI, withr, plotly, - PaRe, - stats + sunburstR, + networkD3, + ggplot2, + htmlwidgets, + visOmopResults, + PaRe License: Apache License (>= 2) Encoding: UTF-8 LazyData: true -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 VignetteBuilder: knitr Config/testthat/edition: 3 Config/testthat/parallel: true @@ -73,3 +73,4 @@ Collate: 'getResultsDataModelSpecification.R' 'ggSunburst.R' 'plotEventDuration.R' + 'utils.R' diff --git a/NAMESPACE b/NAMESPACE index df2b87cf..beffb2dd 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,15 +14,11 @@ import(Andromeda) import(R6) import(checkmate) import(dplyr) -import(ggplot2) -import(stringr) -import(sunburstR) +import(stringi) import(utils) importFrom(dbplyr,window_order) -importFrom(htmlwidgets,JS) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) -importFrom(networkD3,sankeyNetwork) importFrom(stats,median) importFrom(stats,quantile) importFrom(stats,sd) diff --git a/R/TreatmentPatterns-package.R b/R/TreatmentPatterns-package.R index 89ef4c54..2bdb39cb 100644 --- a/R/TreatmentPatterns-package.R +++ b/R/TreatmentPatterns-package.R @@ -19,18 +19,14 @@ ## usethis namespace: start #' @import checkmate -#' @import stringr +#' @import stringi #' @import utils #' @import dplyr #' @import Andromeda #' @import R6 -#' @import sunburstR -#' @import ggplot2 #' @importFrom jsonlite fromJSON toJSON #' @importFrom dbplyr window_order -#' @importFrom networkD3 sankeyNetwork #' @importFrom stats sd median quantile -#' @importFrom htmlwidgets JS #' @importFrom tidyr pivot_wider ## usethis namespace: end NULL diff --git a/R/constructPathways.R b/R/constructPathways.R index 76a420cb..e10df907 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -136,7 +136,7 @@ constructPathways <- function(settings, andromeda) { eventCohortNames <- andromeda$treatmentHistory %>% dplyr::select("eventCohortName") %>% dplyr::pull() %>% - stringr::str_split(pattern = "\\+") %>% + stringi::stri_split(regex = "\\+") %>% lapply(FUN = function(x) { paste(sort(x), collapse = "+") }) %>% diff --git a/R/createSankeyDiagram.R b/R/createSankeyDiagram.R index 51be99bc..baf65c44 100644 --- a/R/createSankeyDiagram.R +++ b/R/createSankeyDiagram.R @@ -17,7 +17,7 @@ splitPathItems <- function(treatmentPathways) { data <- treatmentPathways %>% rowwise() %>% - dplyr::mutate(pathway = stringr::str_split(.data$pathway, pattern = "-")) %>% + dplyr::mutate(pathway = stringi::stri_split(str = .data$pathway, regex = "-")) %>% dplyr::mutate(freq = as.integer(.data$freq)) data <- data %>% @@ -52,8 +52,8 @@ createLinks <- function(data) { doGroupCombinations <- function(treatmentPathways, groupCombinations) { if (groupCombinations) { treatmentPathways$pathway <- treatmentPathways$pathway %>% - stringr::str_replace_all( - pattern = "((\\w+)?\\+\\w+)+", + stringi::stri_replace_all( + regex = "((\\w+)?\\+\\w+)+", replacement = "Combination" ) } @@ -83,11 +83,11 @@ createLinkedData <- function(data) { nameToId <- function(item, names) { item <- item %>% - stringr::str_replace_all(pattern = "\\(", replacement = "\\\\(") %>% - stringr::str_replace_all(pattern = "\\)", replacement = "\\\\)") %>% - stringr::str_replace_all(pattern = "\\+", replacement = "\\\\+") %>% - stringr::str_replace_all(pattern = "\\&", replacement = "\\\\&") %>% - stringr::str_replace_all(pattern = "\\.", replacement = "\\\\.") + stringi::stri_replace_all(regex = "\\(", replacement = "\\\\(") %>% + stringi::stri_replace_all(regex = "\\)", replacement = "\\\\)") %>% + stringi::stri_replace_all(regex = "\\+", replacement = "\\\\+") %>% + stringi::stri_replace_all(regex = "\\&", replacement = "\\\\&") %>% + stringi::stri_replace_all(regex = "\\.", replacement = "\\\\.") return(grep(sprintf("^%s$",item), names) - 1) } @@ -124,7 +124,8 @@ validateCreateSankeyDiagram <- function() { } setColourScale <- function(linkedData, colors) { - domain <- stringr::str_split_i(linkedData$nodes$names, "\\d\\.", i = 2) + domain <- stringi::stri_split(str = linkedData$nodes$names, regex = "\\d\\.") |> + sapply(function(x) x[[2]]) labels <- if (!is.null(colors)) { labels <- if (is.list(colors)) { @@ -180,10 +181,14 @@ setColourScale <- function(linkedData, colors) { #' index_year = rep("all", 5) #' ) #' -#' createSankeyDiagram(treatmentPathways) +#' if (interactive()) { +#' createSankeyDiagram(treatmentPathways) +#' } createSankeyDiagram <- function(treatmentPathways, groupCombinations = FALSE, colors = NULL, ...) { validateCreateSankeyDiagram() + checkInstall("networkD3") + treatmentPathways <- doGroupCombinations( treatmentPathways = treatmentPathways, groupCombinations = groupCombinations diff --git a/R/createSunburstPlot.R b/R/createSunburstPlot.R index d5fa463f..00a6c954 100644 --- a/R/createSunburstPlot.R +++ b/R/createSunburstPlot.R @@ -36,9 +36,15 @@ #' index_year = rep("all", 5) #' ) #' -#' createSunburstPlot(treatmentPatwhays) +#' if (interactive()) { +#' createSunburstPlot(treatmentPatwhays) +#' } createSunburstPlot <- function(treatmentPathways, groupCombinations = FALSE, ...) { validateCreateSunburstPlot() + + checkInstall("sunburstR") + checkInstall("htmlwidgets") + treatmentPathways <- doGroupCombinations( treatmentPathways = treatmentPathways, groupCombinations = groupCombinations diff --git a/R/export.R b/R/export.R index 93345096..acfa30d8 100644 --- a/R/export.R +++ b/R/export.R @@ -576,7 +576,10 @@ groupByAgeWindow <- function(treatmentHistory, ageWindow) { dplyr::rowwise() %>% dplyr::mutate( ageBin = paste( - unlist(stringr::str_extract_all(as.character(cut(.data$age, makeAgeWindow(ageWindow))), "\\d+")), + unlist(stringi::stri_extract_all( + str = as.character(cut(.data$age, makeAgeWindow(ageWindow))), + regex = "\\d+" + )), collapse = "-" ) ) diff --git a/R/ggSunburst.R b/R/ggSunburst.R index 3a818208..9a4f58ec 100644 --- a/R/ggSunburst.R +++ b/R/ggSunburst.R @@ -13,6 +13,7 @@ computeXCoords <- function(table, size) { splitPaths <- function(table) { table %>% dplyr::mutate(path_id = row_number()) %>% + dplyr::mutate(path = .data$pathway) %>% tidyr::separate_longer_delim(cols = "pathway", delim = "-") %>% dplyr::group_by(.data$sex, .data$age, .data$index_year, .data$path_id) %>% dplyr::mutate(layer = row_number()) %>% @@ -94,7 +95,7 @@ makeGgSunburst <- function(data) { gg + ggplot2::coord_polar() + - ggplot2::theme_bw() + + visOmopResults::themeVisOmop() + ggplot2::theme( axis.text.y = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank() @@ -122,8 +123,13 @@ makeGgSunburst <- function(data) { #' index_year = rep("all", 5) #' ) #' -#' ggSunburst(treatmentPatwhays) +#' if (interactive()) { +#' ggSunburst(treatmentPatwhays) +#' } ggSunburst <- function(treatmentPathways, groupCombinations = FALSE, unit = "percent") { + checkInstall("ggplot2") + checkInstall("visOmopResults") + size <- if (unit == "percent") { 100 } else if (unit == "count") { diff --git a/R/plotEventDuration.R b/R/plotEventDuration.R index 1aca8b6c..99c43fa3 100644 --- a/R/plotEventDuration.R +++ b/R/plotEventDuration.R @@ -107,7 +107,9 @@ plotEventDuration <- function(eventDurations, minCellCount = 0, treatmentGroups checkmate::assertLogical(x = includeOverall, len = 1, null.ok = FALSE, add = assertCol) checkmate::reportAssertions(assertCol) - #browser() + checkInstall("ggplot2") + checkInstall("visOmopResults") + eventDurations <- eventDurations %>% filter( .data$event_count >= minCellCount, @@ -142,5 +144,6 @@ plotEventDuration <- function(eventDurations, minCellCount = 0, treatmentGroups ggplot2::labs( title = "Duration of events per line", x = "time (days)" - ) + ) + + visOmopResults::themeVisOmop() } diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 00000000..8866cb2a --- /dev/null +++ b/R/utils.R @@ -0,0 +1,5 @@ +checkInstall <- function(pkg) { + if (!require(pkg, quietly = TRUE, warn.conflicts = FALSE, character.only = TRUE)) { + stop(sprintf("`%s` in an optional package, please install it with: `install.packages('%s')`", pkg, pkg)) + } +} diff --git a/man/createSankeyDiagram.Rd b/man/createSankeyDiagram.Rd index a990ef5c..63e8e687 100644 --- a/man/createSankeyDiagram.Rd +++ b/man/createSankeyDiagram.Rd @@ -42,5 +42,7 @@ treatmentPathways <- data.frame( index_year = rep("all", 5) ) -createSankeyDiagram(treatmentPathways) +if (interactive()) { + createSankeyDiagram(treatmentPathways) +} } diff --git a/man/createSunburstPlot.Rd b/man/createSunburstPlot.Rd index 0a103e51..8efe718d 100644 --- a/man/createSunburstPlot.Rd +++ b/man/createSunburstPlot.Rd @@ -35,5 +35,7 @@ treatmentPatwhays <- data.frame( index_year = rep("all", 5) ) -createSunburstPlot(treatmentPatwhays) +if (interactive()) { + createSunburstPlot(treatmentPatwhays) +} } diff --git a/man/ggSunburst.Rd b/man/ggSunburst.Rd index 1a815306..d2e2409f 100644 --- a/man/ggSunburst.Rd +++ b/man/ggSunburst.Rd @@ -35,5 +35,7 @@ treatmentPatwhays <- data.frame( index_year = rep("all", 5) ) -ggSunburst(treatmentPatwhays) +if (interactive()) { + ggSunburst(treatmentPatwhays) +} } diff --git a/tests/testthat/test-computePathways.R b/tests/testthat/test-computePathways.R index 8307342d..4d591588 100644 --- a/tests/testthat/test-computePathways.R +++ b/tests/testthat/test-computePathways.R @@ -2,7 +2,6 @@ library(testthat) library(TreatmentPatterns) library(dplyr) -library(stringr) # test_that("computePathways DatabaseConnector", { # skip("Eunomia [2.0.0] bug") diff --git a/tests/testthat/test-createSankeyDiagram.R b/tests/testthat/test-createSankeyDiagram.R index 88c222bd..f414ec7f 100644 --- a/tests/testthat/test-createSankeyDiagram.R +++ b/tests/testthat/test-createSankeyDiagram.R @@ -16,14 +16,16 @@ test_that("void", { test_that("minimal", { skip_on_cran() + skip_if_not_installed("networkD3") + p <- createSankeyDiagram(treatmentPathways = dummyData) - pLabels <- stringr::str_remove_all(string = p$x$nodes$name, pattern = "\\d\\.") + pLabels <- stringi::stri_replace_all(str = p$x$nodes$name, regex = "\\d\\.", replacement = "") pLabels <- pLabels["Stopped" != pLabels] |> unique() |> sort() - actualLabels <- stringr::str_split(string = dummyData$path, pattern = "-") |> + actualLabels <- stringi::stri_split(str = dummyData$path, regex = "-") |> unlist() |> unique() |> sort() @@ -33,16 +35,18 @@ test_that("minimal", { test_that("groupCombinations: TRUE", { skip_on_cran() + skip_if_not_installed("networkD3") + p <- createSankeyDiagram(treatmentPathways = dummyData, groupCombinations = TRUE) - pLabels <- stringr::str_remove_all(string = p$x$nodes$name, pattern = "\\d\\.") + pLabels <- stringi::stri_replace_all(str = p$x$nodes$name, regex = "\\d\\.", replacement = "") pLabels <- pLabels["Stopped" != pLabels] |> unique() |> sort() - actualLabels <- stringr::str_split(string = dummyData$path, pattern = "-") |> + actualLabels <- stringi::stri_split(str = dummyData$path, regex = "-") |> unlist() |> - stringr::str_replace_all(pattern = ".+\\+.+", replacement = "Combination") |> + stringi::stri_replace_all(regex = ".+\\+.+", replacement = "Combination") |> unlist() |> unique() |> sort() @@ -66,7 +70,7 @@ test_that("groupCombinations: TRUE", { ) p <- createSankeyDiagram(treatmentPathways = df, groupCombinations = TRUE) - labels <- stringr::str_remove_all(string = p$x$nodes$name, pattern = "\\d\\.") + labels <- stringi::stri_replace_all(str = p$x$nodes$name, regex = "\\d\\.", replacement = "") expect_true(all(labels %in% c("A", "B", "C", "D", "Z_Y", "1_2", "Combination", "Stopped"))) @@ -78,11 +82,13 @@ test_that("groupCombinations: TRUE", { test_that("colors", { skip_on_cran() + skip_if_not_installed("networkD3") + actualColors <- c("#ff33cc", "#ff0000", "#00ff00", "#0000ff", "#ffffff", "#000000") p <- createSankeyDiagram(treatmentPathways = dummyData, colors = actualColors) - pColors <- unlist(stringr::str_extract_all(string = p$x$options$colourScale, "\\#\\w{6}")) + pColors <- unlist(stringi::stri_extract_all(str = p$x$options$colourScale, regex = "\\#\\w{6}")) expect_true(all(pColors %in% actualColors)) @@ -99,9 +105,9 @@ test_that("colors", { p <- createSankeyDiagram(treatmentPathways = dummyData, colors = actualColors) - pColors <- unlist(stringr::str_extract_all(string = p$x$options$colourScale, "\\#\\w{6}")) - labels <- unlist(stringr::str_extract_all(string = p$x$options$colourScale, "\\'\\d\\.[\\w\\+\\-]+\\'")) - labels <- stringr::str_remove_all(string = labels, pattern = "[\\'|\\d{1}\\.]") + pColors <- unlist(stringi::stri_extract_all(str = p$x$options$colourScale, regex = "\\#\\w{6}")) + labels <- unlist(stringi::stri_extract_all(str = p$x$options$colourScale, regex = "\\'\\d\\.[\\w\\+\\-]+\\'")) + labels <- stringi::stri_replace_all(str = labels, regex = "[\\'|\\d{1}\\.]", replacement = "") labels <- unique(labels) l <- as.list(unique(pColors)) @@ -116,6 +122,8 @@ test_that("colors", { test_that("2 path levels", { skip_on_cran() + skip_if_not_installed("networkD3") + dummyData <- data.frame( pathway = c("A", "A-B+C", "A-D", "B+C", "D"), freq = c(206, 6, 14, 48, 221), @@ -126,12 +134,12 @@ test_that("2 path levels", { p <- createSankeyDiagram(treatmentPathways = dummyData) - pLabels <- stringr::str_remove_all(string = p$x$nodes$name, pattern = "\\d\\.") + pLabels <- stringi::stri_replace_all(str = p$x$nodes$name, regex = "\\d\\.", replacement = "") pLabels <- pLabels["Stopped" != pLabels] |> unique() |> sort() - actualLabels <- stringr::str_split(string = dummyData$path, pattern = "-") |> + actualLabels <- stringi::stri_split(str = dummyData$path, regex = "-") |> unlist() |> unique() |> sort() @@ -141,6 +149,8 @@ test_that("2 path levels", { test_that("1 path levels", { skip_on_cran() + skip_if_not_installed("networkD3") + treatmentPathways <- data.frame( pathway = c("a", "b", "c"), freq = c(55, 8, 11), diff --git a/tests/testthat/test-createSunburstPlot.R b/tests/testthat/test-createSunburstPlot.R index 47779560..172ae1de 100644 --- a/tests/testthat/test-createSunburstPlot.R +++ b/tests/testthat/test-createSunburstPlot.R @@ -9,14 +9,24 @@ dummyData <- data.frame( index_year = c(rep("all", 5), rep("2020", 5)) ) +test_that("void", { + expect_error(createSunburstPlot()) +}) + test_that("minimal", { skip_on_cran() + testthat::skip_if_not_installed("sunburstR") + testthat::skip_if_not_installed("htmlwidgets") + p <- createSunburstPlot(treatmentPathways = dummyData) expect_s3_class(p$x$data, "json") }) test_that("groupCombinations: TRUE", { skip_on_cran() + testthat::skip_if_not_installed("sunburstR") + testthat::skip_if_not_installed("htmlwidgets") + p <- createSunburstPlot( treatmentPathways = dummyData, groupCombinations = TRUE @@ -27,6 +37,9 @@ test_that("groupCombinations: TRUE", { test_that("colors", { skip_on_cran() + testthat::skip_if_not_installed("sunburstR") + testthat::skip_if_not_installed("htmlwidgets") + actualColors <- c("#ff33cc", "#ff0000", "#00ff00", "#0000ff", "#ffffff", "#000000") p <- createSunburstPlot(treatmentPathways = dummyData, colors = actualColors) diff --git a/tests/testthat/test-ggSunburst.R b/tests/testthat/test-ggSunburst.R index ac784ee7..69742577 100644 --- a/tests/testthat/test-ggSunburst.R +++ b/tests/testthat/test-ggSunburst.R @@ -1,5 +1,7 @@ test_that("ggSunburst", { skip_on_cran() + skip_if_not_installed("ggplot2") + treatmentPathways <- data.frame( pathway = c("A", "C-B", "A-B-C", "B", "B+A", "B-A-C"), freq = c(100, 75, 25, 500, 350, 20), @@ -22,6 +24,8 @@ test_that("ggSunburst", { test_that("ggSunburst: groupCombinations", { skip_on_cran() + skip_if_not_installed("ggplot2") + treatmentPathways <- data.frame( pathway = c("A", "C-B", "A-B-C", "B", "B+A", "B-A-C"), freq = c(100, 75, 25, 500, 350, 20), @@ -38,6 +42,8 @@ test_that("ggSunburst: groupCombinations", { test_that("ggSunburst: unit", { skip_on_cran() + skip_if_not_installed("ggplot2") + treatmentPathways <- data.frame( pathway = c("A", "C-B", "A-B-C", "B", "B+A", "B-A-C"), freq = c(100, 75, 25, 500, 350, 20), diff --git a/tests/testthat/test-plotEventDuration.R b/tests/testthat/test-plotEventDuration.R index 3c956ac6..1a975711 100644 --- a/tests/testthat/test-plotEventDuration.R +++ b/tests/testthat/test-plotEventDuration.R @@ -1,5 +1,7 @@ test_that("defaults", { skip_on_cran() + skip_if_not_installed("ggplot2") + data <- read.csv(system.file(package = "TreatmentPatterns", "DummyOutput", "summary_event_duration.csv")) gg <- plotEventDuration(eventDurations = data) @@ -10,6 +12,8 @@ test_that("defaults", { test_that("minCellCount", { skip_on_cran() + skip_if_not_installed("ggplot2") + data <- read.csv(system.file(package = "TreatmentPatterns", "DummyOutput", "summary_event_duration.csv")) gg <- plotEventDuration( @@ -35,6 +39,8 @@ test_that("minCellCount", { test_that("treatmentGroups", { skip_on_cran() + skip_if_not_installed("ggplot2") + data <- read.csv(system.file(package = "TreatmentPatterns", "DummyOutput", "summary_event_duration.csv")) gg <- plotEventDuration( @@ -61,6 +67,8 @@ test_that("treatmentGroups", { test_that("treatment lines", { skip_on_cran() + skip_if_not_installed("ggplot2") + data <- read.csv(system.file(package = "TreatmentPatterns", "DummyOutput", "summary_event_duration.csv")) gg <- plotEventDuration( From 6a82315acb842ecdea62e406b16bf28ba6da1267 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Fri, 7 Nov 2025 11:09:57 +0100 Subject: [PATCH 03/33] added reporting of missing ids --- R/constructPathways.R | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/R/constructPathways.R b/R/constructPathways.R index e10df907..942d2676 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -587,12 +587,28 @@ doCombinationWindow <- function( dplyr::summarise(dplyr::n()) %>% pull() + switchedPersons <- andromeda$treatmentHistory %>% + dplyr::filter( + .data$switch == 1 | + .data$combinationFRFS == 1 | + .data$combinationLRFS == 1 + ) %>% + dplyr::pull(.data$personId) + + totalPersons <- andromeda$treatmentHistory %>% + dplyr::pull(.data$personId) + + missingIds <- totalPersons[!totalPersons %in% switchedPersons] + sumSelectedRows <- andromeda$treatmentHistory %>% dplyr::summarise(sum = sum(.data$selectedRows, na.rm = TRUE)) %>% dplyr::pull() if (sumSwitchComb != sumSelectedRows) { - stop(sprintf("Expected switches before combination (%s) to be equal to switches after combination (%s)", sumSelectedRows, sumSwitchComb)) + stop(sprintf( + "Expected switches before combination (%s) to be equal to switches after combination (%s)\nMissing person IDs: %s", + sumSelectedRows, sumSwitchComb, missingIds + )) } # Do transformations for each of the three newly added columns From 3eee749f284a1c7767c4e66138a8d134e2a69dcd Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Fri, 7 Nov 2025 11:42:41 +0100 Subject: [PATCH 04/33] added publications and presentations --- ...{a999_Strategus.Rmd => a998_Strategus.Rmd} | 0 vignettes/a999_Publications-Presentations.Rmd | 37 +++++++++++++++++++ 2 files changed, 37 insertions(+) rename vignettes/{a999_Strategus.Rmd => a998_Strategus.Rmd} (100%) create mode 100644 vignettes/a999_Publications-Presentations.Rmd diff --git a/vignettes/a999_Strategus.Rmd b/vignettes/a998_Strategus.Rmd similarity index 100% rename from vignettes/a999_Strategus.Rmd rename to vignettes/a998_Strategus.Rmd diff --git a/vignettes/a999_Publications-Presentations.Rmd b/vignettes/a999_Publications-Presentations.Rmd new file mode 100644 index 00000000..8233576a --- /dev/null +++ b/vignettes/a999_Publications-Presentations.Rmd @@ -0,0 +1,37 @@ +--- +title: "Publications and Presentations" +always_allow_html: yes +output: + html_document: + df_print: paged + html_vignette: + vignette: > + %\VignetteIndexEntry{Publications-Presentations} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} + pdf_document: + toc: yes +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` + +### Publications +- [TreatmentPatterns: An R package to facilitate the standardized development and analysis of treatment patterns across disease domains (Aniek F. Markus, 2022)](https://doi.org/10.1016/j.cmpb.2022.107081) +- [Treatment of systemic lupus erythematosus: Analysis of treatment patterns in adult and paediatric patients across four European countries (Mike Du, 2024)](https://doi.org/10.1016/j.ejim.2024.08.008) +- [Supporting Pharmacovigilance Signal Validation and Prioritization with Analyses of Routinely Collected Health Data: Lessons Learned from an EHDEN Network Study (Oskar Gauffin, 2023)](https://doi.org/10.1007/s40264-023-01353-w) +- [Real-World Evidence to Support EU Regulatory Decision Making—Results From a Pilot of Regulatory Use Cases (Stefanie Prilla, 2024)](https://doi.org/10.1002/cpt.3355) +- [Utility of Treatment Pattern Analysis Using a Common Data Model: A Scoping Review (Eun-Gee Park, 2025)](https://doi.org/10.4258/hir.2025.31.1.4) +- [Markov modeling for cost-effectiveness using federated health data network (Markus Haug, 2024)](https://doi.org/10.1093/jamia/ocae044) +- [Real-world treatment trajectories of adults with newly diagnosed asthma or COPD (Aniek F. Markus, 2024)](https://doi.org/10.1136/bmjresp-2023-002127) +- [TrajectoryViz: Interactive visualization of treatment trajectories (Maarja Pajusalu, 2024)](https://doi.org/10.1016/j.imu.2024.101558) +- [Towards the Common Data Model for an Intensive Medicine Data Space in Europe (Carlos Luis Parra-Calderón, 2025)](https://doi.org/10.3233/SHTI250393) + +### Presentations +- [2025 Introduction to the HADES TreatmentPatterns Package](https://www.youtube.com/watch?v=VraAakLqR8M) +- [2024 Tutorial: Conducting Off-The-Shelf Characterization Studies Using DARWIN EU® Tools and the OMOP CDM](https://youtu.be/OXBAhoI0mco?t=10072) +- [2024 10-Minute Tutorials: Treatment Patterns (Maarten van Kessel)](https://www.youtube.com/watch?v=dLssMjPcuss) +- [2022 TreatmentPatterns: An R package to facilitate the standardized development and analysis (Markus)](https://www.youtube.com/watch?v=lu4fcOXdlJs) + +Would you like to add something? Please [open an issue](https://github.com/darwin-eu/TreatmentPatterns/issues). From a14fc1c84f9a0809efc8de8201e194b15746a984 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Tue, 6 Jan 2026 12:58:25 +0100 Subject: [PATCH 05/33] updated collapse --- R/constructPathways.R | 116 ++++++++++++++++++++++++------------------ 1 file changed, 66 insertions(+), 50 deletions(-) diff --git a/R/constructPathways.R b/R/constructPathways.R index 942d2676..53c4f190 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -371,7 +371,6 @@ doSplitEventCohorts <- function( return(invisible(NULL)) } - #' doEraCollapseNew #' #' @param andromeda (`Andromeda::andromeda()`) @@ -379,57 +378,74 @@ doSplitEventCohorts <- function( #' #' @returns `NULL` doEraCollapseNew <- function(andromeda, eraCollapseSize) { - andromeda$treatmentHistory <- andromeda$treatmentHistory |> - dplyr::group_by(.data$personId, .data$eventCohortId, .data$n_target) %>% - dbplyr::window_order(.data$eventStartDate, .data$eventEndDate) |> - dplyr::mutate( - diff = .data$eventStartDate - dplyr::lag(.data$eventEndDate), - flag = dplyr::case_when( - .data$diff <= eraCollapseSize ~ 1, - .default = 0 - ), - flag = dplyr::case_when( - dplyr::lead(.data$flag) == 1 - | .data$flag == 1 - ~ 1, - .default = 0 - ), - row = dplyr::case_when( - .data$flag == 1 & .data$diff <= eraCollapseSize ~ dplyr::row_number(), - .default = 0 - ), - end_date = dplyr::case_when( - .data$row == max(.data$row, na.rm = TRUE) ~ .data$eventEndDate - ) - ) |> - dplyr::mutate( - eventEndDate_old = .data$eventEndDate, - eventEndDate = dplyr::case_when( - .data$flag == 1 ~ max(.data$end_date, na.rm = TRUE), - .default = .data$eventEndDate_old - ) - ) |> - dplyr::mutate( - keep = dplyr::case_when( - .data$flag == 1 & .data$row == min(.data$row, na.rm = TRUE) ~ TRUE, - .data$flag == 0 ~ TRUE, - .default = FALSE + start <- TRUE + counter <- 0 + + while (start) { + andromeda$treatmentHistory <- andromeda$treatmentHistory |> + dplyr::group_by(.data$personId, .data$eventCohortId, .data$n_target) |> + dbplyr::window_order(.data$eventStartDate, .data$eventEndDate) |> + dplyr::mutate( + diff = .data$eventStartDate - dplyr::lag(.data$eventEndDate), + flag = dplyr::case_when( + .data$diff <= eraCollapseSize ~ 1, + .default = 0 + ), + flag = dplyr::case_when( + dplyr::lead(.data$flag) == 1 + | .data$flag == 1 + ~ 1, + .default = 0 + ), + row = dplyr::case_when( + .data$flag == 1 & .data$diff <= eraCollapseSize ~ dplyr::row_number(), + .default = 0 + ), + end_date = dplyr::case_when( + .data$row == max(.data$row, na.rm = TRUE) ~ .data$eventEndDate + ) ) - ) |> - dplyr::ungroup() |> - dplyr::filter(.data$keep) |> - dplyr::select(-"flag", -"eventEndDate_old", -"end_date", -"row") + + flags <- andromeda$treatmentHistory |> + dplyr::pull(.data$flag) |> + as.logical() - attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") - appendAttrition( - toAdd = data.frame( - number_records = attrCounts$nRecords, - number_subjects = attrCounts$nSubjects, - reason_id = 5, - reason = sprintf("Collapsing eras, eraCollapse (%s)", eraCollapseSize) - ), - andromeda = andromeda - ) + if (any(flags)) { + andromeda$treatmentHistory <- andromeda$treatmentHistory |> + dplyr::mutate( + eventEndDate_old = .data$eventEndDate, + eventEndDate = dplyr::case_when( + .data$flag == 1 ~ max(.data$end_date, na.rm = TRUE), + .default = .data$eventEndDate_old + ) + ) |> + dplyr::mutate( + keep = dplyr::case_when( + .data$flag == 1 & .data$row == min(.data$row, na.rm = TRUE) ~ TRUE, + .data$flag == 0 ~ TRUE, + .default = FALSE + ) + ) |> + dplyr::ungroup() |> + dplyr::filter(.data$keep) |> + dplyr::select(-"flag", -"eventEndDate_old", -"end_date", -"row") + + counter <- counter + 1 + + attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") + appendAttrition( + toAdd = data.frame( + number_records = attrCounts$nRecords, + number_subjects = attrCounts$nSubjects, + reason_id = 5, + reason = sprintf("Iteration %s: Collapsing eras, eraCollapse (%s)", counter, eraCollapseSize) + ), + andromeda = andromeda + ) + } else { + start <- FALSE + } + } return(invisible(NULL)) } From c3437b90eb40f1e43da5e0f31a1690d3ea55e900 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Tue, 6 Jan 2026 14:45:17 +0100 Subject: [PATCH 06/33] fixed grouping --- R/constructPathways.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/constructPathways.R b/R/constructPathways.R index 53c4f190..2bde597f 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -412,6 +412,8 @@ doEraCollapseNew <- function(andromeda, eraCollapseSize) { if (any(flags)) { andromeda$treatmentHistory <- andromeda$treatmentHistory |> + dplyr::group_by(.data$personId, .data$eventCohortId, .data$n_target) |> + dbplyr::window_order(.data$eventStartDate, .data$eventEndDate) |> dplyr::mutate( eventEndDate_old = .data$eventEndDate, eventEndDate = dplyr::case_when( @@ -428,7 +430,7 @@ doEraCollapseNew <- function(andromeda, eraCollapseSize) { ) |> dplyr::ungroup() |> dplyr::filter(.data$keep) |> - dplyr::select(-"flag", -"eventEndDate_old", -"end_date", -"row") + dplyr::select(-"diff", -"flag", -"row", -"end_date", -"eventEndDate_old", -"keep") counter <- counter + 1 @@ -443,6 +445,9 @@ doEraCollapseNew <- function(andromeda, eraCollapseSize) { andromeda = andromeda ) } else { + andromeda$treatmentHistory <- andromeda$treatmentHistory |> + dplyr::select(-"diff", -"flag", -"row", -"end_date") + start <- FALSE } } From cd1476258ec99a4e1f53b91f91b5b6695394bff5 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Tue, 6 Jan 2026 15:13:37 +0100 Subject: [PATCH 07/33] updated nrow for updated attrition --- tests/testthat/test-exportPatientLevel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-exportPatientLevel.R b/tests/testthat/test-exportPatientLevel.R index 5524776a..5419c972 100644 --- a/tests/testthat/test-exportPatientLevel.R +++ b/tests/testthat/test-exportPatientLevel.R @@ -47,6 +47,6 @@ test_that("exportPatientLevel", { expect_equal(ncol(attrition), 5) expect_equal(nrow(attrition), 11) - expect_equal(ncol(cdm_source_info), 10) + expect_equal(ncol(cdm_source_info), 11) expect_equal(nrow(cdm_source_info), 1) }) From e62eb619d132e9fe0efcbba5a7111c15bd55dff5 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Tue, 6 Jan 2026 15:14:05 +0100 Subject: [PATCH 08/33] updated nrow for updated attrition --- tests/testthat/test-exportPatientLevel.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-exportPatientLevel.R b/tests/testthat/test-exportPatientLevel.R index 5419c972..8f4b6264 100644 --- a/tests/testthat/test-exportPatientLevel.R +++ b/tests/testthat/test-exportPatientLevel.R @@ -45,8 +45,8 @@ test_that("exportPatientLevel", { expect_equal(nrow(metadata), 1) expect_equal(ncol(attrition), 5) - expect_equal(nrow(attrition), 11) + expect_equal(nrow(attrition), 10) - expect_equal(ncol(cdm_source_info), 11) + expect_equal(ncol(cdm_source_info), 10) expect_equal(nrow(cdm_source_info), 1) }) From 3ce7033e5e8fa1954a3b587efa4b557fc6d1a0db Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Tue, 6 Jan 2026 16:13:15 +0100 Subject: [PATCH 09/33] replaced == with %in% --- R/export.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/export.R b/R/export.R index acfa30d8..6f432705 100644 --- a/R/export.R +++ b/R/export.R @@ -759,7 +759,7 @@ getFilteredSubjects <- function(andromeda) { out <- andromeda$currentCohorts %>% dplyr::anti_join(andromeda$treatmentHistory, join_by(personId == personId)) %>% - dplyr::filter(.data$cohortId == targetCohortId) %>% + dplyr::filter(.data$cohortId %in% targetCohortId) %>% dplyr::mutate( indexYear = floor(.data$startDate / 365.25) + 1970, eventCohortName = "None", From cf1d0df2086ccda20a0cd08d33ce6103bcfada7d Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Tue, 6 Jan 2026 16:21:37 +0100 Subject: [PATCH 10/33] added test for nonePaths --- tests/testthat/test-pathwaysMultipleTargetsLogical.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-pathwaysMultipleTargetsLogical.R b/tests/testthat/test-pathwaysMultipleTargetsLogical.R index 8fa2f9ea..4eef39a1 100644 --- a/tests/testthat/test-pathwaysMultipleTargetsLogical.R +++ b/tests/testthat/test-pathwaysMultipleTargetsLogical.R @@ -162,6 +162,10 @@ test_that("Pathways", { result <- TreatmentPatterns::export(andromeda, minCellCount = 1) + expect_no_error({ + result <- TreatmentPatterns::export(andromeda, minCellCount = 1) + }) + subjectIds <- cohort_table %>% pull(.data$subject_id) %>% unique() From 0c56d0534a48c98377242df9296c62285085fef1 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 7 Jan 2026 16:04:39 +0100 Subject: [PATCH 11/33] unified db interface to CDMConnector --- R/CDMInterface.R | 531 ++++++++++------------------------------ R/computePathways.R | 33 +-- inst/sql/selectData.sql | 28 --- 3 files changed, 148 insertions(+), 444 deletions(-) delete mode 100644 inst/sql/selectData.sql diff --git a/R/CDMInterface.R b/R/CDMInterface.R index 994306cb..6a70c066 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -14,411 +14,140 @@ # See the License for the specific language governing permissions and # limitations under the License. -#' @title CDMInterface -#' -#' @description -#' Abstract interface to the CDM, using CDMConnector or DatabaseConnector. -#' -#' @noRd -CDMInterface <- R6::R6Class( - classname = "CDMInterface", - public = list( - ## Public ---- - ### Methods ---- - #' @description - #' Initializer method - #' - #' @template param_connectionDetails - #' @template param_cdmSchema - #' @template param_resultSchema - #' @param tempEmulationSchema Schema used to emulate temp tables. - #' @template param_cdm - #' - #' @return (`invisible(self)`) - initialize = function(connectionDetails = NULL, cdmSchema = NULL, resultSchema = NULL, tempEmulationSchema = NULL, cdm = NULL) { - private$.connectionDetails <- connectionDetails - if (!is.null(private$.connectionDetails)) { - private$.connection <- DatabaseConnector::connect(private$.connectionDetails) - } - private$.cdmSchema <- cdmSchema - private$.resultSchema <- resultSchema - private$.tempEmulationSchema <- tempEmulationSchema - private$.cdm <- cdm - - if (!is.null(cdm)) { - private$.type <- "CDMConnector" - } else if (!is.null(connectionDetails)) { - private$.type <- "DatabaseConnector" - } else { - stop("Could not assert if CDMConnector or DatabaseConnector is being used.") - } - return(invisible(self)) - }, - - #' @description - #' Fetch specified cohort IDs from a specified cohort table - #' - #' @template param_cohorts - #' @template param_cohortTableName - #' @template param_andromeda - #' @param andromedaTableName (`character(1)`)\cr - #' Name of the table in the Andromeda object where the data will be loaded. - #' @template param_minEraDuration - #' - #' @return (`andromeda`) - fetchCohortTable = function(cohorts, cohortTableName, andromeda, andromedaTableName, minEraDuration = NULL) { - switch( - private$.type, - CDMConnector = private$cdmconFetchCohortTable(cohorts, cohortTableName, andromeda, andromedaTableName, minEraDuration), - DatabaseConnector = private$dbconFetchCohortTable(cohorts, cohortTableName, andromeda, andromedaTableName, minEraDuration) - ) - }, - - #' @description - #' Fetch cdm_source from CDM - #' - #' @template param_andromeda - #' - #' @return (`andromeda`) - fetchCdmSource = function(andromeda) { - switch( - private$.type, - CDMConnector = private$cdmconFetchCdmSource(andromeda), - DatabaseConnector = private$dbconFetchCdmSource(andromeda) - ) - }, - - #' @description - #' Fetch meta data from CDM - #' - #' @template param_andromeda - #' - #' @return (`andromeda`) - fetchMetadata = function(andromeda) { - andromeda$metadata <- data.frame( - execution_start = as.numeric(Sys.time()), - package_version = as.character(utils::packageVersion("TreatmentPatterns")), - r_version = base::version$version.string, - platform = base::version$platform - ) - return(andromeda) - }, - - #' @description - #' Destroys instance - #' - #' @return (NULL) - disconnect = function() { - if (!is.null(private$.connection)) { - DatabaseConnector::disconnect(private$.connection) - } - private$.cdm <- NULL - }, - - checkCohortTable = function() { - switch( - private$.type, - CDMConnector = private$cdmconCheckCohortTable(andromeda), - DatabaseConnector = private$dbconCheckCohortTable(andromeda) - ) - } - ), - private = list( - ## Private ---- - ### Fields ---- - .connectionDetails = NULL, - .connection = NULL, - .cdmSchema = NULL, - .resultSchema = NULL, - .tempEmulationSchema = NULL, - .cdm = NULL, - .type = "", - - ### Methods ---- - finalize = function() { - self$disconnect() - }, - - dbAppendAttrition = function(n, andromeda, cohortIds) { - appendAttrition( - toAdd = data.frame( - number_records = sum(n), - number_subjects = length(n), - reason_id = 1, - reason = sprintf("Qualifying records for cohort definitions: %s", paste(cohortIds, collapse = ", ")), - time_stamp = as.numeric(Sys.time()) - ), - andromeda = andromeda - ) - }, - - #### DatabaseConnector ---- - # cohortIds (`integer(n)`) - # cohortTableName (`character(1)`) - dbconFetchCohortTable = function(cohorts, cohortTableName, andromeda, andromedaTableName, minEraDuration) { - targetCohortId <- getCohortIds(cohorts, "target") - - n <- lapply(cohortTableName, function(cohortTable) { - DatabaseConnector::renderTranslateQuerySql( - connection = private$.connection, - sql = " - SELECT COUNT(*) - FROM @resultSchema.@cohortTable - WHERE cohort_definition_id IN (@cohortIds) - GROUP BY subject_id;", - resultSchema = private$.resultSchema, - cohortTable = cohortTable, - cohortIds = cohorts$cohortId - ) |> - unlist() |> - as.numeric() - }) |> - unlist() - - private$dbAppendAttrition(n, andromeda, sort(cohorts$cohortId)) - - # Select relevant data - sql <- lapply(cohortTableName, function(tableName) { - SqlRender::loadRenderTranslateSql( - sqlFilename = "selectData.sql", - packageName = "TreatmentPatterns", - dbms = private$.connection@dbms, - tempEmulationSchema = private$.tempEmulationSchema, - resultSchema = private$.resultSchema, - cdmSchema = private$.cdmSchema, - cohortTable = tableName, - cohortIds = cohorts$cohortId, - minEraDuration = minEraDuration - ) - }) - - renderedSql <- paste(sql, collapse = "\nUNION ALL\n") - - DatabaseConnector::renderTranslateExecuteSql( - connection = private$.connection, - oracleTempSchema = private$.tempEmulationSchema, - sql = sprintf( - "DROP TABLE IF EXISTS #tp_dbc_cohort_table; - - SELECT * - INTO #tp_dbc_cohort_table - FROM ( - %s - ) a;", - renderedSql - ), - tempEmulationSchema = private$.tempEmulationSchema - ) - - DatabaseConnector::renderTranslateQuerySqlToAndromeda( - connection = private$.connection, - andromeda = andromeda, - andromedaTableName = andromedaTableName, - tempEmulationSchema = private$.tempEmulationSchema, - sql = " - SELECT - #tp_dbc_cohort_table.cohort_definition_id AS cohort_definition_id, - #tp_dbc_cohort_table.subject_id AS subject_id, - #tp_dbc_cohort_table.cohort_start_date, - #tp_dbc_cohort_table.cohort_end_date, - #tp_dbc_cohort_table.age, - #tp_dbc_cohort_table.sex, - #tp_dbc_cohort_table.subject_id_origin - FROM #tp_dbc_cohort_table - INNER JOIN ( - SELECT #tp_dbc_cohort_table.subject_id - FROM #tp_dbc_cohort_table - WHERE #tp_dbc_cohort_table.cohort_definition_id IN (@targetCohortId) - ) AS cross_sec - ON cross_sec.subject_id = #tp_dbc_cohort_table.subject_id", - targetCohortId = targetCohortId - ) - - names(andromeda[[andromedaTableName]]) <- tolower(names(andromeda[[andromedaTableName]])) - - if (utils::packageVersion("Andromeda") >= package_version("1.0.0")) { - andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% - dplyr::mutate( - cohort_start_date = dplyr::sql("datediff('day', DATE '1970-01-01', cohort_start_date)"), - cohort_end_date = dplyr::sql("datediff('day', DATE '1970-01-01', cohort_end_date)") - ) - } else { - andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% - dplyr::mutate( - cohort_start_date = as.integer(.data$cohort_start_date), - cohort_end_date = as.integer(.data$cohort_end_date) - ) - } - - n <- andromeda[[andromedaTableName]] %>% - dplyr::group_by(.data$subject_id) %>% - dplyr::summarise(n = dplyr::n()) %>% - dplyr::pull() - - appendAttrition( - toAdd = data.frame( - number_records = sum(n), - number_subjects = length(n), - reason_id = 2, - reason = sprintf("Removing records < minEraDuration (%s)", minEraDuration), - time_stamp = as.numeric(Sys.time()) - ), - andromeda = andromeda - ) - return(andromeda) - }, - - dbconFetchCdmSource = function(andromeda) { - renderedSql <- SqlRender::render( - sql = "SELECT * FROM @cdmSchema.cdm_source;", - cdmSchema = private$.cdmSchema - ) - - translatedSql <- SqlRender::translate( - sql = renderedSql, - targetDialect = private$.connection@dbms - ) +fetchMetadata <- function(andromeda) { + andromeda$metadata <- data.frame( + execution_start = as.numeric(Sys.time()), + package_version = as.character(utils::packageVersion("TreatmentPatterns")), + r_version = base::version$version.string, + platform = base::version$platform + ) + return(andromeda) +} + +dbAppendAttrition <- function(n, andromeda, cohortIds) { + appendAttrition( + toAdd = data.frame( + number_records = sum(n), + number_subjects = length(n), + reason_id = 1, + reason = sprintf("Qualifying records for cohort definitions: %s", paste(cohortIds, collapse = ", ")), + time_stamp = as.numeric(Sys.time()) + ), + andromeda = andromeda + ) +} - DatabaseConnector::querySqlToAndromeda( - connection = private$.connection, - sql = translatedSql, - andromeda = andromeda, - andromedaTableName = "cdm_source_info" +fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromedaTableName, minEraDuration) { + targetCohortIds <- cohorts %>% + dplyr::filter(.data$type == "target") %>% + dplyr::select("cohortId") %>% + dplyr::pull() + + n <- sapply(cohortTableName, function(tableName) { + cdm[[tableName]] %>% + dplyr::group_by(.data$subject_id) %>% + dplyr::summarise(n = dplyr::n()) %>% + dplyr::pull() + }) |> unlist() + + if (length(n) == 0) { + n <- 0 + } + + dbAppendAttrition(n, andromeda, sort(cohorts$cohortId)) + + cohortIds <- cohorts$cohortId + + for (tableName in cohortTableName) { + tbl <- cdm[[tableName]] %>% + dplyr::group_by(.data$subject_id) %>% + dplyr::mutate( + subject_id_origin = .data$subject_id + ) %>% + dplyr::ungroup() %>% + dbplyr::window_order(.data$subject_id, .data$cohort_start_date) |> + dplyr::mutate(r = dplyr::row_number()) %>% + dplyr::group_by(.data$subject_id_origin) %>% + dplyr::mutate( + subject_id = min(.data$r, na.rm = TRUE) + ) %>% + dplyr::select(-"r") %>% + dplyr::ungroup() %>% + dplyr::filter(.data$cohort_definition_id %in% cohortIds) %>% + dplyr::filter(!!CDMConnector::datediff("cohort_start_date", "cohort_end_date", interval = "day") >= minEraDuration) %>% + dplyr::group_by(.data$subject_id) %>% + dplyr::ungroup() %>% + dplyr::inner_join( + cdm$person, + by = dplyr::join_by(subject_id_origin == person_id) + ) %>% + dplyr::inner_join( + cdm$concept, + by = dplyr::join_by(gender_concept_id == concept_id)) %>% + dplyr::mutate( + date_of_birth = as.Date(paste0(as.character(year_of_birth), "-01-01"))) %>% + dplyr::mutate( + age = !!CDMConnector::datediff("date_of_birth", "cohort_start_date", interval = "year")) %>% + dplyr::mutate( + subject_id_origin = as.character(subject_id_origin) + ) %>% + dplyr::rename(sex = "concept_name") %>% + dplyr::mutate( + temp_date = as.Date("1970-01-01") + ) %>% + dplyr::mutate( + cohort_start_date = !!CDMConnector::datediff(start = "temp_date", end = "cohort_start_date", interval = "day"), + cohort_end_date = !!CDMConnector::datediff(start = "temp_date", end = "cohort_end_date", interval = "day") + ) %>% + dplyr::select( + "cohort_definition_id", + "subject_id", + "subject_id_origin", + "cohort_start_date", + "cohort_end_date", + "age", + "sex" ) - names(andromeda$cdm_source_info) <- tolower(names(andromeda$cdm_source_info)) - return(andromeda) - }, - #### CDMConnector ---- - # cohortIds (`integer(n)`) - # cohortTableName (`character(1)`) - # andromeda (`Andromeda::andromeda()`) - # andromedaTableName (`character(1)`) - cdmconFetchCohortTable = function(cohorts, cohortTableName, andromeda, andromedaTableName, minEraDuration) { - targetCohortIds <- cohorts %>% - dplyr::filter(.data$type == "target") %>% - dplyr::select("cohortId") %>% - dplyr::pull() - - n <- sapply(cohortTableName, function(tableName) { - private$.cdm[[tableName]] %>% - dplyr::group_by(.data$subject_id) %>% - dplyr::summarise(n = dplyr::n()) %>% - dplyr::pull() - }) |> unlist() - - if (length(n) == 0) { - n <- 0 - } - - private$dbAppendAttrition(n, andromeda, sort(cohorts$cohortId)) - - cohortIds <- cohorts$cohortId - - for (tableName in cohortTableName) { - tbl <- private$.cdm[[tableName]] %>% - dplyr::group_by(.data$subject_id) %>% - dplyr::mutate( - subject_id_origin = .data$subject_id - ) %>% - dplyr::ungroup() %>% - dbplyr::window_order(.data$subject_id, .data$cohort_start_date) |> - mutate(r = dplyr::row_number()) %>% - dplyr::group_by(.data$subject_id_origin) %>% - dplyr::mutate( - subject_id = min(.data$r, na.rm = TRUE) - ) %>% - dplyr::select(-"r") %>% - dplyr::ungroup() %>% - dplyr::filter(.data$cohort_definition_id %in% cohortIds) %>% - dplyr::filter(!!CDMConnector::datediff("cohort_start_date", "cohort_end_date", interval = "day") >= minEraDuration) %>% - dplyr::group_by(.data$subject_id) %>% - dplyr::ungroup() %>% - dplyr::inner_join( - private$.cdm$person, - by = dplyr::join_by(subject_id_origin == person_id) - ) %>% - dplyr::inner_join( - private$.cdm$concept, - by = dplyr::join_by(gender_concept_id == concept_id)) %>% - dplyr::mutate( - date_of_birth = as.Date(paste0(as.character(year_of_birth), "-01-01"))) %>% - dplyr::mutate( - age = !!CDMConnector::datediff("date_of_birth", "cohort_start_date", interval = "year")) %>% - dplyr::mutate( - subject_id_origin = as.character(subject_id_origin) - ) %>% - dplyr::rename(sex = "concept_name") %>% - dplyr::mutate( - temp_date = as.Date("1970-01-01") - ) %>% - dplyr::mutate( - cohort_start_date = !!CDMConnector::datediff(start = "temp_date", end = "cohort_start_date", interval = "day"), - cohort_end_date = !!CDMConnector::datediff(start = "temp_date", end = "cohort_end_date", interval = "day") - ) %>% - dplyr::select( - "cohort_definition_id", - "subject_id", - "subject_id_origin", - "cohort_start_date", - "cohort_end_date", - "age", - "sex" - ) - - if (is.null(andromeda[[andromedaTableName]])) { - dplyr::copy_to(dest = andromeda, df = tbl, name = andromedaTableName, overwrite = TRUE) - } else { - dplyr::copy_to(dest = andromeda, df = tbl, name = "tbl_temp", overwrite = TRUE) - andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% - dplyr::union_all(andromeda$tbl_temp) - andromeda$tbl_temp <- NULL - } - } - - targetId <- as.numeric(targetCohortIds) - + if (is.null(andromeda[[andromedaTableName]])) { + dplyr::copy_to(dest = andromeda, df = tbl, name = andromedaTableName, overwrite = TRUE) + } else { + dplyr::copy_to(dest = andromeda, df = tbl, name = "tbl_temp", overwrite = TRUE) andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% - dplyr::mutate(cohort_definition_id = as.numeric(.data$cohort_definition_id)) %>% - dplyr::group_by(.data$subject_id) %>% - dplyr::filter(any(.data$cohort_definition_id %in% targetId, na.rm = TRUE)) %>% - dplyr::ungroup() - - n <- andromeda[[andromedaTableName]] %>% - dplyr::group_by(.data$subject_id) %>% - dplyr::summarise(n = dplyr::n()) %>% - dplyr::pull() - - appendAttrition( - toAdd = data.frame( - number_records = sum(n), - number_subjects = length(n), - reason_id = 2, - reason = sprintf("Removing records < minEraDuration (%s)", minEraDuration), - time_stamp = as.numeric(Sys.time()) - ), - andromeda = andromeda - ) - return(andromeda) - }, - - # andromeda (`Andromeda::andromeda()`) - cdmconFetchCdmSource = function(andromeda) { - cdmSource <- private$.cdm$cdm_source %>% - dplyr::collect() - andromeda$cdm_source_info <- cdmSource - return(andromeda) + dplyr::union_all(andromeda$tbl_temp) + andromeda$tbl_temp <- NULL } - ), + } + + targetId <- as.numeric(targetCohortIds) + + andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% + dplyr::mutate(cohort_definition_id = as.numeric(.data$cohort_definition_id)) %>% + dplyr::group_by(.data$subject_id) %>% + dplyr::filter(any(.data$cohort_definition_id %in% targetId, na.rm = TRUE)) %>% + dplyr::ungroup() + + n <- andromeda[[andromedaTableName]] %>% + dplyr::group_by(.data$subject_id) %>% + dplyr::summarise(n = dplyr::n()) %>% + dplyr::pull() - # Active ---- - active = list( - connectionDetails = function() return(private$.connectionDetails), - connection = function() return(private$.connection), - cdmSchema = function() return(private$.cdmSchema), - resultSchema = function() return(private$.resultSchema), - tempEmulationSchema = function() return(private$.tempEmulationSchema), - cdm = function() return(private$.cdm), - type = function() return(private$.type) + appendAttrition( + toAdd = data.frame( + number_records = sum(n), + number_subjects = length(n), + reason_id = 2, + reason = sprintf("Removing records < minEraDuration (%s)", minEraDuration), + time_stamp = as.numeric(Sys.time()) + ), + andromeda = andromeda ) -) + return(andromeda) +} + +fetchCdmSource = function(cdm, andromeda) { + cdmSource <- cdm$cdm_source %>% + dplyr::collect() + andromeda$cdm_source_info <- cdmSource + return(andromeda) +} diff --git a/R/computePathways.R b/R/computePathways.R index abd99262..78f01819 100644 --- a/R/computePathways.R +++ b/R/computePathways.R @@ -137,6 +137,20 @@ computePathways <- function( maxPathLength = 5, overlapMethod = "truncate", concatTargets = TRUE) { + + if (!is.null(connectionDetails)) { + con <- DatabaseConnector::connect(connectionDetails) + cdm <- CDMConnector::cdmFromCon( + con = con, + cdmSchema = cdmSchema, + writeSchema = resultSchema, + cohortTables = cohortTableName + ) + withr::defer({ + DatabaseConnector::disconnect(con) + }) + } + validateComputePathways() args <- eval( @@ -144,18 +158,6 @@ computePathways <- function( envir = sys.frame(sys.nframe()) ) - cdmInterface <- CDMInterface$new( - connectionDetails = connectionDetails, - cdmSchema = cdmSchema, - resultSchema = resultSchema, - tempEmulationSchema = tempEmulationSchema, - cdm = cdm - ) - - withr::defer({ - cdmInterface$disconnect() - }) - andromeda <- Andromeda::andromeda() argsToSave <- as.character(jsonlite::toJSON(args[-grep("cdm|connectionDetails", names(args))])) @@ -170,9 +172,10 @@ computePathways <- function( description = description ) - andromeda <- cdmInterface$fetchMetadata(andromeda) - andromeda <- cdmInterface$fetchCdmSource(andromeda) - andromeda <- cdmInterface$fetchCohortTable( + andromeda <- fetchMetadata(andromeda) + andromeda <- fetchCdmSource(cdm, andromeda) + andromeda <- fetchCohortTable( + cdm = cdm, cohorts = cohorts, cohortTableName = cohortTableName, andromeda = andromeda, diff --git a/inst/sql/selectData.sql b/inst/sql/selectData.sql deleted file mode 100644 index 066188c3..00000000 --- a/inst/sql/selectData.sql +++ /dev/null @@ -1,28 +0,0 @@ -SELECT - @cohortTable.cohort_definition_id, - new_id AS subject_id, - @cohortTable.cohort_start_date, - @cohortTable.cohort_end_date, - CAST(subject_id_origin AS VARCHAR) AS subject_id_origin, - YEAR(@cohortTable.cohort_start_date) - person.year_of_birth AS age, - concept.concept_name AS sex - FROM @resultSchema.@cohortTable - INNER JOIN ( - SELECT - ROW_NUMBER() OVER (ORDER BY subject_id) AS new_id, - subject_id AS subject_id_origin - FROM ( - SELECT - DISTINCT @cohortTable.subject_id - FROM - @resultSchema.@cohortTable - ) unique_subjects - ) new_subject_ids - ON @cohortTable.subject_id = subject_id_origin - INNER JOIN @cdmSchema.person - ON subject_id_origin = person.person_id - INNER JOIN @cdmSchema.concept - ON person.gender_concept_id = concept.concept_id - WHERE - @cohortTable.cohort_definition_id IN (@cohortIds) - AND DATEDIFF(d, cohort_start_date, cohort_end_date) >= @minEraDuration \ No newline at end of file From 2344045d48727e1d8c40a0a775fd4768e3a79084 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Thu, 8 Jan 2026 09:38:13 +0100 Subject: [PATCH 12/33] disabled CDMInterface tests --- tests/testthat/test-CDMInterfaceCDMC.R | 226 ++++++++++++------------- 1 file changed, 113 insertions(+), 113 deletions(-) diff --git a/tests/testthat/test-CDMInterfaceCDMC.R b/tests/testthat/test-CDMInterfaceCDMC.R index adf34c14..522aece6 100644 --- a/tests/testthat/test-CDMInterfaceCDMC.R +++ b/tests/testthat/test-CDMInterfaceCDMC.R @@ -1,113 +1,113 @@ -library(testthat) -library(TreatmentPatterns) -library(dplyr) - -if (ableToRun()$CDMC) { - andromeda <- Andromeda::andromeda() - - con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir()) - - withr::defer({ - Andromeda::close(andromeda) - }) - - cohorts <- data.frame( - cohortId = c(1, 2, 3), - cohortName = c("Disease X", "Drug A", "Drug B"), - type = c("target", "event", "event") - ) - - cohort_table <- tibble( - cohort_definition_id = c(3, 2, 1), - subject_id = c(1, 1, 1), - cohort_start_date = as.Date(c("2014-10-10", "2014-11-07", "2014-10-10")), - cohort_end_date = as.Date(c("2015-08-01", "2014-12-04", "2015-08-01")) - ) - - dplyr::copy_to(con, cohort_table, overwrite = TRUE) - - localCdm <- cdmFromCon( - con = con, - cdmSchema = "main", - writeSchema = "main", - cohortTables = "cohort_table" - ) - - cdmInterface <- TreatmentPatterns:::CDMInterface$new(cdm = localCdm) -} - -test_that("Method: new", { - skip_on_cran() - skip_if_not(ableToRun()$CDMC) - expect_true(R6::is.R6( - TreatmentPatterns:::CDMInterface$new(cdm = localCdm) - )) -}) - -test_that("Method: new - empty", { - skip_on_cran() - expect_error( - TreatmentPatterns:::CDMInterface$new(), - "Could not assert if CDMConnector or DatabaseConnector is being used." - ) -}) - -test_that("Method: fetchMetadata", { - skip_on_cran() - skip_if_not(ableToRun()$CDMC) - andromeda <- cdmInterface$fetchMetadata(andromeda) - - metadata <- andromeda$metadata %>% collect() - - expect_in( - c("execution_start", "package_version", "r_version", "platform"), - names(metadata) - ) - - expect_identical(metadata$r_version, base::version$version.string) - expect_identical(metadata$platform, base::version$platform) - expect_identical(nrow(metadata), 1L) - expect_identical(ncol(metadata), 4L) -}) - -test_that("Method: fetchCohortTable", { - skip_on_cran() - skip_if_not(ableToRun()$CDMC) - # Update CDM with new dummy data - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - cdm = localCdm - ) - - # Viral Sinusitis - cdmInterface$fetchCohortTable( - cohorts = cohorts, - cohortTableName = "cohort_table", - andromeda = andromeda, - andromedaTableName = "cohortTable", - minEraDuration = 5 - ) - - res <- andromeda$cohortTable - - expect_identical(ncol(res), 7L) - expect_identical(res %>% collect() %>% nrow(), 3L) - - # [!] Disabled - # Empty - # cdmInterface$fetchCohortTable( - # cohorts = data.frame( - # cohortId = numeric(), - # cohortName = character(), - # type = character() - # ), - # cohortTableName = "cohort_table", - # andromeda = andromeda, - # andromedaTableName = "cohortTable", - # minEraDuration = 5 - # ) - # - # res <- andromeda$cohortTable - # - # expect_identical(ncol(res), 7L) - # expect_identical(res %>% collect() %>% nrow(), 0L) -}) +# library(testthat) +# library(TreatmentPatterns) +# library(dplyr) +# +# if (ableToRun()$CDMC) { +# andromeda <- Andromeda::andromeda() +# +# con <- DBI::dbConnect(duckdb::duckdb(), dbdir = eunomiaDir()) +# +# withr::defer({ +# Andromeda::close(andromeda) +# }) +# +# cohorts <- data.frame( +# cohortId = c(1, 2, 3), +# cohortName = c("Disease X", "Drug A", "Drug B"), +# type = c("target", "event", "event") +# ) +# +# cohort_table <- tibble( +# cohort_definition_id = c(3, 2, 1), +# subject_id = c(1, 1, 1), +# cohort_start_date = as.Date(c("2014-10-10", "2014-11-07", "2014-10-10")), +# cohort_end_date = as.Date(c("2015-08-01", "2014-12-04", "2015-08-01")) +# ) +# +# dplyr::copy_to(con, cohort_table, overwrite = TRUE) +# +# localCdm <- cdmFromCon( +# con = con, +# cdmSchema = "main", +# writeSchema = "main", +# cohortTables = "cohort_table" +# ) +# +# cdmInterface <- TreatmentPatterns:::CDMInterface$new(cdm = localCdm) +# } +# +# test_that("Method: new", { +# skip_on_cran() +# skip_if_not(ableToRun()$CDMC) +# expect_true(R6::is.R6( +# TreatmentPatterns:::CDMInterface$new(cdm = localCdm) +# )) +# }) +# +# test_that("Method: new - empty", { +# skip_on_cran() +# expect_error( +# TreatmentPatterns:::CDMInterface$new(), +# "Could not assert if CDMConnector or DatabaseConnector is being used." +# ) +# }) +# +# test_that("Method: fetchMetadata", { +# skip_on_cran() +# skip_if_not(ableToRun()$CDMC) +# andromeda <- cdmInterface$fetchMetadata(andromeda) +# +# metadata <- andromeda$metadata %>% collect() +# +# expect_in( +# c("execution_start", "package_version", "r_version", "platform"), +# names(metadata) +# ) +# +# expect_identical(metadata$r_version, base::version$version.string) +# expect_identical(metadata$platform, base::version$platform) +# expect_identical(nrow(metadata), 1L) +# expect_identical(ncol(metadata), 4L) +# }) +# +# test_that("Method: fetchCohortTable", { +# skip_on_cran() +# skip_if_not(ableToRun()$CDMC) +# # Update CDM with new dummy data +# cdmInterface <- TreatmentPatterns:::CDMInterface$new( +# cdm = localCdm +# ) +# +# # Viral Sinusitis +# cdmInterface$fetchCohortTable( +# cohorts = cohorts, +# cohortTableName = "cohort_table", +# andromeda = andromeda, +# andromedaTableName = "cohortTable", +# minEraDuration = 5 +# ) +# +# res <- andromeda$cohortTable +# +# expect_identical(ncol(res), 7L) +# expect_identical(res %>% collect() %>% nrow(), 3L) +# +# # [!] Disabled +# # Empty +# # cdmInterface$fetchCohortTable( +# # cohorts = data.frame( +# # cohortId = numeric(), +# # cohortName = character(), +# # type = character() +# # ), +# # cohortTableName = "cohort_table", +# # andromeda = andromeda, +# # andromedaTableName = "cohortTable", +# # minEraDuration = 5 +# # ) +# # +# # res <- andromeda$cohortTable +# # +# # expect_identical(ncol(res), 7L) +# # expect_identical(res %>% collect() %>% nrow(), 0L) +# }) From a2b044d286c17aad87631557452022c4246a0136 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Thu, 8 Jan 2026 14:08:19 +0100 Subject: [PATCH 13/33] moved CDMConnector to Imports --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81d97203..02603311 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,8 @@ Imports: dplyr, tidyr, dbplyr, - Andromeda + Andromeda, + CDMConnector Suggests: knitr, rmarkdown, @@ -33,7 +34,6 @@ Suggests: testthat (>= 3.0.0), usethis, Eunomia, - CDMConnector, DatabaseConnector (>= 6.0.0), SqlRender, CohortGenerator, From 61eca55900512d690f6dfdd673df132ca9f9d1f2 Mon Sep 17 00:00:00 2001 From: Maarten van Kessel Date: Thu, 15 Jan 2026 10:37:58 +0100 Subject: [PATCH 14/33] cast personId to int --- R/CDMInterface.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/CDMInterface.R b/R/CDMInterface.R index 6a70c066..51f52949 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -69,7 +69,7 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda dplyr::mutate(r = dplyr::row_number()) %>% dplyr::group_by(.data$subject_id_origin) %>% dplyr::mutate( - subject_id = min(.data$r, na.rm = TRUE) + subject_id = as.integer(min(.data$r, na.rm = TRUE)) ) %>% dplyr::select(-"r") %>% dplyr::ungroup() %>% From acc3e58e603db2391f04faa4b11e28bd464bb03e Mon Sep 17 00:00:00 2001 From: Maarten van Kessel Date: Thu, 15 Jan 2026 10:38:24 +0100 Subject: [PATCH 15/33] fix attempt --- R/constructPathways.R | 146 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 17 deletions(-) diff --git a/R/constructPathways.R b/R/constructPathways.R index 2bde597f..b1bdd670 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -448,6 +448,17 @@ doEraCollapseNew <- function(andromeda, eraCollapseSize) { andromeda$treatmentHistory <- andromeda$treatmentHistory |> dplyr::select(-"diff", -"flag", -"row", -"end_date") + attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") + appendAttrition( + toAdd = data.frame( + number_records = attrCounts$nRecords, + number_subjects = attrCounts$nSubjects, + reason_id = 5, + reason = sprintf("No eras needed Collapsing, eraCollapse (%s)", eraCollapseSize) + ), + andromeda = andromeda + ) + start <- FALSE } } @@ -522,6 +533,16 @@ doEraCollapse <- function(andromeda, eraCollapseSize) { return(invisible(NULL)) } +debugTest <- function(andromeda) { + rows <- andromeda$treatmentHistory %>% + dplyr::filter(is.na(.data$eventStartDate) | is.na(.data$eventEndDate)) %>% + dplyr::collect() %>% + nrow() + + if (rows > 0) { + eval(expr = expression(browser()), envir = sys.frame(which = sys.nframe() - 1)) + } +} #' Combine overlapping events into combinations #' @@ -544,7 +565,9 @@ doCombinationWindow <- function( minPostCombinationDuration, overlapMethod) { # Find which rows contain some overlap + debugTest(andromeda) selectRowsCombinationWindow(andromeda, combinationWindow, overlapMethod) + debugTest(andromeda) # While rows that need modification exist: iterations <- 1 @@ -557,6 +580,7 @@ doCombinationWindow <- function( dplyr::filter(.data$selectedRows) %>% dplyr::count() %>% dplyr::pull() != 0) { + debugTest(andromeda) # Which rows have gap previous shorter than combination window OR # min(current duration era, previous duration era) -> add column switch andromeda$treatmentHistory <- andromeda$treatmentHistory %>% @@ -568,6 +592,7 @@ doCombinationWindow <- function( .default = 0 )) + debugTest(andromeda) # For rows selected not in column switch -> # if treatmentHistory[r - 1, event_end_date] <= # treatmentHistory[r, event_end_date] -> @@ -582,6 +607,7 @@ doCombinationWindow <- function( )) %>% dplyr::ungroup() + debugTest(andromeda) # For rows selected not in column switch -> # if treatmentHistory[r - 1, event_end_date] > # treatmentHistory[r, event_end_date] -> @@ -599,6 +625,7 @@ doCombinationWindow <- function( )) %>% dplyr::ungroup() + debugTest(andromeda) sumSwitchComb <- andromeda$treatmentHistory %>% dplyr::filter( .data$switch == 1 | @@ -608,6 +635,7 @@ doCombinationWindow <- function( dplyr::summarise(dplyr::n()) %>% pull() + debugTest(andromeda) switchedPersons <- andromeda$treatmentHistory %>% dplyr::filter( .data$switch == 1 | @@ -616,25 +644,59 @@ doCombinationWindow <- function( ) %>% dplyr::pull(.data$personId) + debugTest(andromeda) totalPersons <- andromeda$treatmentHistory %>% + dplyr::filter(.data$selectedRows == TRUE) %>% dplyr::pull(.data$personId) + debugTest(andromeda) missingIds <- totalPersons[!totalPersons %in% switchedPersons] + debugTest(andromeda) sumSelectedRows <- andromeda$treatmentHistory %>% dplyr::summarise(sum = sum(.data$selectedRows, na.rm = TRUE)) %>% dplyr::pull() + debugTest(andromeda) if (sumSwitchComb != sumSelectedRows) { - stop(sprintf( - "Expected switches before combination (%s) to be equal to switches after combination (%s)\nMissing person IDs: %s", - sumSelectedRows, sumSwitchComb, missingIds - )) + thPath <- file.path(tempdir(), sprintf("%s_problematic_treatment_history.csv", iterations)) + ctPath <- file.path(tempdir(), sprintf("%s_problematic_cohort_table.csv", iterations)) + + andromeda$treatmentHistory %>% + dplyr::filter(.data$personId %in% missingIds) %>% + dplyr::collect() %>% + write.csv(thPath) + + message(sprintf("Wrote problematic persons from Treatment History to: %s", thPath)) + + andromeda$cohortTable %>% + dplyr::filter(.data$personId %in% missingIds) %>% + dplyr::collect() %>% + write.csv(ctPath) + + message(sprintf("Wrote problematic persons from Cohort Table to: %s", ctPath)) + + andromeda$treatmentHistory <- andromeda$treatmentHistory %>% + dplyr::filter(!.data$personId %in% missingIds) + + # attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") + # appendAttrition( + # toAdd = data.frame( + # number_records = attrCounts$nRecords, + # number_subjects = attrCounts$nSubjects, + # reason_id = 999, + # reason = sprintf("Filterd out problematic subjects: %s", paste0(missingIds, collapse = ", ")) + # ), + # andromeda = andromeda + # ) + + stop(sprintf(": %s", paste0(missingIds, collapse = ", "))) } # Do transformations for each of the three newly added columns # Construct helpers + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$n_target) %>% dplyr::mutate( @@ -663,34 +725,58 @@ doCombinationWindow <- function( ) %>% dplyr::ungroup() + debugTest(andromeda) if (overlapMethod == "truncate") { + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% + dplyr::group_by(.data$personId, .data$targetCohortId) %>% + dbplyr::window_order(.data$sortOrder) %>% dplyr::mutate( eventEndDate = dplyr::case_when( - dplyr::lead(.data$switch) == 1 ~ .data$eventStartDateNext, + dplyr::lead(.data$switch) == 1 + & !is.na(.data$eventStartDateNext) ~ .data$eventStartDateNext, .default = .data$eventEndDate ) - ) + ) %>% + dplyr::ungroup() %>% + dbplyr::window_order() } + debugTest(andromeda) andromeda[[sprintf("addRowsFRFS_%s", iterations)]] <- andromeda$treatmentHistory %>% dplyr::filter(.data$combinationFRFS == 1) + debugTest(andromeda) andromeda[[sprintf("addRowsFRFS_%s", iterations)]] <- andromeda[[sprintf("addRowsFRFS_%s", iterations)]] %>% - dplyr::mutate(eventEndDate = .data$eventEndDatePrevious) + dplyr::mutate( + eventEndDate = dplyr::case_when( + !is.na(.data$eventEndDatePrevious) ~ .data$eventEndDatePrevious, + .default = .data$eventEndDate + ) + ) + debugTest(andromeda) andromeda[[sprintf("addRowsFRFS_%s", iterations)]] <- andromeda[[sprintf("addRowsFRFS_%s", iterations)]] %>% dplyr::mutate(eventCohortId = paste0(.data$eventCohortId, "+", .data$eventCohortIdPrevious)) + debugTest(andromeda) + andromeda$treatmentHistory <- andromeda$treatmentHistory %>% + dplyr::group_by(.data$personId, .data$targetCohortId) %>% + dbplyr::window_order(.data$sortOrder) %>% dplyr::mutate( eventEndDate = dplyr::case_when( - dplyr::lead(.data$combinationFRFS) == 1 ~ eventStartDateNext, + dplyr::lead(.data$combinationFRFS) == 1 + & !is.na(.data$eventStartDateNext) + ~ eventStartDateNext, .default = .data$eventEndDate ), checkDuration = dplyr::case_when(dplyr::lead(.data$combinationFRFS) == 1 ~ 1) - ) + ) %>% + dplyr::ungroup() %>% + dbplyr::window_order() + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( eventStartDate = dplyr::case_when( @@ -703,28 +789,41 @@ doCombinationWindow <- function( ) ) + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% - dplyr::mutate(eventCohortId = dplyr::case_when( - .data$combinationLRFS == 1 ~ paste0(.data$eventCohortId, "+", .data$eventCohortIdPrevious), - .default = .data$eventCohortId - )) + dplyr::mutate( + eventCohortId = dplyr::case_when( + .data$combinationLRFS == 1 + & !is.na(.data$eventCohortIdPrevious) ~ paste0(.data$eventCohortId, "+", .data$eventCohortIdPrevious), + .default = .data$eventCohortId + ) + ) + debugTest(andromeda) andromeda[[sprintf("addRowsLRFS_%s", iterations)]] <- andromeda$treatmentHistory %>% - dbplyr::window_order(.data$personId, .data$sortOrder) %>% + dplyr::group_by(.data$personId, .data$targetCohortId) %>% + dbplyr::window_order(.data$sortOrder) %>% dplyr::filter(dplyr::lead(.data$combinationLRFS) == 1) + debugTest(andromeda) andromeda[[sprintf("addRowsLRFS_%s", iterations)]] <- andromeda[[sprintf("addRowsLRFS_%s", iterations)]] %>% dplyr::mutate( - eventStartDate = .data$eventEndDateNext, + eventStartDate = dplyr::case_when( + is.na(.data$eventEndDateNext) ~ .data$eventStartDate, + .default = .data$eventEndDateNext + ), checkDuration = 1 ) + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% - dplyr::group_by(.data$personId, .data$n_target) %>% + dplyr::group_by(.data$personId, .data$targetCohortId) %>% dbplyr::window_order(.data$sortOrder) %>% dplyr::mutate( eventEndDate = dplyr::case_when( - dplyr::lead(.data$combinationLRFS) == 1 ~ .data$eventStartDateNext, + dplyr::lead(.data$combinationLRFS) == 1 + & !is.na(.data$eventStartDateNext) + ~ .data$eventStartDateNext, .default = .data$eventEndDate ), checkDuration = dplyr::case_when( @@ -733,17 +832,21 @@ doCombinationWindow <- function( ) ) + debugTest(andromeda) + # [!] HERE andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::union_all(andromeda[[sprintf("addRowsFRFS_%s", iterations)]]) %>% dplyr::union_all(andromeda[[sprintf("addRowsLRFS_%s", iterations)]]) %>% dplyr::mutate(durationEra = .data$eventEndDate - .data$eventStartDate) + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% # dplyr::filter(.data$eventStartDate != .data$eventEndDate) # Original from mi-erasmus and older versions of DARWIN TreatmentPatterns # dbplyr::window_order(.data$sortOrder) %>% dplyr::filter(.data$durationEra >= minPostCombinationDuration | is.na(.data$durationEra)) + debugTest(andromeda) attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") appendAttrition( toAdd = data.frame( @@ -755,17 +858,20 @@ doCombinationWindow <- function( andromeda = andromeda ) + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::select( "personId", "indexYear", "eventCohortId", "targetCohortId", "eventStartDate", "age", "sex", "eventEndDate", "durationEra", "gapPrevious", "n_target" ) + debugTest(andromeda) selectRowsCombinationWindow(andromeda, combinationWindow, overlapMethod) iterations <- iterations + 1 } + debugTest(andromeda) attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") appendAttrition( toAdd = data.frame( @@ -777,6 +883,7 @@ doCombinationWindow <- function( andromeda = andromeda ) + debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% select(-"gapPrevious", -"selectedRows") return(invisible(NULL)) @@ -793,6 +900,7 @@ doCombinationWindow <- function( #' #' @return (`invisible(NULL)`) selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMethod) { + debugTest(andromeda) # Order treatmentHistory by person_id, event_start_date, event_end_date # andromeda$treatmentHistory <- andromeda$treatmentHistory %>% # arrange(.data$personId, .data$eventStartDate, .data$eventEndDate) @@ -819,6 +927,7 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet # dplyr::mutate(allRows = ifelse(.data$gapPrevious < 0, dplyr::row_number(), NA)) # Select one row per iteration for each person + debugTest(andromeda) rows <- andromeda$treatmentHistory %>% dplyr::filter(!is.na(.data$allRows)) %>% dplyr::group_by(.data$personId, .data$n_target) %>% @@ -828,6 +937,7 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet dplyr::pull() if (overlapMethod == "truncate") { + debugTest(andromeda) treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( selectedRows = dplyr::case_when( @@ -836,6 +946,7 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet ) ) } else if (overlapMethod == "keep") { + debugTest(andromeda) treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( selectedRows = dplyr::case_when( @@ -846,6 +957,7 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet } # treatmentHistory[, ALL_ROWS := NULL] + debugTest(andromeda) andromeda$treatmentHistory <- treatmentHistory %>% dplyr::select(-"allRows") %>% dplyr::arrange(.data$personId, .data$eventStartDate, .data$eventEndDate, .data$eventCohortId) From 243a015f9de6cc1dcfbafe127d6ef73a75c9c7bf Mon Sep 17 00:00:00 2001 From: Maarten van Kessel Date: Wed, 21 Jan 2026 14:29:16 +0100 Subject: [PATCH 16/33] stability fix --- R/CDMInterface.R | 21 ++++---- R/computePathways.R | 4 +- R/constructPathways.R | 117 ++++++++++++------------------------------ 3 files changed, 48 insertions(+), 94 deletions(-) diff --git a/R/CDMInterface.R b/R/CDMInterface.R index 51f52949..8de02f60 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -27,8 +27,8 @@ fetchMetadata <- function(andromeda) { dbAppendAttrition <- function(n, andromeda, cohortIds) { appendAttrition( toAdd = data.frame( - number_records = sum(n), - number_subjects = length(n), + number_records = as.integer(sum(n)), + number_subjects = as.integer(length(n)), reason_id = 1, reason = sprintf("Qualifying records for cohort definitions: %s", paste(cohortIds, collapse = ", ")), time_stamp = as.numeric(Sys.time()) @@ -42,11 +42,11 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda dplyr::filter(.data$type == "target") %>% dplyr::select("cohortId") %>% dplyr::pull() - - n <- sapply(cohortTableName, function(tableName) { + + n <- lapply(cohortTableName, function(tableName) { cdm[[tableName]] %>% dplyr::group_by(.data$subject_id) %>% - dplyr::summarise(n = dplyr::n()) %>% + dplyr::summarise(n = as.integer(dplyr::n())) %>% dplyr::pull() }) |> unlist() @@ -59,7 +59,7 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda cohortIds <- cohorts$cohortId for (tableName in cohortTableName) { - tbl <- cdm[[tableName]] %>% + cdm$tbl <- cdm[[tableName]] %>% dplyr::group_by(.data$subject_id) %>% dplyr::mutate( subject_id_origin = .data$subject_id @@ -107,12 +107,13 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda "cohort_end_date", "age", "sex" - ) - + ) %>% + dplyr::compute() + if (is.null(andromeda[[andromedaTableName]])) { - dplyr::copy_to(dest = andromeda, df = tbl, name = andromedaTableName, overwrite = TRUE) + dplyr::copy_to(dest = andromeda, df = cdm$tbl, name = andromedaTableName, overwrite = TRUE) } else { - dplyr::copy_to(dest = andromeda, df = tbl, name = "tbl_temp", overwrite = TRUE) + dplyr::copy_to(dest = andromeda, df = cdm$tbl, name = "tbl_temp", overwrite = TRUE) andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% dplyr::union_all(andromeda$tbl_temp) andromeda$tbl_temp <- NULL diff --git a/R/computePathways.R b/R/computePathways.R index 78f01819..5906dd7c 100644 --- a/R/computePathways.R +++ b/R/computePathways.R @@ -144,8 +144,10 @@ computePathways <- function( con = con, cdmSchema = cdmSchema, writeSchema = resultSchema, - cohortTables = cohortTableName ) + + cdm[[cohortTableName]] <- dplyr::tbl(src = con, cohortTableName) + withr::defer({ DatabaseConnector::disconnect(con) }) diff --git a/R/constructPathways.R b/R/constructPathways.R index b1bdd670..c4ae843d 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -496,6 +496,8 @@ doEraCollapse <- function(andromeda, eraCollapseSize) { dplyr::filter(.data$gap <= eraCollapseSize) %>% dplyr::pull(.data$row) + browser() + for (row in rows) { record <- andromeda$treatmentHistory %>% dplyr::group_by(.data$eventCohortId, .data$personId, .data$n_target) %>% @@ -533,17 +535,6 @@ doEraCollapse <- function(andromeda, eraCollapseSize) { return(invisible(NULL)) } -debugTest <- function(andromeda) { - rows <- andromeda$treatmentHistory %>% - dplyr::filter(is.na(.data$eventStartDate) | is.na(.data$eventEndDate)) %>% - dplyr::collect() %>% - nrow() - - if (rows > 0) { - eval(expr = expression(browser()), envir = sys.frame(which = sys.nframe() - 1)) - } -} - #' Combine overlapping events into combinations #' #' doCombinationWindow is an internal function that combines overlapping events @@ -565,49 +556,49 @@ doCombinationWindow <- function( minPostCombinationDuration, overlapMethod) { # Find which rows contain some overlap - debugTest(andromeda) selectRowsCombinationWindow(andromeda, combinationWindow, overlapMethod) - debugTest(andromeda) - + # While rows that need modification exist: iterations <- 1 - # n <- andromeda$treatmentHistory %>% - # summarise(sum = sum(.data$selectedRows), .groups = "drop") %>% - # dplyr::pull() - while (andromeda$treatmentHistory %>% dplyr::filter(.data$selectedRows) %>% dplyr::count() %>% dplyr::pull() != 0) { - debugTest(andromeda) + # Which rows have gap previous shorter than combination window OR # min(current duration era, previous duration era) -> add column switch andromeda$treatmentHistory <- andromeda$treatmentHistory %>% - dplyr::mutate(switch = case_when( - .data$selectedRows == 1 & - -.data$gapPrevious < combinationWindow & - !(-.data$gapPrevious == .data$durationEra | - -gapPrevious == dplyr::lag(.data$durationEra, order_by = .data$sortOrder)) ~ 1, + dplyr::group_by(.data$personId, .data$targetCohortId) %>% + dplyr::mutate( + switch = case_when( + .data$selectedRows == 1 + & -.data$gapPrevious < combinationWindow + & !( + -.data$gapPrevious == .data$durationEra | + -gapPrevious == dplyr::lag(.data$durationEra, order_by = .data$sortOrder) + ) + ~ 1, .default = 0 )) - debugTest(andromeda) # For rows selected not in column switch -> # if treatmentHistory[r - 1, event_end_date] <= # treatmentHistory[r, event_end_date] -> # add column combination first received, first stopped andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$n_target) %>% - dplyr::mutate(combinationFRFS = case_when( - .data$selectedRows == 1 & - switch == 0 & - dplyr::lag(eventEndDate, order_by = .data$sortOrder) < eventEndDate ~ 1, + dbplyr::window_order(.data$sortOrder) %>% + dplyr::mutate( + combinationFRFS = case_when( + .data$selectedRows == 1 + & .data$switch == 0 + & dplyr::lag(.data$eventEndDate) < .data$eventEndDate ~ 1, .default = 0 )) %>% - dplyr::ungroup() + dplyr::ungroup() %>% + dbplyr::window_order() - debugTest(andromeda) # For rows selected not in column switch -> # if treatmentHistory[r - 1, event_end_date] > # treatmentHistory[r, event_end_date] -> @@ -625,7 +616,6 @@ doCombinationWindow <- function( )) %>% dplyr::ungroup() - debugTest(andromeda) sumSwitchComb <- andromeda$treatmentHistory %>% dplyr::filter( .data$switch == 1 | @@ -635,7 +625,6 @@ doCombinationWindow <- function( dplyr::summarise(dplyr::n()) %>% pull() - debugTest(andromeda) switchedPersons <- andromeda$treatmentHistory %>% dplyr::filter( .data$switch == 1 | @@ -644,20 +633,16 @@ doCombinationWindow <- function( ) %>% dplyr::pull(.data$personId) - debugTest(andromeda) totalPersons <- andromeda$treatmentHistory %>% dplyr::filter(.data$selectedRows == TRUE) %>% dplyr::pull(.data$personId) - debugTest(andromeda) missingIds <- totalPersons[!totalPersons %in% switchedPersons] - debugTest(andromeda) sumSelectedRows <- andromeda$treatmentHistory %>% dplyr::summarise(sum = sum(.data$selectedRows, na.rm = TRUE)) %>% dplyr::pull() - debugTest(andromeda) if (sumSwitchComb != sumSelectedRows) { thPath <- file.path(tempdir(), sprintf("%s_problematic_treatment_history.csv", iterations)) ctPath <- file.path(tempdir(), sprintf("%s_problematic_cohort_table.csv", iterations)) @@ -679,55 +664,40 @@ doCombinationWindow <- function( andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::filter(!.data$personId %in% missingIds) - # attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") - # appendAttrition( - # toAdd = data.frame( - # number_records = attrCounts$nRecords, - # number_subjects = attrCounts$nSubjects, - # reason_id = 999, - # reason = sprintf("Filterd out problematic subjects: %s", paste0(missingIds, collapse = ", ")) - # ), - # andromeda = andromeda - # ) - stop(sprintf(": %s", paste0(missingIds, collapse = ", "))) } # Do transformations for each of the three newly added columns # Construct helpers - - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$n_target) %>% dplyr::mutate( eventStartDateNext = dplyr::lead( .data$eventStartDate, - order_by = .data$eventStartDate + order_by = .data$sortOrder ) ) %>% dplyr::mutate( eventEndDatePrevious = dplyr::lag( .data$eventEndDate, - order_by = .data$eventStartDate + order_by = .data$sortOrder ) ) %>% dplyr::mutate( eventEndDateNext = dplyr::lead( .data$eventEndDate, - order_by = .data$eventStartDate + order_by = .data$sortOrder ) ) %>% dplyr::mutate( eventCohortIdPrevious = dplyr::lag( .data$eventCohortId, - order_by = .data$eventStartDate + order_by = .data$sortOrder ) ) %>% dplyr::ungroup() - debugTest(andromeda) if (overlapMethod == "truncate") { - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$targetCohortId) %>% dbplyr::window_order(.data$sortOrder) %>% @@ -742,11 +712,9 @@ doCombinationWindow <- function( dbplyr::window_order() } - debugTest(andromeda) andromeda[[sprintf("addRowsFRFS_%s", iterations)]] <- andromeda$treatmentHistory %>% dplyr::filter(.data$combinationFRFS == 1) - - debugTest(andromeda) + andromeda[[sprintf("addRowsFRFS_%s", iterations)]] <- andromeda[[sprintf("addRowsFRFS_%s", iterations)]] %>% dplyr::mutate( eventEndDate = dplyr::case_when( @@ -755,12 +723,9 @@ doCombinationWindow <- function( ) ) - debugTest(andromeda) andromeda[[sprintf("addRowsFRFS_%s", iterations)]] <- andromeda[[sprintf("addRowsFRFS_%s", iterations)]] %>% dplyr::mutate(eventCohortId = paste0(.data$eventCohortId, "+", .data$eventCohortIdPrevious)) - debugTest(andromeda) - andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$targetCohortId) %>% dbplyr::window_order(.data$sortOrder) %>% @@ -776,7 +741,6 @@ doCombinationWindow <- function( dplyr::ungroup() %>% dbplyr::window_order() - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( eventStartDate = dplyr::case_when( @@ -789,7 +753,6 @@ doCombinationWindow <- function( ) ) - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( eventCohortId = dplyr::case_when( @@ -799,13 +762,11 @@ doCombinationWindow <- function( ) ) - debugTest(andromeda) andromeda[[sprintf("addRowsLRFS_%s", iterations)]] <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$targetCohortId) %>% dbplyr::window_order(.data$sortOrder) %>% dplyr::filter(dplyr::lead(.data$combinationLRFS) == 1) - debugTest(andromeda) andromeda[[sprintf("addRowsLRFS_%s", iterations)]] <- andromeda[[sprintf("addRowsLRFS_%s", iterations)]] %>% dplyr::mutate( eventStartDate = dplyr::case_when( @@ -815,7 +776,6 @@ doCombinationWindow <- function( checkDuration = 1 ) - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::group_by(.data$personId, .data$targetCohortId) %>% dbplyr::window_order(.data$sortOrder) %>% @@ -832,21 +792,17 @@ doCombinationWindow <- function( ) ) - debugTest(andromeda) - # [!] HERE andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::union_all(andromeda[[sprintf("addRowsFRFS_%s", iterations)]]) %>% dplyr::union_all(andromeda[[sprintf("addRowsLRFS_%s", iterations)]]) %>% dplyr::mutate(durationEra = .data$eventEndDate - .data$eventStartDate) - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% # dplyr::filter(.data$eventStartDate != .data$eventEndDate) # Original from mi-erasmus and older versions of DARWIN TreatmentPatterns # dbplyr::window_order(.data$sortOrder) %>% dplyr::filter(.data$durationEra >= minPostCombinationDuration | is.na(.data$durationEra)) - debugTest(andromeda) attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") appendAttrition( toAdd = data.frame( @@ -858,20 +814,18 @@ doCombinationWindow <- function( andromeda = andromeda ) - debugTest(andromeda) andromeda$treatmentHistory <- andromeda$treatmentHistory %>% dplyr::select( "personId", "indexYear", "eventCohortId", "targetCohortId", "eventStartDate", "age", "sex", "eventEndDate", "durationEra", "gapPrevious", "n_target" ) - debugTest(andromeda) selectRowsCombinationWindow(andromeda, combinationWindow, overlapMethod) iterations <- iterations + 1 } - debugTest(andromeda) + attrCounts <- fetchAttritionCounts(andromeda, "treatmentHistory") appendAttrition( toAdd = data.frame( @@ -882,8 +836,7 @@ doCombinationWindow <- function( ), andromeda = andromeda ) - - debugTest(andromeda) + andromeda$treatmentHistory <- andromeda$treatmentHistory %>% select(-"gapPrevious", -"selectedRows") return(invisible(NULL)) @@ -900,19 +853,19 @@ doCombinationWindow <- function( #' #' @return (`invisible(NULL)`) selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMethod) { - debugTest(andromeda) # Order treatmentHistory by person_id, event_start_date, event_end_date # andromeda$treatmentHistory <- andromeda$treatmentHistory %>% # arrange(.data$personId, .data$eventStartDate, .data$eventEndDate) - andromeda$treatmentHistory <- andromeda$treatmentHistory %>% + dbplyr::window_order(.data$eventStartDate, .data$eventEndDate, .data$eventCohortId) %>% dplyr::mutate(sortOrder = as.numeric(.data$eventStartDate) + as.numeric(.data$eventEndDate) * row_number() / n() * 10^-6) %>% + dbplyr::window_order() %>% dplyr::group_by(.data$personId, .data$n_target) %>% dbplyr::window_order(.data$sortOrder) %>% # Use -365 * 1000000 instead of -Inf, because -Inf is not castable for export dplyr::mutate(gapPrevious = .data$eventStartDate - dplyr::lag(.data$eventEndDate, order_by = .data$sortOrder, default = -365 * 1000000)) %>% dplyr::ungroup() %>% - dplyr::mutate(allRows = ifelse(.data$gapPrevious < 0, dplyr::row_number(), NA)) %>% + dplyr::mutate(allRows = dplyr::if_else(.data$gapPrevious < 0, dplyr::row_number(), NA)) %>% dbplyr::window_order(.data$sortOrder) %>% dplyr::mutate(gapPrevious = case_when( is.na(.data$gapPrevious) & eventEndDate == lead(eventEndDate) ~ lead(gapPrevious), @@ -927,17 +880,16 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet # dplyr::mutate(allRows = ifelse(.data$gapPrevious < 0, dplyr::row_number(), NA)) # Select one row per iteration for each person - debugTest(andromeda) rows <- andromeda$treatmentHistory %>% dplyr::filter(!is.na(.data$allRows)) %>% dplyr::group_by(.data$personId, .data$n_target) %>% + dbplyr::window_order(.data$sortOrder) %>% dplyr::filter(dplyr::row_number() == 1) %>% dplyr::ungroup() %>% dplyr::select("allRows") %>% dplyr::pull() if (overlapMethod == "truncate") { - debugTest(andromeda) treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( selectedRows = dplyr::case_when( @@ -946,7 +898,6 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet ) ) } else if (overlapMethod == "keep") { - debugTest(andromeda) treatmentHistory <- andromeda$treatmentHistory %>% dplyr::mutate( selectedRows = dplyr::case_when( @@ -957,7 +908,7 @@ selectRowsCombinationWindow <- function(andromeda, combinationWindow, overlapMet } # treatmentHistory[, ALL_ROWS := NULL] - debugTest(andromeda) + andromeda$treatmentHistory <- treatmentHistory %>% dplyr::select(-"allRows") %>% dplyr::arrange(.data$personId, .data$eventStartDate, .data$eventEndDate, .data$eventCohortId) From f711a7b850d073a39bd8abd4cd2a787b721b3c4f Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 21 Jan 2026 14:50:34 +0100 Subject: [PATCH 17/33] removed browser --- R/constructPathways.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/constructPathways.R b/R/constructPathways.R index c4ae843d..2bd6b12b 100755 --- a/R/constructPathways.R +++ b/R/constructPathways.R @@ -496,8 +496,6 @@ doEraCollapse <- function(andromeda, eraCollapseSize) { dplyr::filter(.data$gap <= eraCollapseSize) %>% dplyr::pull(.data$row) - browser() - for (row in rows) { record <- andromeda$treatmentHistory %>% dplyr::group_by(.data$eventCohortId, .data$personId, .data$n_target) %>% From 0eb69e61f33e7e3e163cb03bbab5556c62d99feb Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 21 Jan 2026 14:58:38 +0100 Subject: [PATCH 18/33] added skip if CirceR is not installed --- tests/testthat/test-exportPatientLevel.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-exportPatientLevel.R b/tests/testthat/test-exportPatientLevel.R index 8f4b6264..8a5f98cf 100644 --- a/tests/testthat/test-exportPatientLevel.R +++ b/tests/testthat/test-exportPatientLevel.R @@ -1,6 +1,7 @@ test_that("exportPatientLevel", { skip_on_cran() skip_if_not_installed("CDMConnector") + skip_if_not_installed("CirceR") con <- DBI::dbConnect(duckdb::duckdb(), dbdir = CDMConnector::eunomiaDir()) cdm <- CDMConnector::cdmFromCon(con, cdmSchema = "main", writeSchema = "main") From b46241f2b568fa2d9319f877a799646bece2e4b1 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 21 Jan 2026 15:29:53 +0100 Subject: [PATCH 19/33] updated test for extra attrition row --- tests/testthat/test-exportPatientLevel.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-exportPatientLevel.R b/tests/testthat/test-exportPatientLevel.R index 8a5f98cf..6875785f 100644 --- a/tests/testthat/test-exportPatientLevel.R +++ b/tests/testthat/test-exportPatientLevel.R @@ -40,13 +40,13 @@ test_that("exportPatientLevel", { cdm_source_info <- read.csv(file.path(tempdir(), "cdm_source_info.csv")) expect_equal(ncol(treatment_history), 13) - expect_equal(nrow(treatment_history), 554) + expect_equal(nrow(treatment_history), 553) expect_equal(ncol(metadata), 5) expect_equal(nrow(metadata), 1) expect_equal(ncol(attrition), 5) - expect_equal(nrow(attrition), 10) + expect_equal(nrow(attrition), 11) expect_equal(ncol(cdm_source_info), 10) expect_equal(nrow(cdm_source_info), 1) From a27dadf90aab4d0e536d8946f732b0fc5f9722c4 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Thu, 22 Jan 2026 08:52:38 +0100 Subject: [PATCH 20/33] Fix for multiple target cohorts and multiple cohort tables --- R/CDMInterface.R | 27 +++++++++++++-------------- R/computePathways.R | 4 +--- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/R/CDMInterface.R b/R/CDMInterface.R index 8de02f60..6dddf827 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -62,17 +62,8 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda cdm$tbl <- cdm[[tableName]] %>% dplyr::group_by(.data$subject_id) %>% dplyr::mutate( - subject_id_origin = .data$subject_id + subject_id_origin = as.character(.data$subject_id) ) %>% - dplyr::ungroup() %>% - dbplyr::window_order(.data$subject_id, .data$cohort_start_date) |> - dplyr::mutate(r = dplyr::row_number()) %>% - dplyr::group_by(.data$subject_id_origin) %>% - dplyr::mutate( - subject_id = as.integer(min(.data$r, na.rm = TRUE)) - ) %>% - dplyr::select(-"r") %>% - dplyr::ungroup() %>% dplyr::filter(.data$cohort_definition_id %in% cohortIds) %>% dplyr::filter(!!CDMConnector::datediff("cohort_start_date", "cohort_end_date", interval = "day") >= minEraDuration) %>% dplyr::group_by(.data$subject_id) %>% @@ -89,15 +80,15 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda dplyr::mutate( age = !!CDMConnector::datediff("date_of_birth", "cohort_start_date", interval = "year")) %>% dplyr::mutate( - subject_id_origin = as.character(subject_id_origin) + subject_id_origin = as.character(.data$subject_id_origin) ) %>% dplyr::rename(sex = "concept_name") %>% dplyr::mutate( temp_date = as.Date("1970-01-01") ) %>% dplyr::mutate( - cohort_start_date = !!CDMConnector::datediff(start = "temp_date", end = "cohort_start_date", interval = "day"), - cohort_end_date = !!CDMConnector::datediff(start = "temp_date", end = "cohort_end_date", interval = "day") + cohort_start_date = as.integer(!!CDMConnector::datediff(start = "temp_date", end = "cohort_start_date", interval = "day")), + cohort_end_date = as.integer(!!CDMConnector::datediff(start = "temp_date", end = "cohort_end_date", interval = "day")) ) %>% dplyr::select( "cohort_definition_id", @@ -119,7 +110,15 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda andromeda$tbl_temp <- NULL } } - + + andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% + dplyr::mutate(r = dplyr::row_number()) %>% + dplyr::group_by(.data$subject_id_origin) %>% + dplyr::mutate( + subject_id = as.integer(min(.data$r, na.rm = TRUE)) + ) %>% + dplyr::select(-"r") + targetId <- as.numeric(targetCohortIds) andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% diff --git a/R/computePathways.R b/R/computePathways.R index 5906dd7c..da96cf4b 100644 --- a/R/computePathways.R +++ b/R/computePathways.R @@ -148,9 +148,7 @@ computePathways <- function( cdm[[cohortTableName]] <- dplyr::tbl(src = con, cohortTableName) - withr::defer({ - DatabaseConnector::disconnect(con) - }) + on.exit(DatabaseConnector::disconnect(con)) } validateComputePathways() From 1a6e5b4613c4fd340774b94c2411e9d36dd2c3ef Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Thu, 22 Jan 2026 09:07:16 +0100 Subject: [PATCH 21/33] added test for multi table and target --- tests/testthat/test-multipleCohortTables.R | 68 ++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/tests/testthat/test-multipleCohortTables.R b/tests/testthat/test-multipleCohortTables.R index 4aef4b28..03dcf1f8 100644 --- a/tests/testthat/test-multipleCohortTables.R +++ b/tests/testthat/test-multipleCohortTables.R @@ -111,3 +111,71 @@ test_that("multiple cohort_tables", { expect_identical(result$treatment_pathways$pathway, "A") }) + +test_that("multiple cohort_tables andt Targets", { + skip_on_cran() + skip_if_not_installed("CDMConnector") + skip_if_not_installed("DatabaseConnector") + skip_if_not_installed("CirceR") + skip_if_not_installed("duckdb") + + server <- CDMConnector::eunomiaDir() + connectionDetails <- DatabaseConnector::createConnectionDetails( + dbms = "duckdb", + server = server + ) + con <- DatabaseConnector::connect(connectionDetails) + cdm <- CDMConnector::cdmFromCon(con, cdmSchema = "main", writeSchema = "main") + + cohortSet <- CDMConnector::readCohortSet( + path = system.file(package = "TreatmentPatterns", "exampleCohorts") + ) + + cdm <- CDMConnector::generateCohortSet( + cdm = cdm, + cohortSet = cohortSet, + name = "cohort_table" + ) + + cdm$target_cohort_table <- cdm$cohort_table %>% + dplyr::filter(.data$cohort_definition_id == 8) %>% + dplyr::mutate(cohort_definition_id = .data$cohort_definition_id + 1) %>% + dplyr::compute(name = "target_cohort_table", temporary = FALSE, overwrite = TRUE) + + cdm$target_cohort_table <- cdm$target_cohort_table %>% + dplyr::union_all( + cdm$target_cohort_table %>% + dplyr::mutate(cohort_definition_id = 9) + ) %>% + dplyr::compute(name = "target_cohort_table", temporary = FALSE, overwrite = TRUE) + + cohorts <- cohortSet %>% + # Remove 'cohort' and 'json' columns + select(-"cohort", -"json") %>% + mutate(type = c("event", "event", "event", "event", "exit", "event", "event", "target")) %>% + rename( + cohortId = "cohort_definition_id", + cohortName = "cohort_name", + ) %>% + select("cohortId", "cohortName", "type") %>% + dplyr::add_row( + data.frame( + cohortId = 9, + cohortName = "viralsinusitis_2", + type = "target" + ) + ) + + outputEnv <- TreatmentPatterns::computePathways( + cohorts = cohorts, + cohortTableName = c("cohort_table", "target_cohort_table"), + cdm = cdm + ) + + outCounts <- outputEnv$treatmentHistoryFinal %>% + dplyr::group_by(.data$targetCohortId) %>% + dplyr::summarise(n = dplyr::n()) %>% + dplyr::pull(.data$n) + + expect_identical(outCounts[1], outCounts[2]) +}) From e3436d381fded5f4472d55901983abb586806745 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Mon, 26 Jan 2026 11:59:50 +0100 Subject: [PATCH 22/33] updated actions --- extras/database_tests/CDMConnector-odbc.R | 26 +----- .../database_tests/DatabaseConnector-jdbc.R | 86 +++++++------------ 2 files changed, 34 insertions(+), 78 deletions(-) diff --git a/extras/database_tests/CDMConnector-odbc.R b/extras/database_tests/CDMConnector-odbc.R index 3af775ec..16ce0c20 100644 --- a/extras/database_tests/CDMConnector-odbc.R +++ b/extras/database_tests/CDMConnector-odbc.R @@ -41,29 +41,10 @@ test_that("Test Database", { CDMConnector::dropSourceTable(cdm, cohortTableName) }) - ## new() ---- - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = NULL, - cdmSchema = NULL, - resultSchema = NULL, - tempEmulationSchema = NULL, - cdm = cdm - ) - - ## disconnect() ---- - # When defered - withr::defer({ - cdmInterface$disconnect() - }) - - expect_true(R6::is.R6( - cdmInterface - )) - ## fetchMetadata() ---- andromeda <- Andromeda::andromeda() - andromeda <- cdmInterface$fetchMetadata(andromeda) + andromeda <- TreatmentPatterns:::fetchMetadata(andromeda = andromeda) metadata <- andromeda$metadata %>% collect() @@ -79,7 +60,7 @@ test_that("Test Database", { expect_identical(ncol(metadata), 4L) ## fetchCdmSource() - andromeda <- cdmInterface$fetchCdmSource(andromeda) + andromeda <- TreatmentPatterns:::fetchCdmSource(cdm = cdm, andromeda = andromeda) # Close when defered withr::defer({ Andromeda::close(andromeda) @@ -97,7 +78,8 @@ test_that("Test Database", { type = "target" ) - andromeda <- cdmInterface$fetchCohortTable( + andromeda <- TreatmentPatterns:::fetchCohortTable( + cdm = cdm, cohorts = cohorts, cohortTableName = cohortTableName, andromeda = andromeda, diff --git a/extras/database_tests/DatabaseConnector-jdbc.R b/extras/database_tests/DatabaseConnector-jdbc.R index f952efee..607d0c84 100644 --- a/extras/database_tests/DatabaseConnector-jdbc.R +++ b/extras/database_tests/DatabaseConnector-jdbc.R @@ -65,70 +65,43 @@ test_that("Test Database", { pathToDriver = jdbcDriverFolder ) - ## Prepare ---- - cohortTableName <- "temp_tp_cohort_table_2" - connection <- DatabaseConnector::connect(CONNECTION_DETAILS) - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = " - DROP TABLE IF EXISTS @resultSchema.@cohortTableName; - - SELECT * - INTO @resultSchema.@cohortTableName - FROM ( - SELECT TOP 10 - 1 AS cohort_definition_id, - person.person_id AS subject_id, - observation_period.observation_period_start_date AS cohort_start_date, - observation_period.observation_period_start_date AS cohort_end_date - FROM @cdmSchema.observation_period - INNER JOIN @cdmSchema.person - ON observation_period.person_id = person.person_id - ) a;", - cdmSchema = CDM_SCHEMA, - resultSchema = RESULT_SCHEMA, - cohortTableName = cohortTableName - ) - - DatabaseConnector::disconnect(connection) - - withr::defer({ - connection <- DatabaseConnector::connect(CONNECTION_DETAILS) - DatabaseConnector::renderTranslateExecuteSql( - connection = connection, - sql = " - DROP TABLE IF EXISTS @resultSchema.@cohortTableName; - ", - resultSchema = RESULT_SCHEMA, - cohortTableName = cohortTableName - ) - DatabaseConnector::disconnect(connection) - }) - ## new() ---- - cdmInterface <- TreatmentPatterns:::CDMInterface$new( - connectionDetails = CONNECTION_DETAILS, - cdmSchema = CDM_SCHEMA, - resultSchema = RESULT_SCHEMA, - tempEmulationSchema = RESULT_SCHEMA + # Make CDM Reference with JDBC ---- + cdm <- CDMConnector::cdmFromCon( + con = connection, + cdmSchema = Sys.getenv("CDM_SCHEMA"), + writeSchema = Sys.getenv("RESULT_SCHEMA") ) - ## disconnect() ---- - # When defered + ## Prepare ---- + cohortTableName <- "temp_tp_cohort_table_2" + + cdm[[cohortTableName]] <- cdm$observation_period %>% + dplyr::mutate( + cohort_definition_id = 1, + ) %>% + dplyr::inner_join( + cdm$person, dplyr::join_by(person_id == person_id) + ) %>% + dplyr::select( + cohort_definition_id, + subject_id = "person_id", + cohort_start_date = "observation_period_start_date", + cohort_end_date = "observation_period_start_date" + ) %>% + head(10) %>% + dplyr::compute(name = cohortTableName) + withr::defer({ - cdmInterface$disconnect() + CDMConnector::dropSourceTable(cdm, cohortTableName) }) - expect_true(R6::is.R6( - cdmInterface - )) - ## fetchMetadata() ---- andromeda <- Andromeda::andromeda() - andromeda <- cdmInterface$fetchMetadata(andromeda) - + andromeda <- TreatmentPatterns:::fetchMetadata(andromeda = andromeda) + metadata <- andromeda$metadata %>% collect() @@ -143,7 +116,7 @@ test_that("Test Database", { expect_identical(ncol(metadata), 4L) ## fetchCdmSource() - andromeda <- cdmInterface$fetchCdmSource(andromeda) + andromeda <- TreatmentPatterns:::fetchCdmSource(cdm = cdm, andromeda = andromeda) # Close when defered withr::defer({ Andromeda::close(andromeda) @@ -161,7 +134,8 @@ test_that("Test Database", { type = "target" ) - andromeda <- cdmInterface$fetchCohortTable( + andromeda <- TreatmentPatterns:::fetchCohortTable( + cdm = cdm, cohorts = cohorts, cohortTableName = cohortTableName, andromeda = andromeda, From 66524050a2ff1de4db4a52fe100999b3e9643a1c Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Mon, 26 Jan 2026 12:16:23 +0100 Subject: [PATCH 23/33] postgres and oracle fix --- R/CDMInterface.R | 2 +- extras/database_tests/CDMConnector-odbc.R | 3 ++- extras/database_tests/DatabaseConnector-jdbc.R | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/CDMInterface.R b/R/CDMInterface.R index 6dddf827..7902f693 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -62,7 +62,7 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda cdm$tbl <- cdm[[tableName]] %>% dplyr::group_by(.data$subject_id) %>% dplyr::mutate( - subject_id_origin = as.character(.data$subject_id) + subject_id_origin = .data$subject_id ) %>% dplyr::filter(.data$cohort_definition_id %in% cohortIds) %>% dplyr::filter(!!CDMConnector::datediff("cohort_start_date", "cohort_end_date", interval = "day") >= minEraDuration) %>% diff --git a/extras/database_tests/CDMConnector-odbc.R b/extras/database_tests/CDMConnector-odbc.R index 16ce0c20..683365d4 100644 --- a/extras/database_tests/CDMConnector-odbc.R +++ b/extras/database_tests/CDMConnector-odbc.R @@ -15,7 +15,8 @@ test_that("Test Database", { cdm <- CDMConnector::cdmFromCon( con = con, cdmSchema = Sys.getenv("CDM_SCHEMA"), - writeSchema = Sys.getenv("RESULT_SCHEMA") + writeSchema = Sys.getenv("RESULT_SCHEMA"), + cdmVersion = "5.4" ) ## Prepare ---- diff --git a/extras/database_tests/DatabaseConnector-jdbc.R b/extras/database_tests/DatabaseConnector-jdbc.R index 607d0c84..bf57ff70 100644 --- a/extras/database_tests/DatabaseConnector-jdbc.R +++ b/extras/database_tests/DatabaseConnector-jdbc.R @@ -71,7 +71,8 @@ test_that("Test Database", { cdm <- CDMConnector::cdmFromCon( con = connection, cdmSchema = Sys.getenv("CDM_SCHEMA"), - writeSchema = Sys.getenv("RESULT_SCHEMA") + writeSchema = Sys.getenv("RESULT_SCHEMA"), + cdmVersion = "5.4" ) ## Prepare ---- From c20181e99ca1547b025d1fa145ede12531dc184b Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Mon, 26 Jan 2026 12:56:28 +0100 Subject: [PATCH 24/33] bumped cdm version to 5.3 --- extras/database_tests/CDMConnector-odbc.R | 2 +- extras/database_tests/DatabaseConnector-jdbc.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extras/database_tests/CDMConnector-odbc.R b/extras/database_tests/CDMConnector-odbc.R index 683365d4..a5d9458a 100644 --- a/extras/database_tests/CDMConnector-odbc.R +++ b/extras/database_tests/CDMConnector-odbc.R @@ -16,7 +16,7 @@ test_that("Test Database", { con = con, cdmSchema = Sys.getenv("CDM_SCHEMA"), writeSchema = Sys.getenv("RESULT_SCHEMA"), - cdmVersion = "5.4" + cdmVersion = "5.3" ) ## Prepare ---- diff --git a/extras/database_tests/DatabaseConnector-jdbc.R b/extras/database_tests/DatabaseConnector-jdbc.R index bf57ff70..ebdb19ff 100644 --- a/extras/database_tests/DatabaseConnector-jdbc.R +++ b/extras/database_tests/DatabaseConnector-jdbc.R @@ -72,7 +72,7 @@ test_that("Test Database", { con = connection, cdmSchema = Sys.getenv("CDM_SCHEMA"), writeSchema = Sys.getenv("RESULT_SCHEMA"), - cdmVersion = "5.4" + cdmVersion = "5.3" ) ## Prepare ---- From 64416eb0ad1416a7c42c3ff3168bf703925ce360 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Mon, 26 Jan 2026 13:16:54 +0100 Subject: [PATCH 25/33] quated cohort_definition_id in select --- extras/database_tests/CDMConnector-odbc.R | 2 +- extras/database_tests/DatabaseConnector-jdbc.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extras/database_tests/CDMConnector-odbc.R b/extras/database_tests/CDMConnector-odbc.R index a5d9458a..799d5d18 100644 --- a/extras/database_tests/CDMConnector-odbc.R +++ b/extras/database_tests/CDMConnector-odbc.R @@ -30,7 +30,7 @@ test_that("Test Database", { cdm$person, dplyr::join_by(person_id == person_id) ) %>% dplyr::select( - cohort_definition_id, + "cohort_definition_id", subject_id = "person_id", cohort_start_date = "observation_period_start_date", cohort_end_date = "observation_period_start_date" diff --git a/extras/database_tests/DatabaseConnector-jdbc.R b/extras/database_tests/DatabaseConnector-jdbc.R index ebdb19ff..f7ec53ad 100644 --- a/extras/database_tests/DatabaseConnector-jdbc.R +++ b/extras/database_tests/DatabaseConnector-jdbc.R @@ -86,7 +86,7 @@ test_that("Test Database", { cdm$person, dplyr::join_by(person_id == person_id) ) %>% dplyr::select( - cohort_definition_id, + "cohort_definition_id", subject_id = "person_id", cohort_start_date = "observation_period_start_date", cohort_end_date = "observation_period_start_date" From fe7c9932b6b3960496cbfe4860203587d89cd947 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Mon, 26 Jan 2026 13:27:36 +0100 Subject: [PATCH 26/33] drop temp table in db after were done and compute the table as a permanent table --- R/CDMInterface.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/CDMInterface.R b/R/CDMInterface.R index 7902f693..4a2987a2 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -59,7 +59,7 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda cohortIds <- cohorts$cohortId for (tableName in cohortTableName) { - cdm$tbl <- cdm[[tableName]] %>% + cdm$tp_temp_tbl <- cdm[[tableName]] %>% dplyr::group_by(.data$subject_id) %>% dplyr::mutate( subject_id_origin = .data$subject_id @@ -99,18 +99,20 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda "age", "sex" ) %>% - dplyr::compute() + dplyr::compute(name = "tp_temp_tbl", overwrite = TRUE, temporary = FALSE) if (is.null(andromeda[[andromedaTableName]])) { - dplyr::copy_to(dest = andromeda, df = cdm$tbl, name = andromedaTableName, overwrite = TRUE) + dplyr::copy_to(dest = andromeda, df = cdm$tp_temp_tbl, name = andromedaTableName, overwrite = TRUE) } else { - dplyr::copy_to(dest = andromeda, df = cdm$tbl, name = "tbl_temp", overwrite = TRUE) + dplyr::copy_to(dest = andromeda, df = cdm$tp_temp_tbl, name = "tbl_temp", overwrite = TRUE) andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% dplyr::union_all(andromeda$tbl_temp) andromeda$tbl_temp <- NULL } } + cdm <- CDMConnector::dropSourceTable(cdm = cdm, name = "tp_temp_tbl") + andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% dplyr::mutate(r = dplyr::row_number()) %>% dplyr::group_by(.data$subject_id_origin) %>% From f018b2e30ed244883597b839022532ee22fc3542 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 4 Feb 2026 15:46:46 +0100 Subject: [PATCH 27/33] added explicit qpdf install --- .github/workflows/jdbc-dbc-spark.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/jdbc-dbc-spark.yaml b/.github/workflows/jdbc-dbc-spark.yaml index a6ec6574..b0e49fb0 100644 --- a/.github/workflows/jdbc-dbc-spark.yaml +++ b/.github/workflows/jdbc-dbc-spark.yaml @@ -40,6 +40,11 @@ jobs: distribution: 'corretto' java-version: '8' + - name: Update Homebrew and Install qpdf + run: | + brew update + brew install qpdf + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} From 1e93154628403c44bce9967536d7d4df6f6ac0f7 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 4 Feb 2026 15:50:23 +0100 Subject: [PATCH 28/33] added step to ensure qpdf install --- .github/workflows/jdbc-dbc-spark.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/jdbc-dbc-spark.yaml b/.github/workflows/jdbc-dbc-spark.yaml index b0e49fb0..aae9f223 100644 --- a/.github/workflows/jdbc-dbc-spark.yaml +++ b/.github/workflows/jdbc-dbc-spark.yaml @@ -45,6 +45,11 @@ jobs: brew update brew install qpdf + - name: Ensure qpdf is correctly installed + run: | + brew install qpdf || true + brew postinstall qpdf + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} From c19c7a197d07ae7cf79e2c803e4b0a9759c56e37 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 4 Feb 2026 15:52:39 +0100 Subject: [PATCH 29/33] removed old step --- .github/workflows/jdbc-dbc-spark.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/jdbc-dbc-spark.yaml b/.github/workflows/jdbc-dbc-spark.yaml index aae9f223..4c23d155 100644 --- a/.github/workflows/jdbc-dbc-spark.yaml +++ b/.github/workflows/jdbc-dbc-spark.yaml @@ -40,11 +40,6 @@ jobs: distribution: 'corretto' java-version: '8' - - name: Update Homebrew and Install qpdf - run: | - brew update - brew install qpdf - - name: Ensure qpdf is correctly installed run: | brew install qpdf || true From ddd3fda948887f914a9cae6c2d0b0f8836325e02 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Wed, 4 Feb 2026 15:58:50 +0100 Subject: [PATCH 30/33] switch from macos to ubuntu --- .github/workflows/jdbc-dbc-spark.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/jdbc-dbc-spark.yaml b/.github/workflows/jdbc-dbc-spark.yaml index 4c23d155..eaf833af 100644 --- a/.github/workflows/jdbc-dbc-spark.yaml +++ b/.github/workflows/jdbc-dbc-spark.yaml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: config: - - {os: macos-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release'} env: STATUS: 0 GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} @@ -40,11 +40,6 @@ jobs: distribution: 'corretto' java-version: '8' - - name: Ensure qpdf is correctly installed - run: | - brew install qpdf || true - brew postinstall qpdf - - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} From db5e79246ea2bd9a3fc0d43fa5a04d8f5db8566c Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Mon, 9 Feb 2026 09:32:02 +0100 Subject: [PATCH 31/33] updated min pkg requirement and check if online for tests --- DESCRIPTION | 4 ++-- tests/testthat/helper-ableToRun.R | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 02603311..2237a252 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,7 @@ Imports: tidyr, dbplyr, Andromeda, - CDMConnector + CDMConnector (>= 2.2.0) Suggests: knitr, rmarkdown, @@ -34,7 +34,7 @@ Suggests: testthat (>= 3.0.0), usethis, Eunomia, - DatabaseConnector (>= 6.0.0), + DatabaseConnector (>= 7.0.0), SqlRender, CohortGenerator, ResultModelManager, diff --git a/tests/testthat/helper-ableToRun.R b/tests/testthat/helper-ableToRun.R index c899feae..4630e51d 100644 --- a/tests/testthat/helper-ableToRun.R +++ b/tests/testthat/helper-ableToRun.R @@ -1,20 +1,22 @@ +isOnline <- function(site="http://example.com/") { + tryCatch({ + readLines(site,n=1) + TRUE + }, + warning = function(w) invokeRestart("muffleWarning"), + error = function(e) FALSE) +} + ableToRun <- function() { list( CDMC = all( require("CirceR", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), require("CDMConnector", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), require("DBI", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - require("duckdb", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE) + require("duckdb", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), + isOnline() ), - # CG = all( - # require("CirceR", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - # require("CohortGenerator", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - # require("DatabaseConnector", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - # require("SqlRender", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), - # require("Eunomia", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE) - # ), - plotting = all( require("ggplot2", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), require("webshot2", character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE), From 1cfd17464ec471f104935276f97cf883397f6c47 Mon Sep 17 00:00:00 2001 From: ablack3 Date: Thu, 12 Feb 2026 18:23:27 +0000 Subject: [PATCH 32/33] debug github action test on spark --- .github/workflows/odbc-cdmc-spark.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/odbc-cdmc-spark.yaml b/.github/workflows/odbc-cdmc-spark.yaml index 3b21d4de..1ea6b860 100644 --- a/.github/workflows/odbc-cdmc-spark.yaml +++ b/.github/workflows/odbc-cdmc-spark.yaml @@ -91,7 +91,8 @@ jobs: httpPath = Sys.getenv("HTTP_PATH"), workspace = Sys.getenv("WORKSPACE"), uid = Sys.getenv("USER"), - pwd = Sys.getenv("PASSWORD") + pwd = Sys.getenv("PASSWORD"), + useNativeQuery = FALSE ), file = "./args.rds" ) From de541fd4d00495ed3e1f00a8424f1935e68a5d66 Mon Sep 17 00:00:00 2001 From: mvankessel-EMC Date: Fri, 13 Feb 2026 15:42:46 +0100 Subject: [PATCH 33/33] drop tbl fix for snowflake --- R/CDMInterface.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/CDMInterface.R b/R/CDMInterface.R index d2b70de8..088c62d6 100644 --- a/R/CDMInterface.R +++ b/R/CDMInterface.R @@ -111,7 +111,9 @@ fetchCohortTable <- function(cdm, cohorts, cohortTableName, andromeda, andromeda } } - cdm <- CDMConnector::dropSourceTable(cdm = cdm, name = "tp_temp_tbl") + if ("tp_temp_tbl" %in% CDMConnector::listSourceTables(cdm)) { + cdm <- CDMConnector::dropSourceTable(cdm = cdm, name = "tp_temp_tbl") + } andromeda[[andromedaTableName]] <- andromeda[[andromedaTableName]] %>% dplyr::mutate(r = dplyr::row_number()) %>%