diff --git a/R/Aggregation.R b/R/Aggregation.R index c970b8da..1a45d1f5 100644 --- a/R/Aggregation.R +++ b/R/Aggregation.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -27,7 +27,7 @@ aggregateCovariates <- function(covariateData) { if (!isCovariateData(covariateData)) stop("Data not of class CovariateData") - if (!Andromeda::isValidAndromeda(covariateData)) + if (!Andromeda::isValidAndromeda(covariateData)) stop("CovariateData object is closed") if (isAggregatedCovariateData(covariateData)) stop("Data appears to already be aggregated") @@ -39,8 +39,8 @@ aggregateCovariates <- function(covariateData) { attr(result, "metaData") <- attr(covariateData, "metaData") class(result) <- "CovariateData" attr(class(result), "package") <- "FeatureExtraction" - populationSize <- attr(covariateData, "metaData")$populationSize - + populationSize <- attr(covariateData, "metaData")$populationSize + # Aggregate binary variables result$covariates <- covariateData$analysisRef %>% filter(rlang::sym("isBinary") == "Y") %>% @@ -48,8 +48,9 @@ aggregateCovariates <- function(covariateData) { inner_join(covariateData$covariates, by = "covariateId") %>% group_by(rlang::sym("covariateId")) %>% summarize(sumValue = sum(rlang::sym("covariateValue"), na.rm = TRUE), - averageValue = sum(rlang::sym("covariateValue") / populationSize, na.rm = TRUE)) - + averageValue = sum(rlang::sym("covariateValue")/populationSize, + na.rm = TRUE)) + # Aggregate continuous variables where missing means zero computeStats <- function(data) { zeroFraction <- 1 - (nrow(data)/populationSize) @@ -61,47 +62,37 @@ aggregateCovariates <- function(covariateData) { result <- tibble(covariateId = data$covariateId[1], countValue = nrow(data), minValue = quants[1], - maxValue = quants[7], - averageValue = mean(data$covariateValue) * (1 - zeroFraction), - standardDeviation = 
sqrt((populationSize * sum(data$covariateValue^2) - sum(data$covariateValue)^2)/(populationSize * (populationSize - 1))), - medianValue = quants[4], - p10Value = quants[2], - p25Value = quants[3], - p75Value = quants[5], - p90Value = quants[6]) + + maxValue = quants[7], averageValue = mean(data$covariateValue) * (1 - zeroFraction), standardDeviation = sqrt((populationSize * + sum(data$covariateValue^2) - sum(data$covariateValue)^2)/(populationSize * (populationSize - + 1))), medianValue = quants[4], p10Value = quants[2], p25Value = quants[3], p75Value = quants[5], + p90Value = quants[6]) } - + covariatesContinuous1 <- covariateData$analysisRef %>% filter(rlang::sym("isBinary") == "N" & rlang::sym("missingMeansZero") == "Y") %>% inner_join(covariateData$covariateRef, by = "analysisId") %>% inner_join(covariateData$covariates, by = "covariateId") %>% - Andromeda::groupApply("covariateId", computeStats) %>% + Andromeda::groupApply("covariateId", computeStats) %>% bind_rows() - + # Aggregate continuous variables where missing means missing computeStats <- function(data) { probs <- c(0, 0.1, 0.25, 0.5, 0.75, 0.9, 1) quants <- quantile(data$covariateValue, probs = probs, type = 1) - result <- tibble(covariateId = data$covariateId[1], - countValue = length(data$covariateValue), - minValue = quants[1], - maxValue = quants[7], - averageValue = mean(data$covariateValue), - standardDeviation = sd(data$covariateValue), - medianValue = quants[4], - p10Value = quants[2], - p25Value = quants[3], - p75Value = quants[5], - p90Value = quants[6]) + result <- tibble(covariateId = data$covariateId[1], countValue = length(data$covariateValue), + minValue = quants[1], maxValue = quants[7], averageValue = mean(data$covariateValue), standardDeviation = sd(data$covariateValue), + medianValue = quants[4], p10Value = quants[2], p25Value = quants[3], p75Value = quants[5], + p90Value = quants[6]) } - + covariatesContinuous2 <- covariateData$analysisRef %>% filter(rlang::sym("isBinary") == "N" & 
rlang::sym("missingMeansZero") == "N") %>% inner_join(covariateData$covariateRef, by = "analysisId") %>% inner_join(covariateData$covariates, by = "covariateId") %>% - Andromeda::groupApply("covariateId", computeStats) %>% + Andromeda::groupApply("covariateId", computeStats) %>% bind_rows() - + covariatesContinuous <- bind_rows(covariatesContinuous1, covariatesContinuous2) if (nrow(covariatesContinuous) > 0) { result$covariatesContinuous <- covariatesContinuous diff --git a/R/CompareCohorts.R b/R/CompareCohorts.R index 99225763..72ba7550 100644 --- a/R/CompareCohorts.R +++ b/R/CompareCohorts.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -32,7 +32,10 @@ #' of mean. #' #' @export -computeStandardizedDifference <- function(covariateData1, covariateData2, cohortId1 = NULL, cohortId2 = NULL) { +computeStandardizedDifference <- function(covariateData1, + covariateData2, + cohortId1 = NULL, + cohortId2 = NULL) { if (!isCovariateData(covariateData1)) stop("covariateData1 is not of type 'covariateData'") if (!isCovariateData(covariateData1)) @@ -48,21 +51,19 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort covariates1 <- covariates1 %>% filter(.data$cohortDefinitionId == cohortId1) } - covariates1 <- covariates1 %>% - select(covariateId = "covariateId", - count1 = "sumValue") %>% + covariates1 <- covariates1 %>% + select(covariateId = "covariateId", count1 = "sumValue") %>% collect() - + covariates2 <- covariateData2$covariates if (!is.null(cohortId2)) { covariates2 <- covariates2 %>% filter(.data$cohortDefinitionId == cohortId2) } - covariates2 <- covariates2 %>% - select(covariateId = "covariateId", - count2 = "sumValue") %>% + covariates2 <- covariates2 %>% + select(covariateId = "covariateId", count2 = "sumValue") %>% collect() - + n1 <- attr(covariateData1, 
"metaData")$populationSize if (!is.null(cohortId1)) { n1 <- n1[as.character(cohortId1)] @@ -78,9 +79,11 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort m$mean2 <- m$count2/n2 m$sd1 <- sqrt(m$mean1 * (1 - m$mean1)) m$sd2 <- sqrt(m$mean2 * (1 - m$mean2)) - m$sd <- sqrt((m$sd1^2 + m$sd2^2) / 2) + m$sd <- sqrt((m$sd1^2 + m$sd2^2)/2) m$stdDiff <- (m$mean2 - m$mean1)/m$sd - result <- bind_rows(result, m[, c("covariateId", "mean1", "sd1", "mean2", "sd2", "sd", "stdDiff")]) + result <- bind_rows(result, + m[, + c("covariateId", "mean1", "sd1", "mean2", "sd2", "sd", "stdDiff")]) } if (!is.null(covariateData1$covariatesContinuous) && !is.null(covariateData2$covariatesContinuous)) { covariates1 <- covariateData1$covariatesContinuous @@ -89,22 +92,18 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort filter(.data$cohortDefinitionId == cohortId1) } covariates1 <- covariates1 %>% - select(covariateId = "covariateId", - mean1 = "averageValue", - sd1 = "standardDeviation") %>% + select(covariateId = "covariateId", mean1 = "averageValue", sd1 = "standardDeviation") %>% collect() - + covariates2 <- covariateData2$covariatesContinuous if (!is.null(cohortId2)) { covariates2 <- covariates2 %>% filter(.data$cohortDefinitionId == cohortId2) } covariates2 <- covariates2 %>% - select(covariateId = "covariateId", - mean2 = "averageValue", - sd2 = "standardDeviation") %>% + select(covariateId = "covariateId", mean2 = "averageValue", sd2 = "standardDeviation") %>% collect() - + m <- merge(covariates1, covariates2, all = T) m$mean1[is.na(m$mean1)] <- 0 m$sd1[is.na(m$sd1)] <- 0 @@ -112,16 +111,20 @@ computeStandardizedDifference <- function(covariateData1, covariateData2, cohort m$sd2[is.na(m$sd2)] <- 0 m$sd <- sqrt(m$sd1^2 + m$sd2^2) m$stdDiff <- (m$mean2 - m$mean1)/m$sd - result <- bind_rows(result, m[, c("covariateId", "mean1", "sd1", "mean2", "sd2", "sd", "stdDiff")]) + result <- bind_rows(result, + m[, + c("covariateId", 
"mean1", "sd1", "mean2", "sd2", "sd", "stdDiff")]) } covariateRef1 <- covariateData1$covariateRef %>% collect() covariateRef2 <- covariateData2$covariateRef %>% collect() - + result <- result %>% - left_join(select(covariateRef1, covariateId = "covariateId", covariateName1 = "covariateName"), by = "covariateId") %>% - left_join(select(covariateRef2, covariateId = "covariateId", covariateName2 = "covariateName"), by = "covariateId") %>% + left_join(select(covariateRef1, covariateId = "covariateId", covariateName1 = "covariateName"), + by = "covariateId") %>% + left_join(select(covariateRef2, covariateId = "covariateId", covariateName2 = "covariateName"), + by = "covariateId") %>% mutate(covariateName = case_when(is.na(covariateName1) ~ covariateName2, TRUE ~ covariateName1)) %>% select(-rlang::sym("covariateName1"), -rlang::sym("covariateName2")) %>% diff --git a/R/CovariateData.R b/R/CovariateData.R index aea7edd1..879d8106 100644 --- a/R/CovariateData.R +++ b/R/CovariateData.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -17,21 +17,22 @@ #' Covariate Data #' #' @description -#' \code{CovariateData} is an S4 class that inherits from \code{\link[Andromeda]{Andromeda}}. It contains -#' information on covariates, which can be either captured on a per-person basis, or aggregated across -#' the cohort(s). -#' -#' By default covariates refer to a specific time period, with for example different covariate IDs for +#' \code{CovariateData} is an S4 class that inherits from \code{\link[Andromeda]{Andromeda}}. It +#' contains information on covariates, which can be either captured on a per-person basis, or +#' aggregated across the cohort(s). 
+#' By default covariates refer to a specific time period, with for example different covariate IDs for #' whether a diagnosis code was observed in the year before and month before index date. However, a -#' \code{CovariateData} can also be temporal, meaning that next to a covariate ID there is also a time ID, -#' which identifies the (user specified) time window the covariate was captured. -#' -#' A \code{CovariateData} object is typically created using \code{\link{getDbCovariateData}}, can only be saved using -#' \code{\link{saveCovariateData}}, and loaded using \code{\link{loadCovariateData}}. -#' -#' @seealso \code{\link{isCovariateData}}, \code{\link{isAggregatedCovariateData}}, \code{\link{isTemporalCovariateData}} +#' \code{CovariateData} can also be temporal, meaning that next to a covariate ID there is also a time +#' ID, which identifies the (user specified) time window the covariate was captured. +#' A \code{CovariateData} object is typically created using \code{\link{getDbCovariateData}}, can only +#' be saved using \code{\link{saveCovariateData}}, and loaded using \code{\link{loadCovariateData}}. 
+#' +#' @seealso +#' \code{\link{isCovariateData}}, \code{\link{isAggregatedCovariateData}}, +#' \code{\link{isTemporalCovariateData}} #' @name CovariateData-class -#' @aliases CovariateData +#' @aliases +#' CovariateData #' @export #' @import Andromeda #' @importClassesFrom RSQLite SQLiteConnection @@ -63,7 +64,7 @@ saveCovariateData <- function(covariateData, file) { stop("Must specify file") if (!inherits(covariateData, "CovariateData")) stop("Data not of class CovariateData") - + Andromeda::saveAndromeda(covariateData, file) } @@ -89,8 +90,8 @@ loadCovariateData <- function(file, readOnly) { if (!file.exists(file)) stop("Cannot find file ", file) if (file.info(file)$isdir) - stop(file , " is a folder, but should be a file") - if (!missing(readOnly)) + stop(file, " is a folder, but should be a file") + if (!missing(readOnly)) warning("readOnly argument has been deprecated") covariateData <- Andromeda::loadAndromeda(file) class(covariateData) <- "CovariateData" @@ -99,10 +100,11 @@ loadCovariateData <- function(file, readOnly) { } # show() -#' @param object An object of class `CovariateData`. -#' +#' @param object An object of class `CovariateData`. +#' #' @export -#' @rdname CovariateData-class +#' @rdname +#' CovariateData-class setMethod("show", "CovariateData", function(object) { cli::cat_line(pillar::style_subtle("# CovariateData object")) cli::cat_line("") @@ -122,21 +124,26 @@ setMethod("show", "CovariateData", function(object) { # summary() -#' @param object An object of class `CovariateData`. -#' +#' @param object An object of class `CovariateData`. 
+#' #' @export -#' @rdname CovariateData-class +#' @rdname +#' CovariateData-class setMethod("summary", "CovariateData", function(object) { covariateValueCount <- 0 if (!is.null(object$covariates)) { - covariateValueCount <- covariateValueCount + (object$covariates %>% count() %>% pull()) + covariateValueCount <- covariateValueCount + (object$covariates %>% + count() %>% + pull()) } if (!is.null(object$covariatesContinuous)) { - covariateValueCount <- covariateValueCount + (object$covariatesContinuous %>% count() %>% pull()) + covariateValueCount <- covariateValueCount + (object$covariatesContinuous %>% + count() %>% + pull()) } - result <- list(metaData = attr(object, "metaData"), - covariateCount = object$covariateRef %>% count() %>% pull(), - covariateValueCount = covariateValueCount) + result <- list(metaData = attr(object, "metaData"), covariateCount = object$covariateRef %>% + count() %>% + pull(), covariateValueCount = covariateValueCount) class(result) <- "summary.CovariateData" return(result) }) @@ -151,11 +158,11 @@ print.summary.CovariateData <- function(x, ...) { #' Check whether an object is a CovariateData object #' -#' @param x The object to check. +#' @param x The object to check. #' #' @return #' A logical value. -#' +#' #' @export isCovariateData <- function(x) { return(inherits(x, "CovariateData")) @@ -163,59 +170,51 @@ isCovariateData <- function(x) { #' Check whether covariate data is aggregated #' -#' @param x The covariate data object to check. +#' @param x The covariate data object to check. #' #' @return #' A logical value. -#' +#' #' @export isAggregatedCovariateData <- function(x) { if (!isCovariateData(x)) stop("Object not of class CovariateData") - if (!Andromeda::isValidAndromeda(x)) + if (!Andromeda::isValidAndromeda(x)) stop("CovariateData object is closed") return(!is.null(x$covariatesContinuous) || !"rowId" %in% colnames(x$covariates)) } #' Check whether covariate data is temporal #' -#' @param x The covariate data object to check. 
+#' @param x The covariate data object to check. #' #' @return #' A logical value. -#' +#' #' @export isTemporalCovariateData <- function(x) { if (!isCovariateData(x)) stop("Object not of class CovariateData") - if (!Andromeda::isValidAndromeda(x)) + if (!Andromeda::isValidAndromeda(x)) stop("CovariateData object is closed") return("timeId" %in% colnames(x$covariates)) } createEmptyCovariateData <- function(cohortId, aggregated, temporal) { - dummy <- tibble(covariateId = 1, - covariateValue = 1) + dummy <- tibble(covariateId = 1, covariateValue = 1) if (!aggregated) { dummy$rowId <- 1 } if (!is.null(temporal) && temporal) { dummy$timeId <- 1 } - covariateData <- Andromeda::andromeda(covariates = dummy[!1, ], - covariateRef = tibble(covariateId = 1, - covariateName = "", - analysisId = 1, - conceptId = 1)[!1, ], - analysisRef = tibble(analysisId = 1, - analysisName = "", - domainId = "", - startDay = 1, - endDay = 1, - isBinary = "", - missingMeansZero = "")[!1, ]) - attr(covariateData, "metaData") <- list(populationSize = 0, - cohortId = cohortId) + covariateData <- Andromeda::andromeda(covariates = dummy[!1, + ], + covariateRef = tibble(covariateId = 1, + covariateName = "", analysisId = 1, conceptId = 1)[!1, ], analysisRef = tibble(analysisId = 1, + analysisName = "", domainId = "", startDay = 1, endDay = 1, isBinary = "", missingMeansZero = "")[!1, + ]) + attr(covariateData, "metaData") <- list(populationSize = 0, cohortId = cohortId) class(covariateData) <- "CovariateData" return(covariateData) } diff --git a/R/DefaultCovariateSettings.R b/R/DefaultCovariateSettings.R index ee9b6dda..07fbe794 100644 --- a/R/DefaultCovariateSettings.R +++ b/R/DefaultCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -292,6 +292,9 @@ #' @param useHfrs The Hospital Frailty Risk Score score #' using all 
conditions prior to the window #' end. (analysis ID 926) +#' @param useEfi Electronic frailty score, using all +#' conditions prior to the window end. +#' (analysis ID 927) #' @param useDistinctConditionCountLongTerm The number of distinct condition #' concepts observed in the long term #' window. (analysis ID 905) @@ -378,235 +381,88 @@ #' settings <- createCovariateSettings(useDemographicsGender = TRUE, #' useDemographicsAge = FALSE, #' useDemographicsAgeGroup = TRUE, -#' useDemographicsRace = TRUE, -#' useDemographicsEthnicity = TRUE, -#' useDemographicsIndexYear = TRUE, -#' useDemographicsIndexMonth = TRUE, -#' useDemographicsPriorObservationTime = FALSE, -#' useDemographicsPostObservationTime = FALSE, -#' useDemographicsTimeInCohort = FALSE, -#' useDemographicsIndexYearMonth = FALSE, -#' useConditionOccurrenceAnyTimePrior = FALSE, -#' useConditionOccurrenceLongTerm = FALSE, -#' useConditionOccurrenceMediumTerm = FALSE, -#' useConditionOccurrenceShortTerm = FALSE, -#' useConditionOccurrencePrimaryInpatientAnyTimePrior = FALSE, -#' useConditionOccurrencePrimaryInpatientLongTerm = FALSE, -#' useConditionOccurrencePrimaryInpatientMediumTerm = FALSE, -#' useConditionOccurrencePrimaryInpatientShortTerm = FALSE, -#' useConditionEraAnyTimePrior = FALSE, -#' useConditionEraLongTerm = FALSE, -#' useConditionEraMediumTerm = FALSE, -#' useConditionEraShortTerm = FALSE, -#' useConditionEraOverlapping = FALSE, -#' useConditionEraStartLongTerm = FALSE, -#' useConditionEraStartMediumTerm = FALSE, -#' useConditionEraStartShortTerm = FALSE, -#' useConditionGroupEraAnyTimePrior = FALSE, -#' useConditionGroupEraLongTerm = TRUE, -#' useConditionGroupEraMediumTerm = FALSE, -#' useConditionGroupEraShortTerm = TRUE, -#' useConditionGroupEraOverlapping = FALSE, -#' useConditionGroupEraStartLongTerm = FALSE, -#' useConditionGroupEraStartMediumTerm = FALSE, -#' useConditionGroupEraStartShortTerm = FALSE, -#' useDrugExposureAnyTimePrior = FALSE, -#' useDrugExposureLongTerm = FALSE, -#' 
useDrugExposureMediumTerm = FALSE, -#' useDrugExposureShortTerm = FALSE, -#' useDrugEraAnyTimePrior = FALSE, -#' useDrugEraLongTerm = FALSE, -#' useDrugEraMediumTerm = FALSE, -#' useDrugEraShortTerm = FALSE, -#' useDrugEraOverlapping = FALSE, -#' useDrugEraStartLongTerm = FALSE, -#' useDrugEraStartMediumTerm = FALSE, -#' useDrugEraStartShortTerm = FALSE, -#' useDrugGroupEraAnyTimePrior = FALSE, -#' useDrugGroupEraLongTerm = TRUE, -#' useDrugGroupEraMediumTerm = FALSE, -#' useDrugGroupEraShortTerm = TRUE, -#' useDrugGroupEraOverlapping = TRUE, -#' useDrugGroupEraStartLongTerm = FALSE, -#' useDrugGroupEraStartMediumTerm = FALSE, -#' useDrugGroupEraStartShortTerm = FALSE, -#' useProcedureOccurrenceAnyTimePrior = FALSE, -#' useProcedureOccurrenceLongTerm = TRUE, -#' useProcedureOccurrenceMediumTerm = FALSE, -#' useProcedureOccurrenceShortTerm = TRUE, -#' useDeviceExposureAnyTimePrior = FALSE, -#' useDeviceExposureLongTerm = TRUE, -#' useDeviceExposureMediumTerm = FALSE, -#' useDeviceExposureShortTerm = TRUE, -#' useMeasurementAnyTimePrior = FALSE, -#' useMeasurementLongTerm = TRUE, -#' useMeasurementMediumTerm = FALSE, -#' useMeasurementShortTerm = TRUE, -#' useMeasurementValueAnyTimePrior = FALSE, -#' useMeasurementValueLongTerm = FALSE, -#' useMeasurementValueMediumTerm = FALSE, -#' useMeasurementValueShortTerm = FALSE, -#' useMeasurementRangeGroupAnyTimePrior = FALSE, -#' useMeasurementRangeGroupLongTerm = TRUE, -#' useMeasurementRangeGroupMediumTerm = FALSE, -#' useMeasurementRangeGroupShortTerm = FALSE, -#' useObservationAnyTimePrior = FALSE, -#' useObservationLongTerm = TRUE, -#' useObservationMediumTerm = FALSE, -#' useObservationShortTerm = TRUE, -#' useCharlsonIndex = TRUE, -#' useDcsi = TRUE, -#' useChads2 = TRUE, -#' useChads2Vasc = TRUE, -#' useHfrs = FALSE, -#' useDistinctConditionCountLongTerm = FALSE, -#' useDistinctConditionCountMediumTerm = FALSE, -#' useDistinctConditionCountShortTerm = FALSE, -#' useDistinctIngredientCountLongTerm = FALSE, -#' 
useDistinctIngredientCountMediumTerm = FALSE, -#' useDistinctIngredientCountShortTerm = FALSE, -#' useDistinctProcedureCountLongTerm = FALSE, -#' useDistinctProcedureCountMediumTerm = FALSE, -#' useDistinctProcedureCountShortTerm = FALSE, -#' useDistinctMeasurementCountLongTerm = FALSE, -#' useDistinctMeasurementCountMediumTerm = FALSE, -#' useDistinctMeasurementCountShortTerm = FALSE, -#' useDistinctObservationCountLongTerm = FALSE, -#' useDistinctObservationCountMediumTerm = FALSE, -#' useDistinctObservationCountShortTerm = FALSE, -#' useVisitCountLongTerm = FALSE, -#' useVisitCountMediumTerm = FALSE, -#' useVisitCountShortTerm = FALSE, -#' useVisitConceptCountLongTerm = FALSE, -#' useVisitConceptCountMediumTerm = FALSE, -#' useVisitConceptCountShortTerm = FALSE, -#' longTermStartDays = -365, -#' mediumTermStartDays = -180, -#' shortTermStartDays = -30, -#' endDays = 0, -#' includedCovariateConceptIds = c(), -#' addDescendantsToInclude = FALSE, -#' excludedCovariateConceptIds = c(), -#' addDescendantsToExclude = FALSE, -#' includedCovariateIds = c()) +#' +#' useDemographicsRace = TRUE, useDemographicsEthnicity = TRUE, useDemographicsIndexYear = TRUE, +#' useDemographicsIndexMonth = TRUE, useDemographicsPriorObservationTime = FALSE, useDemographicsPostObservationTime = FALSE, +#' useDemographicsTimeInCohort = FALSE, useDemographicsIndexYearMonth = FALSE, useConditionOccurrenceAnyTimePrior = FALSE, +#' useConditionOccurrenceLongTerm = FALSE, useConditionOccurrenceMediumTerm = FALSE, useConditionOccurrenceShortTerm = FALSE, +#' useConditionOccurrencePrimaryInpatientAnyTimePrior = FALSE, useConditionOccurrencePrimaryInpatientLongTerm = FALSE, +#' useConditionOccurrencePrimaryInpatientMediumTerm = FALSE, useConditionOccurrencePrimaryInpatientShortTerm = FALSE, +#' useConditionEraAnyTimePrior = FALSE, useConditionEraLongTerm = FALSE, useConditionEraMediumTerm = FALSE, +#' useConditionEraShortTerm = FALSE, useConditionEraOverlapping = FALSE, useConditionEraStartLongTerm 
= FALSE, +#' useConditionEraStartMediumTerm = FALSE, useConditionEraStartShortTerm = FALSE, useConditionGroupEraAnyTimePrior = FALSE, +#' useConditionGroupEraLongTerm = TRUE, useConditionGroupEraMediumTerm = FALSE, useConditionGroupEraShortTerm = TRUE, +#' useConditionGroupEraOverlapping = FALSE, useConditionGroupEraStartLongTerm = FALSE, useConditionGroupEraStartMediumTerm = FALSE, +#' useConditionGroupEraStartShortTerm = FALSE, useDrugExposureAnyTimePrior = FALSE, useDrugExposureLongTerm = FALSE, +#' useDrugExposureMediumTerm = FALSE, useDrugExposureShortTerm = FALSE, useDrugEraAnyTimePrior = FALSE, +#' useDrugEraLongTerm = FALSE, useDrugEraMediumTerm = FALSE, useDrugEraShortTerm = FALSE, useDrugEraOverlapping = FALSE, +#' useDrugEraStartLongTerm = FALSE, useDrugEraStartMediumTerm = FALSE, useDrugEraStartShortTerm = FALSE, +#' useDrugGroupEraAnyTimePrior = FALSE, useDrugGroupEraLongTerm = TRUE, useDrugGroupEraMediumTerm = FALSE, +#' useDrugGroupEraShortTerm = TRUE, useDrugGroupEraOverlapping = TRUE, useDrugGroupEraStartLongTerm = FALSE, +#' useDrugGroupEraStartMediumTerm = FALSE, useDrugGroupEraStartShortTerm = FALSE, useProcedureOccurrenceAnyTimePrior = FALSE, +#' useProcedureOccurrenceLongTerm = TRUE, useProcedureOccurrenceMediumTerm = FALSE, useProcedureOccurrenceShortTerm = TRUE, +#' useDeviceExposureAnyTimePrior = FALSE, useDeviceExposureLongTerm = TRUE, useDeviceExposureMediumTerm = FALSE, +#' useDeviceExposureShortTerm = TRUE, useMeasurementAnyTimePrior = FALSE, useMeasurementLongTerm = TRUE, +#' useMeasurementMediumTerm = FALSE, useMeasurementShortTerm = TRUE, useMeasurementValueAnyTimePrior = FALSE, +#' useMeasurementValueLongTerm = FALSE, useMeasurementValueMediumTerm = FALSE, useMeasurementValueShortTerm = FALSE, +#' useMeasurementRangeGroupAnyTimePrior = FALSE, useMeasurementRangeGroupLongTerm = TRUE, useMeasurementRangeGroupMediumTerm = FALSE, +#' useMeasurementRangeGroupShortTerm = FALSE, useObservationAnyTimePrior = FALSE, useObservationLongTerm = 
TRUE, +#' useObservationMediumTerm = FALSE, useObservationShortTerm = TRUE, useCharlsonIndex = TRUE, useDcsi = TRUE, +#' useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = FALSE, useEfi = FALSE, useDistinctConditionCountLongTerm = FALSE, +#' useDistinctConditionCountMediumTerm = FALSE, useDistinctConditionCountShortTerm = FALSE, useDistinctIngredientCountLongTerm = FALSE, +#' useDistinctIngredientCountMediumTerm = FALSE, useDistinctIngredientCountShortTerm = FALSE, useDistinctProcedureCountLongTerm = FALSE, +#' useDistinctProcedureCountMediumTerm = FALSE, useDistinctProcedureCountShortTerm = FALSE, useDistinctMeasurementCountLongTerm = FALSE, +#' useDistinctMeasurementCountMediumTerm = FALSE, useDistinctMeasurementCountShortTerm = FALSE, useDistinctObservationCountLongTerm = FALSE, +#' useDistinctObservationCountMediumTerm = FALSE, useDistinctObservationCountShortTerm = FALSE, useVisitCountLongTerm = FALSE, +#' useVisitCountMediumTerm = FALSE, useVisitCountShortTerm = FALSE, useVisitConceptCountLongTerm = FALSE, +#' useVisitConceptCountMediumTerm = FALSE, useVisitConceptCountShortTerm = FALSE, longTermStartDays = -365, +#' mediumTermStartDays = -180, shortTermStartDays = -30, endDays = 0, includedCovariateConceptIds = c(), +#' addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, +#' includedCovariateIds = c()) #' #' @export createCovariateSettings <- function(useDemographicsGender = FALSE, useDemographicsAge = FALSE, useDemographicsAgeGroup = FALSE, - useDemographicsRace = FALSE, - useDemographicsEthnicity = FALSE, - useDemographicsIndexYear = FALSE, - useDemographicsIndexMonth = FALSE, - useDemographicsPriorObservationTime = FALSE, - useDemographicsPostObservationTime = FALSE, - useDemographicsTimeInCohort = FALSE, - useDemographicsIndexYearMonth = FALSE, - useConditionOccurrenceAnyTimePrior = FALSE, - useConditionOccurrenceLongTerm = FALSE, - useConditionOccurrenceMediumTerm = FALSE, - useConditionOccurrenceShortTerm 
= FALSE, - useConditionOccurrencePrimaryInpatientAnyTimePrior = FALSE, - useConditionOccurrencePrimaryInpatientLongTerm = FALSE, - useConditionOccurrencePrimaryInpatientMediumTerm = FALSE, - useConditionOccurrencePrimaryInpatientShortTerm = FALSE, - useConditionEraAnyTimePrior = FALSE, - useConditionEraLongTerm = FALSE, - useConditionEraMediumTerm = FALSE, - useConditionEraShortTerm = FALSE, - useConditionEraOverlapping = FALSE, - useConditionEraStartLongTerm = FALSE, - useConditionEraStartMediumTerm = FALSE, - useConditionEraStartShortTerm = FALSE, - useConditionGroupEraAnyTimePrior = FALSE, - useConditionGroupEraLongTerm = FALSE, - useConditionGroupEraMediumTerm = FALSE, - useConditionGroupEraShortTerm = FALSE, - useConditionGroupEraOverlapping = FALSE, - useConditionGroupEraStartLongTerm = FALSE, - useConditionGroupEraStartMediumTerm = FALSE, - useConditionGroupEraStartShortTerm = FALSE, - useDrugExposureAnyTimePrior = FALSE, - useDrugExposureLongTerm = FALSE, - useDrugExposureMediumTerm = FALSE, - useDrugExposureShortTerm = FALSE, - useDrugEraAnyTimePrior = FALSE, - useDrugEraLongTerm = FALSE, - useDrugEraMediumTerm = FALSE, - useDrugEraShortTerm = FALSE, - useDrugEraOverlapping = FALSE, - useDrugEraStartLongTerm = FALSE, - useDrugEraStartMediumTerm = FALSE, - useDrugEraStartShortTerm = FALSE, - useDrugGroupEraAnyTimePrior = FALSE, - useDrugGroupEraLongTerm = FALSE, - useDrugGroupEraMediumTerm = FALSE, - useDrugGroupEraShortTerm = FALSE, - useDrugGroupEraOverlapping = FALSE, - useDrugGroupEraStartLongTerm = FALSE, - useDrugGroupEraStartMediumTerm = FALSE, - useDrugGroupEraStartShortTerm = FALSE, - useProcedureOccurrenceAnyTimePrior = FALSE, - useProcedureOccurrenceLongTerm = FALSE, - useProcedureOccurrenceMediumTerm = FALSE, - useProcedureOccurrenceShortTerm = FALSE, - useDeviceExposureAnyTimePrior = FALSE, - useDeviceExposureLongTerm = FALSE, - useDeviceExposureMediumTerm = FALSE, - useDeviceExposureShortTerm = FALSE, - useMeasurementAnyTimePrior = FALSE, - 
useMeasurementLongTerm = FALSE, - useMeasurementMediumTerm = FALSE, - useMeasurementShortTerm = FALSE, - useMeasurementValueAnyTimePrior = FALSE, - useMeasurementValueLongTerm = FALSE, - useMeasurementValueMediumTerm = FALSE, - useMeasurementValueShortTerm = FALSE, - useMeasurementRangeGroupAnyTimePrior = FALSE, - useMeasurementRangeGroupLongTerm = FALSE, - useMeasurementRangeGroupMediumTerm = FALSE, - useMeasurementRangeGroupShortTerm = FALSE, - useObservationAnyTimePrior = FALSE, - useObservationLongTerm = FALSE, - useObservationMediumTerm = FALSE, - useObservationShortTerm = FALSE, - useCharlsonIndex = FALSE, - useDcsi = FALSE, - useChads2 = FALSE, - useChads2Vasc = FALSE, - useHfrs = FALSE, - useDistinctConditionCountLongTerm = FALSE, - useDistinctConditionCountMediumTerm = FALSE, - useDistinctConditionCountShortTerm = FALSE, - useDistinctIngredientCountLongTerm = FALSE, - useDistinctIngredientCountMediumTerm = FALSE, - useDistinctIngredientCountShortTerm = FALSE, - useDistinctProcedureCountLongTerm = FALSE, - useDistinctProcedureCountMediumTerm = FALSE, - useDistinctProcedureCountShortTerm = FALSE, - useDistinctMeasurementCountLongTerm = FALSE, - useDistinctMeasurementCountMediumTerm = FALSE, - useDistinctMeasurementCountShortTerm = FALSE, - useDistinctObservationCountLongTerm = FALSE, - useDistinctObservationCountMediumTerm = FALSE, - useDistinctObservationCountShortTerm = FALSE, - useVisitCountLongTerm = FALSE, - useVisitCountMediumTerm = FALSE, - useVisitCountShortTerm = FALSE, - useVisitConceptCountLongTerm = FALSE, - useVisitConceptCountMediumTerm = FALSE, - useVisitConceptCountShortTerm = FALSE, - longTermStartDays = -365, - mediumTermStartDays = -180, - shortTermStartDays = -30, - endDays = 0, - includedCovariateConceptIds = c(), - addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { - covariateSettings <- list(temporal = FALSE, - temporalSequence = FALSE) + + 
useDemographicsRace = FALSE, useDemographicsEthnicity = FALSE, useDemographicsIndexYear = FALSE, + useDemographicsIndexMonth = FALSE, useDemographicsPriorObservationTime = FALSE, useDemographicsPostObservationTime = FALSE, + useDemographicsTimeInCohort = FALSE, useDemographicsIndexYearMonth = FALSE, useConditionOccurrenceAnyTimePrior = FALSE, + useConditionOccurrenceLongTerm = FALSE, useConditionOccurrenceMediumTerm = FALSE, useConditionOccurrenceShortTerm = FALSE, + useConditionOccurrencePrimaryInpatientAnyTimePrior = FALSE, useConditionOccurrencePrimaryInpatientLongTerm = FALSE, + useConditionOccurrencePrimaryInpatientMediumTerm = FALSE, useConditionOccurrencePrimaryInpatientShortTerm = FALSE, + useConditionEraAnyTimePrior = FALSE, useConditionEraLongTerm = FALSE, useConditionEraMediumTerm = FALSE, + useConditionEraShortTerm = FALSE, useConditionEraOverlapping = FALSE, useConditionEraStartLongTerm = FALSE, + useConditionEraStartMediumTerm = FALSE, useConditionEraStartShortTerm = FALSE, useConditionGroupEraAnyTimePrior = FALSE, + useConditionGroupEraLongTerm = FALSE, useConditionGroupEraMediumTerm = FALSE, useConditionGroupEraShortTerm = FALSE, + useConditionGroupEraOverlapping = FALSE, useConditionGroupEraStartLongTerm = FALSE, useConditionGroupEraStartMediumTerm = FALSE, + useConditionGroupEraStartShortTerm = FALSE, useDrugExposureAnyTimePrior = FALSE, useDrugExposureLongTerm = FALSE, + useDrugExposureMediumTerm = FALSE, useDrugExposureShortTerm = FALSE, useDrugEraAnyTimePrior = FALSE, + useDrugEraLongTerm = FALSE, useDrugEraMediumTerm = FALSE, useDrugEraShortTerm = FALSE, useDrugEraOverlapping = FALSE, + useDrugEraStartLongTerm = FALSE, useDrugEraStartMediumTerm = FALSE, useDrugEraStartShortTerm = FALSE, + useDrugGroupEraAnyTimePrior = FALSE, useDrugGroupEraLongTerm = FALSE, useDrugGroupEraMediumTerm = FALSE, + useDrugGroupEraShortTerm = FALSE, useDrugGroupEraOverlapping = FALSE, useDrugGroupEraStartLongTerm = FALSE, + useDrugGroupEraStartMediumTerm = FALSE, 
useDrugGroupEraStartShortTerm = FALSE, useProcedureOccurrenceAnyTimePrior = FALSE, + useProcedureOccurrenceLongTerm = FALSE, useProcedureOccurrenceMediumTerm = FALSE, useProcedureOccurrenceShortTerm = FALSE, + useDeviceExposureAnyTimePrior = FALSE, useDeviceExposureLongTerm = FALSE, useDeviceExposureMediumTerm = FALSE, + useDeviceExposureShortTerm = FALSE, useMeasurementAnyTimePrior = FALSE, useMeasurementLongTerm = FALSE, + useMeasurementMediumTerm = FALSE, useMeasurementShortTerm = FALSE, useMeasurementValueAnyTimePrior = FALSE, + useMeasurementValueLongTerm = FALSE, useMeasurementValueMediumTerm = FALSE, useMeasurementValueShortTerm = FALSE, + useMeasurementRangeGroupAnyTimePrior = FALSE, useMeasurementRangeGroupLongTerm = FALSE, useMeasurementRangeGroupMediumTerm = FALSE, + useMeasurementRangeGroupShortTerm = FALSE, useObservationAnyTimePrior = FALSE, useObservationLongTerm = FALSE, + useObservationMediumTerm = FALSE, useObservationShortTerm = FALSE, useCharlsonIndex = FALSE, useDcsi = FALSE, + useChads2 = FALSE, useChads2Vasc = FALSE, useHfrs = FALSE, useEfi = FALSE, useDistinctConditionCountLongTerm = FALSE, + useDistinctConditionCountMediumTerm = FALSE, useDistinctConditionCountShortTerm = FALSE, useDistinctIngredientCountLongTerm = FALSE, + useDistinctIngredientCountMediumTerm = FALSE, useDistinctIngredientCountShortTerm = FALSE, useDistinctProcedureCountLongTerm = FALSE, + useDistinctProcedureCountMediumTerm = FALSE, useDistinctProcedureCountShortTerm = FALSE, useDistinctMeasurementCountLongTerm = FALSE, + useDistinctMeasurementCountMediumTerm = FALSE, useDistinctMeasurementCountShortTerm = FALSE, useDistinctObservationCountLongTerm = FALSE, + useDistinctObservationCountMediumTerm = FALSE, useDistinctObservationCountShortTerm = FALSE, useVisitCountLongTerm = FALSE, + useVisitCountMediumTerm = FALSE, useVisitCountShortTerm = FALSE, useVisitConceptCountLongTerm = FALSE, + useVisitConceptCountMediumTerm = FALSE, useVisitConceptCountShortTerm = FALSE, 
longTermStartDays = -365, + mediumTermStartDays = -180, shortTermStartDays = -30, endDays = 0, includedCovariateConceptIds = c(), + addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, + includedCovariateIds = c()) { + covariateSettings <- list(temporal = FALSE, temporalSequence = FALSE) formalNames <- names(formals(createCovariateSettings)) anyUseTrue <- FALSE for (name in formalNames) { diff --git a/R/DefaultTemporalCovariateSettings.R b/R/DefaultTemporalCovariateSettings.R index e74fdb4e..c6948a5b 100644 --- a/R/DefaultTemporalCovariateSettings.R +++ b/R/DefaultTemporalCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -97,6 +97,8 @@ #' @param useHfrs The Hospital Frailty Risk Score score using all #' conditions prior to the window end. (analysis ID #' 926) +#' @param useEfi Electronic frailty score, using all conditions prior +#' to the window end. (analysis ID 927) #' @param useDistinctConditionCount The number of distinct condition concepts observed #' in the time window. 
(analysis ID 905) #' @param useDistinctIngredientCount The number of distinct ingredients observed in the @@ -138,102 +140,39 @@ #' @examples #' settings <- createTemporalCovariateSettings(useDemographicsGender = TRUE, #' useDemographicsAge = FALSE, -#' useDemographicsAgeGroup = TRUE, -#' useDemographicsRace = TRUE, -#' useDemographicsEthnicity = TRUE, -#' useDemographicsIndexYear = TRUE, -#' useDemographicsIndexMonth = TRUE, -#' useDemographicsPriorObservationTime = FALSE, -#' useDemographicsPostObservationTime = FALSE, -#' useDemographicsTimeInCohort = FALSE, -#' useDemographicsIndexYearMonth = FALSE, -#' useConditionOccurrence = FALSE, -#' useConditionOccurrencePrimaryInpatient = FALSE, -#' useConditionEraStart = FALSE, -#' useConditionEraOverlap = FALSE, -#' useConditionEraGroupStart = FALSE, -#' useConditionEraGroupOverlap = TRUE, -#' useDrugExposure = FALSE, -#' useDrugEraStart = FALSE, -#' useDrugEraOverlap = FALSE, -#' useDrugEraGroupStart = FALSE, -#' useDrugEraGroupOverlap = TRUE, -#' useProcedureOccurrence = TRUE, -#' useDeviceExposure = TRUE, -#' useMeasurement = TRUE, -#' useMeasurementValue = FALSE, -#' useMeasurementRangeGroup = TRUE, -#' useObservation = TRUE, -#' useCharlsonIndex = TRUE, -#' useDcsi = TRUE, -#' useChads2 = TRUE, -#' useChads2Vasc = TRUE, -#' useHfrs = FALSE, -#' useDistinctConditionCount = FALSE, -#' useDistinctIngredientCount = FALSE, -#' useDistinctProcedureCount = FALSE, -#' useDistinctMeasurementCount = FALSE, -#' useDistinctObservationCount = FALSE, -#' useVisitCount = FALSE, -#' useVisitConceptCount = FALSE, -#' temporalStartDays = -365:-1, -#' temporalEndDays = -365:-1, -#' includedCovariateConceptIds = c(), -#' addDescendantsToInclude = FALSE, -#' excludedCovariateConceptIds = c(), -#' addDescendantsToExclude = FALSE, -#' includedCovariateIds = c()) +#' +#' useDemographicsAgeGroup = TRUE, useDemographicsRace = TRUE, useDemographicsEthnicity = TRUE, useDemographicsIndexYear = TRUE, +#' useDemographicsIndexMonth = TRUE, 
useDemographicsPriorObservationTime = FALSE, useDemographicsPostObservationTime = FALSE, +#' useDemographicsTimeInCohort = FALSE, useDemographicsIndexYearMonth = FALSE, useConditionOccurrence = FALSE, +#' useConditionOccurrencePrimaryInpatient = FALSE, useConditionEraStart = FALSE, useConditionEraOverlap = FALSE, +#' useConditionEraGroupStart = FALSE, useConditionEraGroupOverlap = TRUE, useDrugExposure = FALSE, +#' useDrugEraStart = FALSE, useDrugEraOverlap = FALSE, useDrugEraGroupStart = FALSE, useDrugEraGroupOverlap = TRUE, +#' useProcedureOccurrence = TRUE, useDeviceExposure = TRUE, useMeasurement = TRUE, useMeasurementValue = FALSE, +#' useMeasurementRangeGroup = TRUE, useObservation = TRUE, useCharlsonIndex = TRUE, useDcsi = TRUE, +#' useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = FALSE, useEfi = FALSE, useDistinctConditionCount = FALSE, +#' useDistinctIngredientCount = FALSE, useDistinctProcedureCount = FALSE, useDistinctMeasurementCount = FALSE, +#' useDistinctObservationCount = FALSE, useVisitCount = FALSE, useVisitConceptCount = FALSE, temporalStartDays = -365:-1, +#' temporalEndDays = -365:-1, includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, +#' excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, includedCovariateIds = c()) #' #' @export createTemporalCovariateSettings <- function(useDemographicsGender = FALSE, useDemographicsAge = FALSE, - useDemographicsAgeGroup = FALSE, - useDemographicsRace = FALSE, - useDemographicsEthnicity = FALSE, - useDemographicsIndexYear = FALSE, - useDemographicsIndexMonth = FALSE, - useDemographicsPriorObservationTime = FALSE, - useDemographicsPostObservationTime = FALSE, - useDemographicsTimeInCohort = FALSE, - useDemographicsIndexYearMonth = FALSE, - useConditionOccurrence = FALSE, - useConditionOccurrencePrimaryInpatient = FALSE, - useConditionEraStart = FALSE, - useConditionEraOverlap = FALSE, - useConditionEraGroupStart = FALSE, - useConditionEraGroupOverlap = FALSE, - 
useDrugExposure = FALSE, - useDrugEraStart = FALSE, - useDrugEraOverlap = FALSE, - useDrugEraGroupStart = FALSE, - useDrugEraGroupOverlap = FALSE, - useProcedureOccurrence = FALSE, - useDeviceExposure = FALSE, - useMeasurement = FALSE, - useMeasurementValue = FALSE, - useMeasurementRangeGroup = FALSE, - useObservation = FALSE, - useCharlsonIndex = FALSE, - useDcsi = FALSE, - useChads2 = FALSE, - useChads2Vasc = FALSE, - useHfrs = FALSE, - useDistinctConditionCount = FALSE, - useDistinctIngredientCount = FALSE, - useDistinctProcedureCount = FALSE, - useDistinctMeasurementCount = FALSE, - useDistinctObservationCount = FALSE, - useVisitCount = FALSE, - useVisitConceptCount = FALSE, - temporalStartDays = -365:-1, - temporalEndDays = -365:-1, - includedCovariateConceptIds = c(), - addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { - covariateSettings <- list(temporal = TRUE, - temporalSequence = FALSE) + + useDemographicsAgeGroup = FALSE, useDemographicsRace = FALSE, useDemographicsEthnicity = FALSE, useDemographicsIndexYear = FALSE, + useDemographicsIndexMonth = FALSE, useDemographicsPriorObservationTime = FALSE, useDemographicsPostObservationTime = FALSE, + useDemographicsTimeInCohort = FALSE, useDemographicsIndexYearMonth = FALSE, useConditionOccurrence = FALSE, + useConditionOccurrencePrimaryInpatient = FALSE, useConditionEraStart = FALSE, useConditionEraOverlap = FALSE, + useConditionEraGroupStart = FALSE, useConditionEraGroupOverlap = FALSE, useDrugExposure = FALSE, + useDrugEraStart = FALSE, useDrugEraOverlap = FALSE, useDrugEraGroupStart = FALSE, useDrugEraGroupOverlap = FALSE, + useProcedureOccurrence = FALSE, useDeviceExposure = FALSE, useMeasurement = FALSE, useMeasurementValue = FALSE, + useMeasurementRangeGroup = FALSE, useObservation = FALSE, useCharlsonIndex = FALSE, useDcsi = FALSE, + useChads2 = FALSE, useChads2Vasc = FALSE, useHfrs = FALSE, useEfi = FALSE, 
useDistinctConditionCount = FALSE, + useDistinctIngredientCount = FALSE, useDistinctProcedureCount = FALSE, useDistinctMeasurementCount = FALSE, + useDistinctObservationCount = FALSE, useVisitCount = FALSE, useVisitConceptCount = FALSE, temporalStartDays = -365:-1, + temporalEndDays = -365:-1, includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), + addDescendantsToExclude = FALSE, includedCovariateIds = c()) { + covariateSettings <- list(temporal = TRUE, temporalSequence = FALSE) formalNames <- names(formals(createTemporalCovariateSettings)) anyUseTrue <- FALSE for (name in formalNames) { diff --git a/R/DefaultTemporalSequenceCovariateSettings.R b/R/DefaultTemporalSequenceCovariateSettings.R index d25bef81..ccdbf0d0 100644 --- a/R/DefaultTemporalSequenceCovariateSettings.R +++ b/R/DefaultTemporalSequenceCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -62,13 +62,15 @@ #' (analysis ID 702) #' @param useObservation One covariate per observation in the observation #' table in the time window. (analysis ID 801) -#' @param timePart The interval scale ('DAY', 'MONTH', 'YEAR') -#' @param timeInterval Fixed interval length for timeId using the 'timePart' scale. For example, a 'timePart' of DAY with -#' 'timeInterval' 30 has timeIds where timeId 1 is day 0 to day 29, timeId 2 is day 30 to day 59, etc. -#' @param sequenceEndDay What is the end day (relative to the -#' index date) of the data extraction? -#' @param sequenceStartDay What is the start day (relative to the -#' index date) of the data extraction? +#' @param timePart The interval scale ('DAY', 'MONTH', 'YEAR') +#' @param timeInterval Fixed interval length for timeId using the +#' 'timePart' scale. 
For example, a 'timePart' of DAY +#' with 'timeInterval' 30 has timeIds where timeId 1 is +#' day 0 to day 29, timeId 2 is day 30 to day 59, etc. +#' @param sequenceEndDay What is the end day (relative to the index date) of +#' the data extraction? +#' @param sequenceStartDay What is the start day (relative to the index date) +#' of the data extraction? #' @param includedCovariateConceptIds A list of concept IDs that should be used to #' construct covariates. #' @param addDescendantsToInclude Should descendant concept IDs be added to the list @@ -85,65 +87,28 @@ #' #' @examples #' settings <- createTemporalSequenceCovariateSettings(useDemographicsGender = TRUE, -#' useDemographicsAge = FALSE, -#' useDemographicsAgeGroup = TRUE, -#' useDemographicsRace = TRUE, -#' useDemographicsEthnicity = TRUE, -#' useDemographicsIndexYear = TRUE, -#' useDemographicsIndexMonth = TRUE, -#' useConditionOccurrence = FALSE, -#' useConditionOccurrencePrimaryInpatient = FALSE, -#' useConditionEraStart = FALSE, -#' useConditionEraGroupStart = FALSE, -#' useDrugExposure = FALSE, -#' useDrugEraStart = FALSE, -#' useDrugEraGroupStart = FALSE, -#' useProcedureOccurrence = TRUE, -#' useDeviceExposure = TRUE, -#' useMeasurement = TRUE, -#' useMeasurementValue = FALSE, -#' useObservation = TRUE, -#' timePart = 'DAY', -#' timeInterval = 1, -#' sequenceEndDay = -1, -#' sequenceStartDay = -730, -#' includedCovariateConceptIds = c(), -#' addDescendantsToInclude = FALSE, -#' excludedCovariateConceptIds = c(), -#' addDescendantsToExclude = FALSE, -#' includedCovariateIds = c()) +#' useDemographicsAge = FALSE, +#' +#' useDemographicsAgeGroup = TRUE, useDemographicsRace = TRUE, useDemographicsEthnicity = TRUE, useDemographicsIndexYear = TRUE, +#' useDemographicsIndexMonth = TRUE, useConditionOccurrence = FALSE, useConditionOccurrencePrimaryInpatient = FALSE, +#' useConditionEraStart = FALSE, useConditionEraGroupStart = FALSE, useDrugExposure = FALSE, useDrugEraStart = FALSE, +#' useDrugEraGroupStart = 
FALSE, useProcedureOccurrence = TRUE, useDeviceExposure = TRUE, useMeasurement = TRUE, +#' useMeasurementValue = FALSE, useObservation = TRUE, timePart = "DAY", timeInterval = 1, sequenceEndDay = -1, +#' sequenceStartDay = -730, includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), +#' addDescendantsToExclude = FALSE, includedCovariateIds = c()) #' #' @export createTemporalSequenceCovariateSettings <- function(useDemographicsGender = FALSE, - useDemographicsAge = FALSE, - useDemographicsAgeGroup = FALSE, - useDemographicsRace = FALSE, - useDemographicsEthnicity = FALSE, - useDemographicsIndexYear = FALSE, - useDemographicsIndexMonth = FALSE, - useConditionOccurrence = FALSE, - useConditionOccurrencePrimaryInpatient = FALSE, - useConditionEraStart = FALSE, - useConditionEraGroupStart = FALSE, - useDrugExposure = FALSE, - useDrugEraStart = FALSE, - useDrugEraGroupStart = FALSE, - useProcedureOccurrence = FALSE, - useDeviceExposure = FALSE, - useMeasurement = FALSE, - useMeasurementValue = FALSE, - useObservation = FALSE, - timePart = 'month', - timeInterval = 1, - sequenceEndDay = -1, - sequenceStartDay = -730, - includedCovariateConceptIds = c(), - addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { - covariateSettings <- list(temporal = FALSE, - temporalSequence = TRUE) + useDemographicsAge = FALSE, + + useDemographicsAgeGroup = FALSE, useDemographicsRace = FALSE, useDemographicsEthnicity = FALSE, useDemographicsIndexYear = FALSE, + useDemographicsIndexMonth = FALSE, useConditionOccurrence = FALSE, useConditionOccurrencePrimaryInpatient = FALSE, + useConditionEraStart = FALSE, useConditionEraGroupStart = FALSE, useDrugExposure = FALSE, useDrugEraStart = FALSE, + useDrugEraGroupStart = FALSE, useProcedureOccurrence = FALSE, useDeviceExposure = FALSE, useMeasurement = FALSE, + useMeasurementValue = FALSE, useObservation = FALSE, 
timePart = "month", timeInterval = 1, sequenceEndDay = -1, + sequenceStartDay = -730, includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), + addDescendantsToExclude = FALSE, includedCovariateIds = c()) { + covariateSettings <- list(temporal = FALSE, temporalSequence = TRUE) formalNames <- names(formals(createTemporalSequenceCovariateSettings)) anyUseTrue <- FALSE for (name in formalNames) { diff --git a/R/DetailedCovariateSettings.R b/R/DetailedCovariateSettings.R index 60ee84fc..8011d95c 100644 --- a/R/DetailedCovariateSettings.R +++ b/R/DetailedCovariateSettings.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -30,9 +30,7 @@ #' #' @export createDetailedCovariateSettings <- function(analyses = list()) { - covariateSettings <- list(temporal = FALSE, - temporalSequence = FALSE, - analyses = analyses) + covariateSettings <- list(temporal = FALSE, temporalSequence = FALSE, analyses = analyses) attr(covariateSettings, "fun") <- "getDbDefaultCovariateData" class(covariateSettings) <- "covariateSettings" return(covariateSettings) @@ -61,8 +59,7 @@ createDetailedCovariateSettings <- function(analyses = list()) { createDetailedTemporalCovariateSettings <- function(analyses = list(), temporalStartDays = -365:-1, temporalEndDays = -365:-1) { - covariateSettings <- list(temporal = TRUE, - temporalSequence = FALSE) + covariateSettings <- list(temporal = TRUE, temporalSequence = FALSE) formalNames <- names(formals(createDetailedTemporalCovariateSettings)) for (name in formalNames) { covariateSettings[[name]] <- get(name) @@ -102,13 +99,8 @@ createDetailedTemporalCovariateSettings <- function(analyses = list(), #' analysisDetails <- createAnalysisDetails(analysisId = 1, #' sqlFileName = "DemographicsGender.sql", #' parameters = list(analysisId = 1, -#' analysisName = "Gender", 
-#' domainId = "Demographics"), -#' includedCovariateConceptIds = c(), -#' addDescendantsToInclude = FALSE, -#' excludedCovariateConceptIds = c(), -#' addDescendantsToExclude = FALSE, -#' includedCovariateIds = c()) +#' analysisName = "Gender", domainId = "Demographics"), includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, +#' excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, includedCovariateIds = c()) #' #' #' @export @@ -116,10 +108,9 @@ createAnalysisDetails <- function(analysisId, sqlFileName, parameters, includedCovariateConceptIds = c(), - addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { + + addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, + includedCovariateIds = c()) { analysisDetail <- list() formalNames <- names(formals(createAnalysisDetails)) for (name in formalNames) { @@ -173,9 +164,8 @@ convertPrespecSettingsToDetailedSettings <- function(covariateSettings) { #' @export createDefaultCovariateSettings <- function(includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { + + excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, includedCovariateIds = c()) { rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction")) newJson <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$getDefaultPrespecAnalyses() covariateSettings <- .fromJson(newJson) @@ -210,9 +200,8 @@ createDefaultCovariateSettings <- function(includedCovariateConceptIds = c(), #' @export createDefaultTemporalCovariateSettings <- function(includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { + + 
excludedCovariateConceptIds = c(), addDescendantsToExclude = FALSE, includedCovariateIds = c()) { rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction")) newJson <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$getDefaultPrespecTemporalAnalyses() covariateSettings <- .fromJson(newJson) diff --git a/R/FeatureExtraction.R b/R/FeatureExtraction.R index 2562d5fc..772b7e3c 100644 --- a/R/FeatureExtraction.R +++ b/R/FeatureExtraction.R @@ -1,6 +1,6 @@ # @file FeatureExtraction.R # -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -28,16 +28,23 @@ NULL .onLoad <- function(libname, pkgname) { - + rJava::.jpackage(pkgname, lib.loc = libname) - + # Verify checksum of JAR: - storedChecksum <- scan(file = system.file("csv", "jarChecksum.txt", package = "FeatureExtraction"), what = character(), quiet = TRUE) - computedChecksum <- tryCatch(rJava::J("org.ohdsi.featureExtraction.JarChecksum","computeJarChecksum"), - error = function(e) {warning("Problem connecting to Java. This is normal when runing roxygen."); return("")}) + storedChecksum <- scan(file = system.file("csv", + "jarChecksum.txt", + package = "FeatureExtraction"), + what = character(), quiet = TRUE) + computedChecksum <- tryCatch(rJava::J("org.ohdsi.featureExtraction.JarChecksum", + "computeJarChecksum"), + error = function(e) { + warning("Problem connecting to Java. This is normal when runing roxygen.") + return("") + }) if (computedChecksum != "" && (storedChecksum != computedChecksum)) { warning("Java library version does not match R package version! Please try reinstalling the FeatureExtraction package. - Make sure to close all instances of R, and open only one instance before reinstalling. Also make sure your + Make sure to close all instances of R, and open only one instance before reinstalling. 
Also make sure your R workspace is not reloaded on startup. Delete your .Rdata file if necessary") } } diff --git a/R/GetCovariates.R b/R/GetCovariates.R index 4801b952..b78314a0 100644 --- a/R/GetCovariates.R +++ b/R/GetCovariates.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -61,18 +61,9 @@ #' Returns an object of type \code{covariateData}, containing information on the covariates. #' #' @export -getDbCovariateData <- function(connectionDetails = NULL, - connection = NULL, - oracleTempSchema = NULL, - cdmDatabaseSchema, - cdmVersion = "5", - cohortTable = "cohort", - cohortDatabaseSchema = cdmDatabaseSchema, - cohortTableIsTemp = FALSE, - cohortId = -1, - rowIdField = "subject_id", - covariateSettings, - aggregated = FALSE) { +getDbCovariateData <- function(connectionDetails = NULL, connection = NULL, oracleTempSchema = NULL, + cdmDatabaseSchema, cdmVersion = "5", cohortTable = "cohort", cohortDatabaseSchema = cdmDatabaseSchema, + cohortTableIsTemp = FALSE, cohortId = -1, rowIdField = "subject_id", covariateSettings, aggregated = FALSE) { if (is.null(connectionDetails) && is.null(connection)) { stop("Need to provide either connectionDetails or connection") } @@ -119,46 +110,46 @@ getDbCovariateData <- function(connectionDetails = NULL, if (is.list(covariateSettings)) { covariateData <- NULL hasData <- function(data) { - return(!is.null(data) && (data %>% count() %>% pull()) > 0) + return(!is.null(data) && (data %>% + count() %>% + pull()) > 0) } for (i in 1:length(covariateSettings)) { fun <- attr(covariateSettings[[i]], "fun") args <- list(connection = connection, oracleTempSchema = oracleTempSchema, cdmDatabaseSchema = cdmDatabaseSchema, - cohortTable = cohortDatabaseSchemaTable, - cohortId = cohortId, - cdmVersion = cdmVersion, - rowIdField = rowIdField, - covariateSettings = covariateSettings[[i]], - 
aggregated = aggregated) + + cohortTable = cohortDatabaseSchemaTable, cohortId = cohortId, cdmVersion = cdmVersion, + rowIdField = rowIdField, covariateSettings = covariateSettings[[i]], aggregated = aggregated) tempCovariateData <- do.call(eval(parse(text = fun)), args) if (is.null(covariateData)) { covariateData <- tempCovariateData } else { if (hasData(covariateData$covariates)) { - if (hasData(tempCovariateData$covariates)) { - Andromeda::appendToTable(covariateData$covariates, tempCovariateData$covariates) - } + if (hasData(tempCovariateData$covariates)) { + Andromeda::appendToTable(covariateData$covariates, tempCovariateData$covariates) + } } else if (hasData(tempCovariateData$covariates)) { - covariateData$covariates <- tempCovariateData$covariates + covariateData$covariates <- tempCovariateData$covariates } if (hasData(covariateData$covariatesContinuous)) { - if (hasData(tempCovariateData$covariatesContinuous)) { - Andromeda::appendToTable(covariateData$covariatesContinuous, tempCovariateData$covariatesContinuous) - } else if (hasData(tempCovariateData$covariatesContinuous)) { - covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous - } - } + if (hasData(tempCovariateData$covariatesContinuous)) { + Andromeda::appendToTable(covariateData$covariatesContinuous, + tempCovariateData$covariatesContinuous) + } else if (hasData(tempCovariateData$covariatesContinuous)) { + covariateData$covariatesContinuous <- tempCovariateData$covariatesContinuous + } + } Andromeda::appendToTable(covariateData$covariateRef, tempCovariateData$covariateRef) Andromeda::appendToTable(covariateData$analysisRef, tempCovariateData$analysisRef) for (name in names(attr(tempCovariateData, "metaData"))) { - if (is.null(attr(covariateData, "metaData")[name])) { - attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]] - } else { - attr(covariateData, "metaData")[[name]] <- list(attr(covariateData, "metaData")[[name]], - 
attr(tempCovariateData, "metaData")[[name]]) - } + if (is.null(attr(covariateData, "metaData")[name])) { + attr(covariateData, "metaData")[[name]] <- attr(tempCovariateData, "metaData")[[name]] + } else { + attr(covariateData, "metaData")[[name]] <- list(attr(covariateData, "metaData")[[name]], + attr(tempCovariateData, "metaData")[[name]]) + } } } } diff --git a/R/GetCovariatesFromCohortAttributes.R b/R/GetCovariatesFromCohortAttributes.R index e6aa30d7..751fb214 100644 --- a/R/GetCovariatesFromCohortAttributes.R +++ b/R/GetCovariatesFromCohortAttributes.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -29,11 +29,8 @@ getDbCohortAttrCovariatesData <- function(connection, oracleTempSchema = NULL, cdmDatabaseSchema, cohortTable = "#cohort_person", - cohortId = -1, - cdmVersion = "5", - rowIdField = "subject_id", - covariateSettings, - aggregated = FALSE) { + + cohortId = -1, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, aggregated = FALSE) { if (aggregated) { stop("Aggregation not implemented for covariates from cohort attributes.") } @@ -42,7 +39,7 @@ getDbCohortAttrCovariatesData <- function(connection, } start <- Sys.time() writeLines("Constructing covariates from cohort attributes table") - + if (is.null(covariateSettings$includeAttrIds) || length(covariateSettings$includeAttrIds) == 0) { hasIncludeAttrIds <- FALSE } else { @@ -52,58 +49,53 @@ getDbCohortAttrCovariatesData <- function(connection, DatabaseConnector::insertTable(connection, tableName = "#included_attr", data = data.frame(attribute_definition_id = as.integer(covariateSettings$includeAttrIds)), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - oracleTempSchema = oracleTempSchema) + + dropTableIfExists = TRUE, createTable = TRUE, tempTable = TRUE, oracleTempSchema = oracleTempSchema) } - sql <- 
SqlRender::readSql(system.file("sql/sql_server/GetAttrCovariates.sql", package = "FeatureExtraction")) + sql <- SqlRender::readSql(system.file("sql/sql_server/GetAttrCovariates.sql", + package = "FeatureExtraction")) renderedSql <- SqlRender::render(sql = sql, attr_database_schema = covariateSettings$attrDatabaseSchema, - cohort_table = cohortTable, - row_id_field = rowIdField, - cohort_attribute_table = covariateSettings$cohortAttrTable, - has_include_attr_ids = hasIncludeAttrIds) - renderedSql <- SqlRender::translate(sql = renderedSql, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - # renderedSql <- SqlRender::loadRenderTranslateSql("GetAttrCovariates.sql", - # packageName = "FeatureExtraction", - # dbms = attr(connection, "dbms"), - # oracleTempSchema = oracleTempSchema, - # attr_database_schema = covariateSettings$attrDatabaseSchema, - # cohort_table = cohortTable, - # row_id_field = rowIdField, - # cohort_attribute_table = covariateSettings$cohortAttrTable, - # has_include_attr_ids = hasIncludeAttrIds) - - covariates <- DatabaseConnector::querySql(connection, renderedSql, snakeCaseToCamelCase = TRUE) + + cohort_table = cohortTable, row_id_field = rowIdField, cohort_attribute_table = covariateSettings$cohortAttrTable, + has_include_attr_ids = hasIncludeAttrIds) + renderedSql <- SqlRender::translate(sql = renderedSql, targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) + # renderedSql <- SqlRender::loadRenderTranslateSql('GetAttrCovariates.sql', packageName = + # 'FeatureExtraction', dbms = attr(connection, 'dbms'), oracleTempSchema = oracleTempSchema, + # attr_database_schema = covariateSettings$attrDatabaseSchema, cohort_table = cohortTable, + # row_id_field = rowIdField, cohort_attribute_table = covariateSettings$cohortAttrTable, + # has_include_attr_ids = hasIncludeAttrIds) + + covariates <- DatabaseConnector::querySql(connection, renderedSql, snakeCaseToCamelCase = TRUE) covariateRefSql <- 
"SELECT attribute_definition_id AS covariate_id, attribute_name AS covariate_name FROM @attr_database_schema.@attr_definition_table ORDER BY attribute_definition_id" covariateRefSql <- SqlRender::render(covariateRefSql, attr_database_schema = covariateSettings$attrDatabaseSchema, - attr_definition_table = covariateSettings$attrDefinitionTable) - covariateRefSql <- SqlRender::translate(sql = covariateRefSql, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - covariateRef <- DatabaseConnector::querySql(connection, covariateRefSql, snakeCaseToCamelCase = TRUE) - covariateRef$analysisId <- rep(as.numeric(covariateSettings$analysisId), length = nrow(covariateRef)) + + attr_definition_table = covariateSettings$attrDefinitionTable) + covariateRefSql <- SqlRender::translate(sql = covariateRefSql, targetDialect = attr(connection, + "dbms"), + oracleTempSchema = oracleTempSchema) + covariateRef <- DatabaseConnector::querySql(connection, + covariateRefSql, + snakeCaseToCamelCase = TRUE) + covariateRef$analysisId <- rep(as.numeric(covariateSettings$analysisId), + length = nrow(covariateRef)) covariateRef$conceptId <- rep(0, length = nrow(covariateRef)) - + analysisRef <- data.frame(analysisId = as.numeric(covariateSettings$analysisId), analysisName = "Covariates from cohort attributes", - domainId = "Cohort", - startDay = as.numeric(NA), - endDay = as.numeric(NA), - isBinary = ifelse(covariateSettings$isBinary, "Y", "N"), - missingMeansZero = ifelse(covariateSettings$missingMeansZero, "Y", "N")) + + domainId = "Cohort", startDay = as.numeric(NA), endDay = as.numeric(NA), isBinary = ifelse(covariateSettings$isBinary, + "Y", "N"), missingMeansZero = ifelse(covariateSettings$missingMeansZero, "Y", "N")) delta <- Sys.time() - start writeLines(paste("Loading took", signif(delta, 3), attr(delta, "units"))) - + result <- createEmptyCovariateData(cohortId, aggregated, covariateSettings$temporal) - result$covariates = covariates - result$covariateRef = 
covariateRef - result$analysisRef = analysisRef - + result$covariates <- covariates + result$covariateRef <- covariateRef + result$analysisRef <- analysisRef + return(result) } @@ -127,10 +119,10 @@ getDbCohortAttrCovariatesData <- function(connection, #' @param attrDefinitionTable The name of the attribute definition table. #' @param cohortAttrTable The name of the cohort attribute table. #' @param includeAttrIds (optional) A list of attribute definition IDs to restrict to. -#' @param isBinary Needed for aggregation: Are these binary variables? Binary -#' variables should only have the values 0 or 1. -#' @param missingMeansZero Needed for aggregation: For continuous values, should missing -#' values be interpreted as 0? +#' @param isBinary Needed for aggregation: Are these binary variables? Binary variables +#' should only have the values 0 or 1. +#' @param missingMeansZero Needed for aggregation: For continuous values, should missing values be +#' interpreted as 0? #' #' @return #' An object of type \code{covariateSettings}, to be used in other functions. 
@@ -139,10 +131,8 @@ getDbCohortAttrCovariatesData <- function(connection, createCohortAttrCovariateSettings <- function(analysisId = -1, attrDatabaseSchema, attrDefinitionTable = "attribute_definition", - cohortAttrTable = "cohort_attribute", - includeAttrIds = c(), - isBinary = FALSE, - missingMeansZero = FALSE) { + + cohortAttrTable = "cohort_attribute", includeAttrIds = c(), isBinary = FALSE, missingMeansZero = FALSE) { # First: get the default values: covariateSettings <- list() for (name in names(formals(createCohortAttrCovariateSettings))) { @@ -154,7 +144,7 @@ createCohortAttrCovariateSettings <- function(analysisId = -1, if (name %in% names(covariateSettings)) covariateSettings[[name]] <- values[[name]] } - + attr(covariateSettings, "fun") <- "getDbCohortAttrCovariatesData" class(covariateSettings) <- "covariateSettings" return(covariateSettings) diff --git a/R/GetDefaultCovariates.R b/R/GetDefaultCovariates.R index 12ca90af..380b70bc 100644 --- a/R/GetDefaultCovariates.R +++ b/R/GetDefaultCovariates.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -21,16 +21,19 @@ #' Includes covariates for all drugs, drug classes, condition, condition classes, procedures, #' observations, etc. #' -#' @param covariateSettings Either an object of type \code{covariateSettings} as created using one -#' of the createCovariate functions, or a list of such objects. -#' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting covariates -#' should be stored. -#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates will -#' be stored. If not provided, results will be fetched to R. The table can be -#' a permanent table in the \code{targetDatabaseSchema} or a temp table. If -#' it is a temp table, do not specify \code{targetDatabaseSchema}. 
-#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will be stored. -#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will be stored. +#' @param covariateSettings Either an object of type \code{covariateSettings} as created using +#' one of the createCovariate functions, or a list of such objects. +#' @param targetDatabaseSchema (Optional) The name of the database schema where the resulting +#' covariates should be stored. +#' @param targetCovariateTable (Optional) The name of the table where the resulting covariates +#' will be stored. If not provided, results will be fetched to R. The +#' table can be a permanent table in the \code{targetDatabaseSchema} +#' or a temp table. If it is a temp table, do not specify +#' \code{targetDatabaseSchema}. +#' @param targetCovariateRefTable (Optional) The name of the table where the covariate reference will +#' be stored. +#' @param targetAnalysisRefTable (Optional) The name of the table where the analysis reference will +#' be stored. 
#' #' @template GetCovarParams #' @@ -39,15 +42,9 @@ getDbDefaultCovariateData <- function(connection, oracleTempSchema = NULL, cdmDatabaseSchema, cohortTable = "#cohort_person", - cohortId = -1, - cdmVersion = "5", - rowIdField = "subject_id", - covariateSettings, - targetDatabaseSchema, - targetCovariateTable, - targetCovariateRefTable, - targetAnalysisRefTable, - aggregated = FALSE) { + + cohortId = -1, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, targetDatabaseSchema, + targetCovariateTable, targetCovariateRefTable, targetAnalysisRefTable, aggregated = FALSE) { if (!is(covariateSettings, "covariateSettings")) { stop("Covariate settings object not of type covariateSettings") } @@ -57,10 +54,11 @@ getDbDefaultCovariateData <- function(connection, if (!missing(targetCovariateTable) && !is.null(targetCovariateTable) && aggregated) { stop("Writing aggregated results to database is currently not supported") } - + settings <- .toJson(covariateSettings) rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$init(system.file("", package = "FeatureExtraction")) - json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, cohortTable, rowIdField, rJava::.jarray(as.character(cohortId)), cdmDatabaseSchema) + json <- rJava::J("org.ohdsi.featureExtraction.FeatureExtraction")$createSql(settings, aggregated, + cohortTable, rowIdField, rJava::.jarray(as.character(cohortId)), cdmDatabaseSchema) todo <- .fromJson(json) if (length(todo$tempTables) != 0) { ParallelLogger::logInfo("Sending temp tables to server") @@ -68,84 +66,78 @@ getDbDefaultCovariateData <- function(connection, DatabaseConnector::insertTable(connection, tableName = names(todo$tempTables)[i], data = as.data.frame(todo$tempTables[[i]]), - dropTableIfExists = TRUE, - createTable = TRUE, - tempTable = TRUE, - oracleTempSchema = oracleTempSchema) + + dropTableIfExists = TRUE, createTable = TRUE, tempTable = TRUE, oracleTempSchema = oracleTempSchema) } } - + 
ParallelLogger::logInfo("Constructing features on server") - - sql <- SqlRender::translate(sql = todo$sqlConstruction, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) + + sql <- SqlRender::translate(sql = todo$sqlConstruction, targetDialect = attr(connection, "dbms"), + oracleTempSchema = oracleTempSchema) profile <- (!is.null(getOption("dbProfile")) && getOption("dbProfile") == TRUE) DatabaseConnector::executeSql(connection, sql, profile = profile) - + if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { ParallelLogger::logInfo("Fetching data from server") start <- Sys.time() # Binary or non-aggregated features covariateData <- Andromeda::andromeda() if (!is.null(todo$sqlQueryFeatures)) { - sql <- SqlRender::translate(sql = todo$sqlQueryFeatures, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariates", - snakeCaseToCamelCase = TRUE) - } - + sql <- SqlRender::translate(sql = todo$sqlQueryFeatures, targetDialect = attr(connection, + "dbms"), oracleTempSchema = oracleTempSchema) + + DatabaseConnector::querySqlToAndromeda(connection = connection, + sql = sql, + andromeda = covariateData, + + andromedaTableName = "covariates", snakeCaseToCamelCase = TRUE) + } + # Continuous aggregated features if (!is.null(todo$sqlQueryContinuousFeatures)) { sql <- SqlRender::translate(sql = todo$sqlQueryContinuousFeatures, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariatesContinuous", - snakeCaseToCamelCase = TRUE) + targetDialect = attr(connection, + "dbms"), oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda(connection = connection, + sql = sql, + andromeda = 
covariateData, + + andromedaTableName = "covariatesContinuous", snakeCaseToCamelCase = TRUE) } - + # Covariate reference - sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "covariateRef", - snakeCaseToCamelCase = TRUE) - + sql <- SqlRender::translate(sql = todo$sqlQueryFeatureRef, targetDialect = attr(connection, + "dbms"), + oracleTempSchema = oracleTempSchema) + + DatabaseConnector::querySqlToAndromeda(connection = connection, + sql = sql, + andromeda = covariateData, + + andromedaTableName = "covariateRef", snakeCaseToCamelCase = TRUE) + # Analysis reference - sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "analysisRef", - snakeCaseToCamelCase = TRUE) - + sql <- SqlRender::translate(sql = todo$sqlQueryAnalysisRef, targetDialect = attr(connection, + "dbms"), oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda(connection = connection, + sql = sql, + andromeda = covariateData, + + andromedaTableName = "analysisRef", snakeCaseToCamelCase = TRUE) + # Time reference if (!is.null(todo$sqlQueryTimeRef)) { - sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, - targetDialect = attr(connection, "dbms"), - oracleTempSchema = oracleTempSchema) - DatabaseConnector::querySqlToAndromeda(connection = connection, - sql = sql, - andromeda = covariateData, - andromedaTableName = "timeRef", - snakeCaseToCamelCase = TRUE) + sql <- SqlRender::translate(sql = todo$sqlQueryTimeRef, targetDialect = attr(connection, + "dbms"), oracleTempSchema = oracleTempSchema) + DatabaseConnector::querySqlToAndromeda(connection 
= connection, + sql = sql, + andromeda = covariateData, + + andromedaTableName = "timeRef", snakeCaseToCamelCase = TRUE) } - - + + delta <- Sys.time() - start ParallelLogger::logInfo("Fetching data took ", signif(delta, 3), " ", attr(delta, "units")) } else { @@ -160,7 +152,7 @@ getDbDefaultCovariateData <- function(connection, } return(sub("FROM", paste("INTO", tableName, "FROM"), sql)) } - + # Covariates if (!is.null(todo$sqlQueryFeatures)) { sql <- convertQuery(todo$sqlQueryFeatures, targetDatabaseSchema, targetCovariateTable) @@ -169,7 +161,7 @@ getDbDefaultCovariateData <- function(connection, oracleTempSchema = oracleTempSchema) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } - + # Covariate reference if (!missing(targetCovariateRefTable) && !is.null(targetCovariateRefTable)) { sql <- convertQuery(todo$sqlQueryFeatureRef, targetDatabaseSchema, targetCovariateRefTable) @@ -178,7 +170,7 @@ getDbDefaultCovariateData <- function(connection, oracleTempSchema = oracleTempSchema) DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } - + # Analysis reference if (!missing(targetAnalysisRefTable) && !is.null(targetAnalysisRefTable)) { sql <- convertQuery(todo$sqlQueryAnalysisRef, targetDatabaseSchema, targetAnalysisRefTable) @@ -189,7 +181,7 @@ getDbDefaultCovariateData <- function(connection, } delta <- Sys.time() - start ParallelLogger::logInfo("Writing data took", signif(delta, 3), " ", attr(delta, "units")) - + } # Drop temp tables sql <- SqlRender::translate(sql = todo$sqlCleanup, @@ -206,7 +198,7 @@ getDbDefaultCovariateData <- function(connection, DatabaseConnector::executeSql(connection, sql, progressBar = FALSE, reportOverallTime = FALSE) } } - + if (missing(targetCovariateTable) || is.null(targetCovariateTable)) { attr(covariateData, "metaData") <- list() if (is.null(covariateData$covariates) && is.null(covariateData$covariatesContinuous)) { diff --git 
a/R/HelperFunctions.R b/R/HelperFunctions.R index abcb37bb..4868bcf0 100644 --- a/R/HelperFunctions.R +++ b/R/HelperFunctions.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -16,8 +16,8 @@ #' Filter covariates by row ID #' -#' @param covariateData An object of type \code{CovariateData} -#' @param rowIds A vector containing the rowIds to keep. +#' @param covariateData An object of type \code{CovariateData} +#' @param rowIds A vector containing the rowIds to keep. #' #' @return #' An object of type \code{covariateData}. @@ -25,16 +25,15 @@ filterByRowId <- function(covariateData, rowIds) { if (!isCovariateData(covariateData)) stop("Data not of class CovariateData") - if (!Andromeda::isValidAndromeda(covariateData)) + if (!Andromeda::isValidAndromeda(covariateData)) stop("CovariateData object is closed") if (isAggregatedCovariateData(covariateData)) stop("Cannot filter aggregated data by rowId") covariates <- covariateData$covariates %>% filter(.data$rowId %in% rowIds) - - result <- Andromeda::andromeda(covariates = covariates, - covariateRef = covariateData$covariateRef, - analysisRef = covariateData$analysisRef) + + result <- Andromeda::andromeda(covariates = covariates, covariateRef = covariateData$covariateRef, + analysisRef = covariateData$analysisRef) metaData <- attr(covariateData, "metaData") metaData$populationSize <- length(rowIds) attr(result, "metaData") <- metaData @@ -44,8 +43,8 @@ filterByRowId <- function(covariateData, rowIds) { #' Filter covariates by cohort definition ID #' -#' @param covariateData An object of type \code{CovariateData} -#' @param cohortId The cohort definition ID to keep. +#' @param covariateData An object of type \code{CovariateData} +#' @param cohortId The cohort definition ID to keep. #' #' @return #' An object of type \code{covariateData}. 
@@ -53,7 +52,7 @@ filterByRowId <- function(covariateData, rowIds) { filterByCohortDefinitionId <- function(covariateData, cohortId) { if (!isCovariateData(covariateData)) stop("Data not of class CovariateData") - if (!Andromeda::isValidAndromeda(covariateData)) + if (!Andromeda::isValidAndromeda(covariateData)) stop("CovariateData object is closed") if (!isAggregatedCovariateData(covariateData)) stop("Can only filter aggregated data by cohortId") @@ -71,10 +70,11 @@ filterByCohortDefinitionId <- function(covariateData, cohortId) { } result <- Andromeda::andromeda(covariates = covariates, covariatesContinuous = covariatesContinuous, - covariateRef = covariateData$covariateRef, - analysisRef = covariateData$analysisRef) + + covariateRef = covariateData$covariateRef, analysisRef = covariateData$analysisRef) metaData <- attr(covariateData, "metaData") - metaData$populationSize <- metaData$populationSize[as.numeric(names(metaData$populationSize)) %in% cohortId] + metaData$populationSize <- metaData$populationSize[as.numeric(names(metaData$populationSize)) %in% + cohortId] attr(result, "metaData") <- metaData class(result) <- "CovariateData" attr(class(result), "package") <- "FeatureExtraction" diff --git a/R/Normalization.R b/R/Normalization.R index 1edcaee9..14668181 100644 --- a/R/Normalization.R +++ b/R/Normalization.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -21,7 +21,7 @@ #' infrequent covariates. For temporal covariates, redundancy is evaluated per time ID. #' #' @param covariateData An object as generated using the \code{\link{getDbCovariateData}} -#' function. +#' function. #' @param minFraction Minimum fraction of the population that should have a non-zero value for a #' covariate for that covariate to be kept. Set to 0 to don't filter on #' frequency. 
@@ -35,107 +35,117 @@ tidyCovariateData <- function(covariateData, removeRedundancy = TRUE) { if (!isCovariateData(covariateData)) stop("Data not of class CovariateData") - if (!Andromeda::isValidAndromeda(covariateData)) + if (!Andromeda::isValidAndromeda(covariateData)) stop("CovariateData object is closed") if (isAggregatedCovariateData(covariateData)) stop("Cannot tidy aggregated covariates") start <- Sys.time() - + newCovariateData <- Andromeda::andromeda(covariateRef = covariateData$covariateRef, analysisRef = covariateData$analysisRef) metaData <- attr(covariateData, "metaData") populationSize <- metaData$populationSize - if (covariateData$covariates %>% count() %>% pull() == 0) { + if (covariateData$covariates %>% + count() %>% + pull() == 0) { newCovariateData$covariates <- covariateData$covariates } else { newCovariates <- covariateData$covariates - covariateData$maxValuePerCovariateId <- covariateData$covariates %>% - group_by(.data$covariateId) %>% + covariateData$maxValuePerCovariateId <- covariateData$covariates %>% + group_by(.data$covariateId) %>% summarise(maxValue = max(.data$covariateValue, na.rm = TRUE)) on.exit(covariateData$maxValuePerCovariateId <- NULL) - + if (removeRedundancy || minFraction != 0) { - covariateData$valueCounts <- covariateData$covariates %>% - group_by(.data$covariateId) %>% + covariateData$valueCounts <- covariateData$covariates %>% + group_by(.data$covariateId) %>% summarise(n = count(), nDistinct = n_distinct(.data$covariateValue)) on.exit(covariateData$valueCounts <- NULL, add = TRUE) } - + ignoreCovariateIds <- c() deleteCovariateIds <- c() if (removeRedundancy) { - covariateData$binaryCovariateIds <- covariateData$maxValuePerCovariateId %>% + covariateData$binaryCovariateIds <- covariateData$maxValuePerCovariateId %>% inner_join(covariateData$valueCounts, by = "covariateId") %>% filter(.data$maxValue == 1 & .data$nDistinct == 1) %>% select(covariateId = .data$covariateId) on.exit(covariateData$binaryCovariateIds <- 
NULL, add = TRUE) - - if (covariateData$binaryCovariateIds %>% count() %>% pull() != 0) { - if (isTemporalCovariateData(covariateData)) { + + if (covariateData$binaryCovariateIds %>% + count() %>% + pull() != 0) { + if (isTemporalCovariateData(covariateData)) { # Temporal - covariateData$temporalValueCounts <- covariateData$covariates %>% - inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% - group_by(.data$covariateId, .data$timeId) %>% - count() + covariateData$temporalValueCounts <- covariateData$covariates %>% + inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% + group_by(.data$covariateId, .data$timeId) %>% + count() on.exit(covariateData$temporalValueCounts <- NULL, add = TRUE) - - # First, find all single covariates that, for every timeId, appear in every row with the same value - covariateData$deleteCovariateTimeIds <- covariateData$temporalValueCounts %>% - filter(n == populationSize) %>% - select(.data$covariateId, .data$timeId) + + # First, find all single covariates that, for every timeId, appear in every row with the + # same value + covariateData$deleteCovariateTimeIds <- covariateData$temporalValueCounts %>% + filter(n == populationSize) %>% + select(.data$covariateId, .data$timeId) on.exit(covariateData$deleteCovariateTimeIds <- NULL, add = TRUE) - + # Next, find groups of covariates (analyses) that together cover everyone: analysisIds <- covariateData$temporalValueCounts %>% - anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId")) %>% - inner_join(covariateData$covariateRef, by = "covariateId") %>% - group_by(.data$analysisId) %>% - summarise(n = sum(.data$n, na.rm = TRUE)) %>% - filter(n == populationSize) %>% - select(.data$analysisId) - + anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId")) %>% + inner_join(covariateData$covariateRef, by = "covariateId") %>% + group_by(.data$analysisId) %>% + summarise(n = sum(.data$n, na.rm = TRUE)) %>% + filter(n == 
populationSize) %>% + select(.data$analysisId) + # For those, find most prevalent covariate, and mark it for deletion: valueCounts <- analysisIds %>% - inner_join(covariateData$covariateRef, by = "analysisId") %>% - inner_join(covariateData$temporalValueCounts, by = "covariateId") %>% - select(.data$analysisId, .data$covariateId, .data$timeId, .data$n) %>% - collect() + inner_join(covariateData$covariateRef, by = "analysisId") %>% + inner_join(covariateData$temporalValueCounts, by = "covariateId") %>% + select(.data$analysisId, .data$covariateId, .data$timeId, .data$n) %>% + collect() valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ] - Andromeda::appendToTable(covariateData$deleteCovariateTimeIds, - valueCounts[!duplicated(valueCounts$analysisId), c("covariateId", "timeId")]) - + Andromeda::appendToTable(covariateData$deleteCovariateTimeIds, + valueCounts[!duplicated(valueCounts$analysisId), + + c("covariateId", "timeId")]) + newCovariates <- newCovariates %>% - anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId")) - - ParallelLogger::logInfo("Removing ", covariateData$deleteCovariateTimeIds %>% count() %>% pull(), " redundant covariate ID - time ID combinations") + anti_join(covariateData$deleteCovariateTimeIds, by = c("covariateId", "timeId")) + + ParallelLogger::logInfo("Removing ", covariateData$deleteCovariateTimeIds %>% + count() %>% + pull(), " redundant covariate ID - time ID combinations") } else { # Non-temporal - + # First, find all single covariates that appear in every row with the same value - toDelete <- covariateData$valueCounts %>% - inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% - filter(n == populationSize) %>% - select(.data$covariateId) %>% - collect() + toDelete <- covariateData$valueCounts %>% + inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% + filter(n == populationSize) %>% + select(.data$covariateId) %>% + collect() deleteCovariateIds <- 
toDelete$covariateId - + # Next, find groups of covariates (analyses) that together cover everyone: analysisIds <- covariateData$valueCounts %>% - inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% - filter(!.data$covariateId %in% deleteCovariateIds) %>% - inner_join(covariateData$covariateRef, by = "covariateId") %>% - group_by(.data$analysisId) %>% - summarise(n = sum(.data$n, na.rm = TRUE)) %>% - filter(n == populationSize) %>% - select(.data$analysisId) + inner_join(covariateData$binaryCovariateIds, by = "covariateId") %>% + filter(!.data$covariateId %in% deleteCovariateIds) %>% + inner_join(covariateData$covariateRef, by = "covariateId") %>% + group_by(.data$analysisId) %>% + summarise(n = sum(.data$n, na.rm = TRUE)) %>% + filter(n == populationSize) %>% + select(.data$analysisId) # For those, find most prevalent covariate, and mark it for deletion: valueCounts <- analysisIds %>% - inner_join(covariateData$covariateRef, by = "analysisId") %>% - inner_join(covariateData$valueCounts, by = "covariateId") %>% - select(.data$analysisId, .data$covariateId, .data$n) %>% - collect() + inner_join(covariateData$covariateRef, by = "analysisId") %>% + inner_join(covariateData$valueCounts, by = "covariateId") %>% + select(.data$analysisId, .data$covariateId, .data$n) %>% + collect() valueCounts <- valueCounts[order(valueCounts$analysisId, -valueCounts$n), ] - deleteCovariateIds <- c(deleteCovariateIds, valueCounts$covariateId[!duplicated(valueCounts$analysisId)]) + deleteCovariateIds <- c(deleteCovariateIds, + valueCounts$covariateId[!duplicated(valueCounts$analysisId)]) ignoreCovariateIds <- valueCounts$covariateId ParallelLogger::logInfo("Removing ", length(deleteCovariateIds), " redundant covariates") } @@ -149,34 +159,34 @@ tidyCovariateData <- function(covariateData, filter(!.data$covariateId %in% ignoreCovariateIds) %>% select(.data$covariateId) %>% collect() - + metaData$deletedInfrequentCovariateIds <- toDelete$covariateId deleteCovariateIds <- 
c(deleteCovariateIds, toDelete$covariateId) ParallelLogger::logInfo("Removing ", nrow(toDelete), " infrequent covariates") } if (length(deleteCovariateIds) > 0) { - newCovariates <- newCovariates %>% + newCovariates <- newCovariates %>% filter(!.data$covariateId %in% deleteCovariateIds) } - + if (normalize) { ParallelLogger::logInfo("Normalizing covariates") - newCovariates <- newCovariates %>% + newCovariates <- newCovariates %>% inner_join(covariateData$maxValuePerCovariateId, by = "covariateId") %>% - mutate(covariateValue = .data$covariateValue / .data$maxValue) %>% + mutate(covariateValue = .data$covariateValue/.data$maxValue) %>% select(-.data$maxValue) metaData$normFactors <- covariateData$maxValuePerCovariateId %>% collect() - } + } newCovariateData$covariates <- newCovariates } - + class(newCovariateData) <- "CovariateData" attr(class(newCovariateData), "package") <- "FeatureExtraction" attr(newCovariateData, "metaData") <- metaData - + delta <- Sys.time() - start ParallelLogger::logInfo("Tidying covariates took ", signif(delta, 3), " ", attr(delta, "units")) - + return(newCovariateData) } diff --git a/R/Table1.R b/R/Table1.R index e5ca618a..08bed37b 100644 --- a/R/Table1.R +++ b/R/Table1.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -26,7 +26,9 @@ #' @export getDefaultTable1Specifications <- function() { fileName <- system.file("csv", "Table1Specs.csv", package = "FeatureExtraction") - colTypes <- list(label = readr::col_character(), analysisId = readr::col_integer(), covariateIds = readr::col_character()) + colTypes <- list(label = readr::col_character(), + analysisId = readr::col_integer(), + covariateIds = readr::col_character()) specifications <- readr::read_csv(fileName, col_types = colTypes) return(specifications) } @@ -52,7 +54,7 @@ getDefaultTable1Specifications <- function() { #' @param 
percentDigits Number of digits to be used for percentages. #' @param stdDiffDigits Number of digits to be used for the standardized differences. #' @param valueDigits Number of digits to be used for the values of continuous variables. -#' +#' #' @return #' A data frame, or, when \code{output = "list"} a list of two data frames. #' @@ -62,12 +64,9 @@ createTable1 <- function(covariateData1, cohortId1 = NULL, cohortId2 = NULL, specifications = getDefaultTable1Specifications(), - output = "two columns", - showCounts = FALSE, - showPercent = TRUE, - percentDigits = 1, - valueDigits = 1, - stdDiffDigits = 2) { + + output = "two columns", showCounts = FALSE, showPercent = TRUE, percentDigits = 1, valueDigits = 1, + stdDiffDigits = 2) { comparison <- !is.null(covariateData2) if (!isCovariateData(covariateData1)) stop("covariateData1 is not of type 'covariateData'") @@ -81,7 +80,7 @@ createTable1 <- function(covariateData1, stop("Must show counts or percent, or both") if (!(output %in% c("one column", "two columns", "list"))) stop("The `output` argument must be 'one column', 'two columns', or 'list'") - + fixCase <- function(label) { idx <- (toupper(label) == label) if (any(idx)) { @@ -90,32 +89,32 @@ createTable1 <- function(covariateData1, } return(label) } - + formatCount <- function(x) { result <- format(round(x), justify = "right", big.mark = ",") result <- gsub("NA", "", result) result <- gsub(" ", " ", result) return(result) } - + formatPercent <- function(x) { - result <- format(round(100*x, percentDigits), digits = percentDigits + 1, justify = "right") + result <- format(round(100 * x, percentDigits), digits = percentDigits + 1, justify = "right") result <- gsub("NA", "", result) result <- gsub(" ", " ", result) return(result) } - + formatStdDiff <- function(x) { result <- format(round(x, stdDiffDigits), digits = stdDiffDigits + 1, justify = "right") result <- gsub("NA", "", result) result <- gsub(" ", " ", result) return(result) } - + formatValue <- function(x) { 
return(format(round(x, valueDigits), nsmall = valueDigits)) } - + if (is.null(covariateData1$covariates)) { covariates <- NULL } else { @@ -124,10 +123,8 @@ createTable1 <- function(covariateData1, covariates <- covariates %>% filter(.data$cohortDefinitionId == cohortId1) } - covariates <- covariates %>% - select(covariateId = "covariateId", - count1 = "sumValue", - percent1 = "averageValue") %>% + covariates <- covariates %>% + select(covariateId = "covariateId", count1 = "sumValue", percent1 = "averageValue") %>% collect() covariates$count1 <- formatCount(covariates$count1) covariates$percent1 <- formatPercent(covariates$percent1) @@ -140,15 +137,13 @@ createTable1 <- function(covariateData1, covariatesContinuous <- covariatesContinuous %>% filter(.data$cohortDefinitionId == cohortId1) } - covariatesContinuous <- covariatesContinuous %>% + covariatesContinuous <- covariatesContinuous %>% select(covariateId = "covariateId", averageValue1 = "averageValue", standardDeviation1 = "standardDeviation", - minValue1 = "minValue", - p25Value1 = "p25Value", - medianValue1 = "medianValue", - p75Value1 = "p75Value", - maxValue1 = "maxValue") %>% + + minValue1 = "minValue", p25Value1 = "p25Value", medianValue1 = "medianValue", p75Value1 = "p75Value", + maxValue1 = "maxValue") %>% collect() covariatesContinuous$averageValue1 <- formatValue(covariatesContinuous$averageValue1) covariatesContinuous$standardDeviation1 <- formatValue(covariatesContinuous$standardDeviation1) @@ -158,26 +153,24 @@ createTable1 <- function(covariateData1, covariatesContinuous$p75Value1 <- formatValue(covariatesContinuous$p75Value1) covariatesContinuous$maxValue1 <- formatValue(covariatesContinuous$maxValue1) } - + covariateRef <- covariateData1$covariateRef %>% collect() analysisRef <- covariateData1$analysisRef %>% collect() if (comparison) { - stdDiff <- computeStandardizedDifference(covariateData1 = covariateData1, - covariateData2 = covariateData2, - cohortId1 = cohortId1, - cohortId2 = cohortId2) 
+ stdDiff <- computeStandardizedDifference(covariateData1 = covariateData1, + covariateData2 = covariateData2, + + cohortId1 = cohortId1, cohortId2 = cohortId2) if (!is.null(covariateData1$covariates) && !is.null(covariateData2$covariates)) { - tempCovariates <- covariateData2$covariates - if (!is.null(cohortId2)) { - tempCovariates <- tempCovariates %>% - filter(.data$cohortDefinitionId == cohortId2) - } + tempCovariates <- covariateData2$covariates + if (!is.null(cohortId2)) { + tempCovariates <- tempCovariates %>% + filter(.data$cohortDefinitionId == cohortId2) + } tempCovariates <- tempCovariates %>% - select(covariateId = "covariateId", - count2 = "sumValue", - percent2 = "averageValue") %>% + select(covariateId = "covariateId", count2 = "sumValue", percent2 = "averageValue") %>% collect() tempCovariates$count2 <- formatCount(tempCovariates$count2) tempCovariates$percent2 <- formatPercent(tempCovariates$percent2) @@ -190,23 +183,21 @@ createTable1 <- function(covariateData1, covariates$stdDiff <- formatStdDiff(covariates$stdDiff) } if (!is.null(covariatesContinuous)) { - tempCovariates <- covariateData2$covariatesContinuous - if (!is.null(cohortId2)) { + tempCovariates <- covariateData2$covariatesContinuous + if (!is.null(cohortId2)) { + tempCovariates <- tempCovariates %>% + filter(.data$cohortDefinitionId == cohortId2) + } + tempCovariates <- tempCovariates %>% - filter(.data$cohortDefinitionId == cohortId2) - } - - tempCovariates <- tempCovariates %>% select(covariateId = "covariateId", averageValue2 = "averageValue", standardDeviation2 = "standardDeviation", - minValue2 = "minValue", - p25Value2 = "p25Value", - medianValue2 = "medianValue", - p75Value2 = "p75Value", - maxValue2 = "maxValue") %>% + + minValue2 = "minValue", p25Value2 = "p25Value", medianValue2 = "medianValue", p75Value2 = "p75Value", + maxValue2 = "maxValue") %>% collect() - + tempCovariates$averageValue2 <- formatValue(tempCovariates$averageValue2) tempCovariates$standardDeviation2 <- 
formatValue(tempCovariates$standardDeviation2) tempCovariates$minValue2 <- formatValue(tempCovariates$minValue2) @@ -246,7 +237,7 @@ createTable1 <- function(covariateData1, covariatesContinuous$maxValue2 <- " " covariatesContinuous$stdDiff <- " " } - + binaryTable <- tibble() continuousTable <- tibble() for (i in 1:nrow(specifications)) { @@ -261,148 +252,95 @@ createTable1 <- function(covariateData1, if (isBinary == "Y") { # Binary if (is.na(specifications$covariateIds[i])) { - idx <- covariateRef$analysisId == specifications$analysisId[i] + idx <- covariateRef$analysisId == specifications$analysisId[i] } else { - covariateIds <- as.numeric(strsplit(specifications$covariateIds[i], ",")[[1]]) - idx <- covariateRef$covariateId %in% covariateIds + covariateIds <- as.numeric(strsplit(specifications$covariateIds[i], ",")[[1]]) + idx <- covariateRef$covariateId %in% covariateIds } if (any(idx)) { - covariateRefSubset <- covariateRef[idx, ] - covariatesSubset <- merge(covariates, covariateRefSubset) - if (is.null(covariateIds)) { - covariatesSubset <- covariatesSubset[order(covariatesSubset$covariateId), ] - } else { - covariatesSubset <- merge(covariatesSubset, tibble(covariateId = covariateIds, - rn = 1:length(covariateIds))) - covariatesSubset <- covariatesSubset[order(covariatesSubset$rn, - covariatesSubset$covariateId), ] - } - covariatesSubset$covariateName <- fixCase(gsub("^.*: ", - "", - covariatesSubset$covariateName)) - if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) { - binaryTable <- bind_rows(binaryTable, tibble(Characteristic = specifications$label[i], - count1 = "", - percent1 = "", - count2 = "", - percent2 = "", - stdDiff = "")) - binaryTable <- bind_rows(binaryTable, - tibble(Characteristic = paste0(" ", covariatesSubset$covariateName), - count1 = covariatesSubset$count1, - percent1 = covariatesSubset$percent1, - count2 = covariatesSubset$count2, - percent2 = covariatesSubset$percent2, - stdDiff = covariatesSubset$stdDiff)) - } 
else { - binaryTable <- bind_rows(binaryTable, tibble(Characteristic = specifications$label[i], - count1 = covariatesSubset$count1, - percent1 = covariatesSubset$percent1, - count2 = covariatesSubset$count2, - percent2 = covariatesSubset$percent2, - stdDiff = covariatesSubset$stdDiff)) - } + covariateRefSubset <- covariateRef[idx, ] + covariatesSubset <- merge(covariates, covariateRefSubset) + if (is.null(covariateIds)) { + covariatesSubset <- covariatesSubset[order(covariatesSubset$covariateId), ] + } else { + covariatesSubset <- merge(covariatesSubset, tibble(covariateId = covariateIds, + rn = 1:length(covariateIds))) + covariatesSubset <- covariatesSubset[order(covariatesSubset$rn, + covariatesSubset$covariateId), + ] + } + covariatesSubset$covariateName <- fixCase(gsub("^.*: ", + "", + covariatesSubset$covariateName)) + if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) { + binaryTable <- bind_rows(binaryTable, tibble(Characteristic = specifications$label[i], + count1 = "", percent1 = "", count2 = "", percent2 = "", stdDiff = "")) + binaryTable <- bind_rows(binaryTable, + tibble(Characteristic = paste0(" ", + covariatesSubset$covariateName), + count1 = covariatesSubset$count1, percent1 = covariatesSubset$percent1, count2 = covariatesSubset$count2, + percent2 = covariatesSubset$percent2, stdDiff = covariatesSubset$stdDiff)) + } else { + binaryTable <- bind_rows(binaryTable, tibble(Characteristic = specifications$label[i], + count1 = covariatesSubset$count1, percent1 = covariatesSubset$percent1, count2 = covariatesSubset$count2, + percent2 = covariatesSubset$percent2, stdDiff = covariatesSubset$stdDiff)) + } } } else { # Not binary if (is.na(specifications$covariateIds[i])) { - idx <- covariateRef$analysisId == specifications$analysisId[i] + idx <- covariateRef$analysisId == specifications$analysisId[i] } else { - covariateIds <- as.numeric(strsplit(specifications$covariateIds[i], ",")[[1]]) - idx <- covariateRef$covariateId %in% covariateIds 
+ covariateIds <- as.numeric(strsplit(specifications$covariateIds[i], ",")[[1]]) + idx <- covariateRef$covariateId %in% covariateIds } if (any(idx)) { - covariateRefSubset <- covariateRef[idx, ] - covariatesSubset <- covariatesContinuous[covariatesContinuous$covariateId %in% covariateRefSubset$covariateId, ] - covariatesSubset <- merge(covariatesSubset, covariateRefSubset) - if (is.null(covariateIds)) { - covariatesSubset <- covariatesSubset[order(covariatesSubset$covariateId), ] - } else { - covariatesSubset <- merge(covariatesSubset, tibble(covariateId = covariateIds, - rn = 1:length(covariateIds))) - covariatesSubset <- covariatesSubset[order(covariatesSubset$rn, - covariatesSubset$covariateId), ] - } - covariatesSubset$covariateName <- fixCase(gsub("^.*: ", - "", - covariatesSubset$covariateName)) - if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) { - continuousTable <- bind_rows(continuousTable, - tibble(Characteristic = specifications$label[i], - value1 = "", - value2 = "", - stdDiff = "")) - for (j in 1:nrow(covariatesSubset)) { - continuousTable <- bind_rows(continuousTable, - tibble(Characteristic = paste0(" ", covariatesSubset$covariateName[j]), - value1 = "", - value2 = "", - stdDiff = "")) - continuousTable <- bind_rows(continuousTable, tibble(Characteristic = c(" Mean", - " Std. 
deviation", - " Minimum", - " 25th percentile", - " Median", - " 75th percentile", - " Maximum"), - value1 = c(covariatesSubset$averageValue1[j], - covariatesSubset$standardDeviation1[j], - covariatesSubset$minValue1[j], - covariatesSubset$p25Value1[j], - covariatesSubset$medianValue1[j], - covariatesSubset$p75Value1[j], - covariatesSubset$maxValue1[j]), - value2 = c(covariatesSubset$averageValue2[j], - covariatesSubset$standardDeviation2[j], - covariatesSubset$minValue2[j], - covariatesSubset$p25Value2[j], - covariatesSubset$medianValue2[j], - covariatesSubset$p75Value2[j], - covariatesSubset$maxValue2[j]), - stdDiff = c(covariatesSubset$stdDiff[j], - " ", - " ", - " ", - " ", - " ", - " "))) - - } - } else { - continuousTable <- bind_rows(continuousTable, - tibble(Characteristic = specifications$label[i], - value1 = "", - value2 = "", - stdDiff = "")) - continuousTable <- bind_rows(continuousTable, tibble(Characteristic = c(" Mean", - " Std. deviation", - " Minimum", - " 25th percentile", - " Median", - " 75th percentile", - " Maximum"), - value1 = c(covariatesSubset$averageValue1, - covariatesSubset$standardDeviation1, - covariatesSubset$minValue1, - covariatesSubset$p25Value1, - covariatesSubset$medianValue1, - covariatesSubset$p75Value1, - covariatesSubset$maxValue1), - value2 = c(covariatesSubset$averageValue2, - covariatesSubset$standardDeviation2, - covariatesSubset$minValue2, - covariatesSubset$p25Value2, - covariatesSubset$medianValue2, - covariatesSubset$p75Value2, - covariatesSubset$maxValue2), - stdDiff = c(covariatesSubset$stdDiff, - " ", - " ", - " ", - " ", - " ", - " "))) + covariateRefSubset <- covariateRef[idx, ] + covariatesSubset <- covariatesContinuous[covariatesContinuous$covariateId %in% covariateRefSubset$covariateId, + ] + covariatesSubset <- merge(covariatesSubset, covariateRefSubset) + if (is.null(covariateIds)) { + covariatesSubset <- covariatesSubset[order(covariatesSubset$covariateId), ] + } else { + covariatesSubset <- 
merge(covariatesSubset, tibble(covariateId = covariateIds, + rn = 1:length(covariateIds))) + covariatesSubset <- covariatesSubset[order(covariatesSubset$rn, + covariatesSubset$covariateId), + ] + } + covariatesSubset$covariateName <- fixCase(gsub("^.*: ", + "", + covariatesSubset$covariateName)) + if (is.na(specifications$covariateIds[i]) || length(covariateIds) > 1) { + continuousTable <- bind_rows(continuousTable, + tibble(Characteristic = specifications$label[i], + value1 = "", value2 = "", stdDiff = "")) + for (j in 1:nrow(covariatesSubset)) { + continuousTable <- bind_rows(continuousTable, tibble(Characteristic = paste0(" ", + covariatesSubset$covariateName[j]), value1 = "", value2 = "", stdDiff = "")) + continuousTable <- bind_rows(continuousTable, tibble(Characteristic = c(" Mean", + " Std. deviation", " Minimum", " 25th percentile", " Median", " 75th percentile", + " Maximum"), value1 = c(covariatesSubset$averageValue1[j], covariatesSubset$standardDeviation1[j], + covariatesSubset$minValue1[j], covariatesSubset$p25Value1[j], covariatesSubset$medianValue1[j], + covariatesSubset$p75Value1[j], covariatesSubset$maxValue1[j]), value2 = c(covariatesSubset$averageValue2[j], + covariatesSubset$standardDeviation2[j], covariatesSubset$minValue2[j], covariatesSubset$p25Value2[j], + covariatesSubset$medianValue2[j], covariatesSubset$p75Value2[j], covariatesSubset$maxValue2[j]), + stdDiff = c(covariatesSubset$stdDiff[j], " ", " ", " ", " ", " ", " "))) + } + } else { + continuousTable <- bind_rows(continuousTable, + tibble(Characteristic = specifications$label[i], + value1 = "", value2 = "", stdDiff = "")) + continuousTable <- bind_rows(continuousTable, tibble(Characteristic = c(" Mean", + " Std. 
deviation", " Minimum", " 25th percentile", " Median", " 75th percentile", + " Maximum"), value1 = c(covariatesSubset$averageValue1, covariatesSubset$standardDeviation1, + covariatesSubset$minValue1, covariatesSubset$p25Value1, covariatesSubset$medianValue1, + covariatesSubset$p75Value1, covariatesSubset$maxValue1), value2 = c(covariatesSubset$averageValue2, + covariatesSubset$standardDeviation2, covariatesSubset$minValue2, covariatesSubset$p25Value2, + covariatesSubset$medianValue2, covariatesSubset$p75Value2, covariatesSubset$maxValue2), + stdDiff = c(covariatesSubset$stdDiff, " ", " ", " ", " ", " ", " "))) + } } } } @@ -417,32 +355,27 @@ createTable1 <- function(covariateData1, colnames(continuousTable) <- c("Characteristic", "", "Value", "", "Value", "Std.Diff") } else { continuousTable$dummy <- "" - continuousTable <- continuousTable[, c(1,3,2)] + continuousTable <- continuousTable[, c(1, 3, 2)] colnames(continuousTable) <- c("Characteristic", "", "Value") } } else { if (comparison) { colnames(continuousTable) <- c("Characteristic", "Value", "Value", "Std.Diff") - } else { + } else { continuousTable$value2 <- NULL continuousTable$stdDiff <- NULL colnames(continuousTable) <- c("Characteristic", "Value") } } } - + if (nrow(binaryTable) != 0) { if (comparison) { colnames(binaryTable) <- c("Characteristic", "Count", - paste0("% (n = ", - formatCount(attr(covariateData1, "metaData")$populationSize), - ")"), - "Count", - paste0("% (n = ", - formatCount(attr(covariateData2, "metaData")$populationSize), - ")"), - "Std.Diff") + paste0("% (n = ", formatCount(attr(covariateData1, + "metaData")$populationSize), ")"), "Count", paste0("% (n = ", formatCount(attr(covariateData2, + "metaData")$populationSize), ")"), "Std.Diff") if (!showCounts) { binaryTable[, 4] <- NULL binaryTable[, 2] <- NULL @@ -457,9 +390,8 @@ createTable1 <- function(covariateData1, binaryTable$stdDiff <- NULL colnames(binaryTable) <- c("Characteristic", "Count", - paste0("% (n = ", - 
formatCount(attr(covariateData1, "metaData")$populationSize), - ")")) + paste0("% (n = ", formatCount(attr(covariateData1, + "metaData")$populationSize), ")")) if (!showCounts) { binaryTable[, 2] <- NULL } @@ -468,7 +400,7 @@ createTable1 <- function(covariateData1, } } } - + if (output == "two columns") { if (nrow(binaryTable) > nrow(continuousTable)) { if (nrow(continuousTable) > 0) { @@ -476,10 +408,11 @@ createTable1 <- function(covariateData1, column1 <- binaryTable[1:rowsPerColumn, ] ct <- continuousTable colnames(ct) <- colnames(binaryTable) - column2 <- rbind(binaryTable[(rowsPerColumn + 1):nrow(binaryTable), ], + column2 <- rbind(binaryTable[(rowsPerColumn + 1):nrow(binaryTable), + ], rep("", ncol(binaryTable)), - colnames(continuousTable), - ct) + + colnames(continuousTable), ct) } else { rowsPerColumn <- ceiling((nrow(binaryTable) + nrow(continuousTable))/2) column1 <- binaryTable[1:rowsPerColumn, ] @@ -491,15 +424,12 @@ createTable1 <- function(covariateData1, result <- cbind(column1, column2) } else { rlang::abort(paste("createTable1 cannot display the output in two columns because there are more rows in the table of continuous covariates than there are in the table of binary covariates.", - "\nTry using `output = 'one column'` when calling createTable1()")) + "\nTry using `output = 'one column'` when calling createTable1()")) } } else if (output == "one column") { ct <- continuousTable colnames(ct) <- colnames(binaryTable) - result <- rbind(binaryTable, - rep("", ncol(binaryTable)), - colnames(continuousTable), - ct) + result <- rbind(binaryTable, rep("", ncol(binaryTable)), colnames(continuousTable), ct) } else { result <- list(part1 = binaryTable, part2 = continuousTable) } @@ -533,11 +463,9 @@ createTable1 <- function(covariateData1, #' @export createTable1CovariateSettings <- function(specifications = getDefaultTable1Specifications(), covariateSettings = createDefaultCovariateSettings(), - includedCovariateConceptIds = c(), - 
addDescendantsToInclude = FALSE, - excludedCovariateConceptIds = c(), - addDescendantsToExclude = FALSE, - includedCovariateIds = c()) { + + includedCovariateConceptIds = c(), addDescendantsToInclude = FALSE, excludedCovariateConceptIds = c(), + addDescendantsToExclude = FALSE, includedCovariateIds = c()) { covariateSettings <- convertPrespecSettingsToDetailedSettings(covariateSettings) filterBySpecs <- function(analysis) { if (analysis$analysisId %in% specifications$analysisId) { diff --git a/R/UnitTestHelperFunctions.R b/R/UnitTestHelperFunctions.R index 3e91f453..4f6abd91 100644 --- a/R/UnitTestHelperFunctions.R +++ b/R/UnitTestHelperFunctions.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # @@ -26,33 +26,28 @@ oracleTempSchema = NULL, cdmDatabaseSchema, cohortTable = "#cohort_person", - cohortId = -1, - cdmVersion = "5", - rowIdField = "subject_id", - covariateSettings, - aggregated = FALSE) { + + cohortId = -1, cdmVersion = "5", rowIdField = "subject_id", covariateSettings, aggregated = FALSE) { writeLines("Constructing length of observation covariates") if (covariateSettings$useLengthOfObs == FALSE) { return(NULL) } if (aggregated) stop("Aggregation not supported") - + # Some SQL to construct the covariate: sql <- paste("SELECT @row_id_field AS row_id, 1 AS covariate_id,", "DATEDIFF(DAY, observation_period_start_date, cohort_start_date)", - "AS covariate_value", - "FROM @cohort_table c", - "INNER JOIN @cdm_database_schema.observation_period op", - "ON op.person_id = c.subject_id", - "WHERE cohort_start_date >= observation_period_start_date", - "AND cohort_start_date <= observation_period_end_date", - "{@cohort_id != -1} ? 
{AND cohort_definition_id = @cohort_id}") + + "AS covariate_value", "FROM @cohort_table c", "INNER JOIN @cdm_database_schema.observation_period op", + "ON op.person_id = c.subject_id", "WHERE cohort_start_date >= observation_period_start_date", + "AND cohort_start_date <= observation_period_end_date", "{@cohort_id != -1} ? {AND cohort_definition_id = @cohort_id}") sql <- SqlRender::render(sql, cohort_table = cohortTable, cohort_id = cohortId, row_id_field = rowIdField, - cdm_database_schema = cdmDatabaseSchema) + + cdm_database_schema = cdmDatabaseSchema) sql <- SqlRender::translate(sql, targetDialect = attr(connection, "dbms")) # Retrieve the covariate: covariates <- DatabaseConnector::querySql(connection, sql, snakeCaseToCamelCase = TRUE) @@ -60,15 +55,14 @@ covariateRef <- data.frame(covariateId = 1, covariateName = "Length of observation", analysisId = 1, - conceptId = 0) + + conceptId = 0) # Construct analysis reference: analysisRef <- data.frame(analysisId = 1, analysisName = "Length of observation", domainId = "Demographics", - startDay = 0, - endDay = 0, - isBinary = "N", - missingMeansZero = "Y") + + startDay = 0, endDay = 0, isBinary = "N", missingMeansZero = "Y") # Construct analysis reference: metaData <- list(sql = sql, call = match.call()) result <- Andromeda::andromeda(covariates = covariates, @@ -77,4 +71,4 @@ attr(result, "metaData") <- metaData class(result) <- "CovariateData" return(result) -} \ No newline at end of file +} diff --git a/extras/DefaultCovariateSettingsTemplate.R b/extras/DefaultCovariateSettingsTemplate.R index 2a35340a..3f344d7e 100644 --- a/extras/DefaultCovariateSettingsTemplate.R +++ b/extras/DefaultCovariateSettingsTemplate.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/extras/DetailedCovariateSettingsTemplate.R b/extras/DetailedCovariateSettingsTemplate.R index 
3ba90038..537564b1 100644 --- a/extras/DetailedCovariateSettingsTemplate.R +++ b/extras/DetailedCovariateSettingsTemplate.R @@ -1,4 +1,4 @@ -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/extras/GetHdpsCovariates.R b/extras/GetHdpsCovariates.R index 3dd35b6a..6785f40c 100644 --- a/extras/GetHdpsCovariates.R +++ b/extras/GetHdpsCovariates.R @@ -1,6 +1,6 @@ # @file GetHdpsCovariates.R # -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/extras/PackageMaintenance.R b/extras/PackageMaintenance.R index 7fa810da..8a0a3816 100644 --- a/extras/PackageMaintenance.R +++ b/extras/PackageMaintenance.R @@ -1,6 +1,6 @@ # @file PackageMaintenance # -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/extras/TestCode.R b/extras/TestCode.R index 9a49ee81..975e1fd1 100644 --- a/extras/TestCode.R +++ b/extras/TestCode.R @@ -584,6 +584,7 @@ settings <- createTemporalCovariateSettings(useDemographicsGender = TRUE, useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = TRUE, + useEfi = TRUE, useDistinctConditionCount = TRUE, useDistinctIngredientCount = TRUE, useDistinctProcedureCount = TRUE, diff --git a/extras/VignetteDataFetch.R b/extras/VignetteDataFetch.R index 8e726661..e0586de9 100644 --- a/extras/VignetteDataFetch.R +++ b/extras/VignetteDataFetch.R @@ -1,6 +1,6 @@ # @file VignetteDataFetch.R # -# Copyright 2021 Observational Health Data Sciences and Informatics +# Copyright 2022 Observational Health Data Sciences and Informatics # # This file is part of FeatureExtraction # diff --git a/inst/csv/PrespecAnalyses.csv b/inst/csv/PrespecAnalyses.csv index 
b33f76c9..ecbef9d4 100644 --- a/inst/csv/PrespecAnalyses.csv +++ b/inst/csv/PrespecAnalyses.csv @@ -1,106 +1,107 @@ -analysisId,analysisName,sqlFileName,startDay,endDay,subType,domainId,domainTable,domainConceptId,domainStartDate,domainEndDate,isDefault,description -1,DemographicsGender,DemographicsGender.sql,,,,Demographics,,,,,TRUE,Gender of the subject. -2,DemographicsAge,DemographicsAge.sql,,,,Demographics,,,,,FALSE,Age of the subject on the index date (in years). -3,DemographicsAgeGroup,DemographicsAgeGroup.sql,,,,Demographics,,,,,TRUE,Age of the subject on the index date (in 5 year age groups) -4,DemographicsRace,DemographicsRace.sql,,,,Demographics,,,,,TRUE,Race of the subject. -5,DemographicsEthnicity,DemographicsEthnicity.sql,,,,Demographics,,,,,TRUE,Ethnicity of the subject. -6,DemographicsIndexYear,DemographicsYear.sql,,,,Demographics,,,,,TRUE,Year of the index date. -7,DemographicsIndexMonth,DemographicsMonth.sql,,,,Demographics,,,,,TRUE,Month of the index date. -8,DemographicsPriorObservationTime,DemographicsTime.sql,,,priorObservation,Demographics,,,,,FALSE,Number of continuous days of observation time preceding the index date. -9,DemographicsPostObservationTime,DemographicsTime.sql,,,postObservation,Demographics,,,,,FALSE,Number of continuous days of observation time following the index date. -10,DemographicsTimeInCohort,DemographicsTime.sql,,,inCohort,Demographics,,,,,FALSE,Number of days of observation time during cohort period. -11,DemographicsIndexYearMonth,DemographicsYearMonth.sql,,,,Demographics,,,,,FALSE,Both calendar year and month of the index date in a single variable. -101,ConditionOccurrenceAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting any time prior to index. 
-102,ConditionOccurrenceLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the long term window. -103,ConditionOccurrenceMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the medium term window. -104,ConditionOccurrenceShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the short term window. -105,ConditionOccurrencePrimaryInpatientAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting any time prior to index. -106,ConditionOccurrencePrimaryInpatientLongTerm,DomainConcept.sql,longTermStartDays,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the long term window. -107,ConditionOccurrencePrimaryInpatientMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the medium term window. 
-108,ConditionOccurrencePrimaryInpatientShortTerm,DomainConcept.sql,shortTermStartDays,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the short term window. -201,ConditionEraAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any time prior to index. -202,ConditionEraLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the long term window. -203,ConditionEraMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the medium term window. -204,ConditionEraShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the short term window. -205,ConditionEraOverlapping,DomainConcept.sql,endDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with the end of the risk window. 
-206,ConditionEraStartLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the long term window. -207,ConditionEraStartMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the medium term window. -208,ConditionEraStartShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the short term window. -209,ConditionGroupEraAnyTimePrior,DomainConceptGroup.sql,anyTimePrior,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any time prior to index. -210,ConditionGroupEraLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,TRUE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any part of the long term window. -211,ConditionGroupEraMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any part of the medium term window. 
-212,ConditionGroupEraShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,TRUE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any part of the short term window. -213,ConditionGroupEraOverlapping,DomainConceptGroup.sql,endDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table overlapping with the end of the risk window. -214,ConditionGroupEraStartLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table starting in the long term window. -215,ConditionGroupEraStartMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table starting in the medium term window. -216,ConditionGroupEraStartShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table starting in the short term window. -301,DrugExposureAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting any time prior to index. 
-302,DrugExposureLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the long term window. -303,DrugExposureMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the medium term window. -304,DrugExposureShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the short term window. -401,DrugEraAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any time prior to index. -402,DrugEraLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the long term window. -403,DrugEraMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the medium term window. -404,DrugEraShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the short window. -405,DrugEraOverlapping,DomainConcept.sql,endDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with the end of the risk window. 
-406,DrugEraStartLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the long term window. -407,DrugEraStartMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the medium term window. -408,DrugEraStartShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the long short window. -409,DrugGroupEraAnyTimePrior,DomainConceptGroup.sql,anyTimePrior,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any time prior to index. -410,DrugGroupEraLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the long term window. -411,DrugGroupEraMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the medium term window. -412,DrugGroupEraShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the short term window. 
-413,DrugGroupEraOverlapping,DomainConceptGroup.sql,endDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with the end of the risk window. -414,DrugGroupEraStartLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the long term window. -415,DrugGroupEraStartMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the medium term window. -416,DrugGroupEraStartShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the short term window. -501,ProcedureOccurrenceAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,One covariate per procedure in the procedure_occurrence table any time prior to index. -502,ProcedureOccurrenceLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,TRUE,One covariate per procedure in the procedure_occurrence table in the long term window. -503,ProcedureOccurrenceMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,One covariate per procedure in the procedure_occurrence table in the medium term window. 
-504,ProcedureOccurrenceShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,TRUE,One covariate per procedure in the procedure_occurrence table in the short term window. -601,DeviceExposureAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,FALSE,One covariate per device in the device exposure table starting any time prior to index. -602,DeviceExposureLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,TRUE,One covariate per device in the device exposure table starting in the long term window. -603,DeviceExposureMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,FALSE,One covariate per device in the device exposure table starting in the medium term window. -604,DeviceExposureShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,TRUE,One covariate per device in the device exposure table starting in the short term window. -701,MeasurementAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,One covariate per measurement in the measurement table any time prior to index. -702,MeasurementLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,TRUE,One covariate per measurement in the measurement table in the long term window. 
-703,MeasurementMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,One covariate per measurement in the measurement table in the medium term window. -704,MeasurementShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,TRUE,One covariate per measurement in the measurement table in the short term window. -705,MeasurementValueAnyTimePrior,MeasurementValue.sql,anyTimePrior,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination any time prior to index. -706,MeasurementValueLongTerm,MeasurementValue.sql,longTermStartDays,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination in the long term window. -707,MeasurementValueMediumTerm,MeasurementValue.sql,mediumTermStartDays,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination in the medium term window. -708,MeasurementValueShortTerm,MeasurementValue.sql,shortTermStartDays,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination in the short term window. -709,MeasurementRangeGroupAnyTimePrior,MeasurementRangeGroup.sql,anyTimePrior,endDays,,,,,,,FALSE,"Covariates indicating whether measurements are below, within, or above normal range any time prior to index." -710,MeasurementRangeGroupLongTerm,MeasurementRangeGroup.sql,longTermStartDays,endDays,,,,,,,TRUE,"Covariates indicating whether measurements are below, within, or above normal range in the long term window." -711,MeasurementRangeGroupMediumTerm,MeasurementRangeGroup.sql,mediumTermStartDays,endDays,,,,,,,FALSE,"Covariates indicating whether measurements are below, within, or above normal range in the medium term window." 
-712,MeasurementRangeGroupShortTerm,MeasurementRangeGroup.sql,shortTermStartDays,endDays,,,,,,,FALSE,"Covariates indicating whether measurements are below, within, or above normal range in the short term window." -801,ObservationAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,One covariate per observation in the observation table any time prior to index. -802,ObservationLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,TRUE,One covariate per observation in the observation table in the long term window. -803,ObservationMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,One covariate per observation in the observation table in the medium term window. -804,ObservationShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,TRUE,One covariate per observation in the observation table in the short term window. -901,CharlsonIndex,CharlsonIndex.sql,,endDays,,Condition,,,,,TRUE,The Charlson comorbidity index (Romano adaptation) using all conditions prior to the window end. -902,Dcsi,Dcsi.sql,,endDays,,Condition,,,,,TRUE,The Diabetes Comorbidity Severity Index (DCSI) using all conditions prior to the window end. -903,Chads2,Chads2.sql,,endDays,,Condition,,,,,TRUE,The CHADS2 score using all conditions prior to the window end. -904,Chads2Vasc,Chads2Vasc.sql,,endDays,,Condition,,,,,TRUE,The CHADS2VASc score using all conditions prior to the window end. -926,Hfrs,Hfrs.sql,,endDays,,Condition,,,,,FALSE,The Hospital Frailty Risk Score score using all conditions prior to the window end. 
-905,DistinctConditionCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the long term window. -906,DistinctConditionCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the medium term window. -907,DistinctConditionCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the short term window. -908,DistinctIngredientCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the long term window. -909,DistinctIngredientCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the medium term window. -910,DistinctIngredientCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the short term window. -911,DistinctProcedureCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the long term window. 
-912,DistinctProcedureCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the medium term window. -913,DistinctProcedureCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the short term window. -914,DistinctMeasurementCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the long term window. -915,DistinctMeasurementCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the medium term window. -916,DistinctMeasurementCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the short term window. -917,DistinctObservationCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations observed in the long term window. -918,DistinctObservationCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations observed in the medium term window. 
-919,DistinctObservationCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations observed in the short term window. -920,VisitCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the long term window. -921,VisitCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the medium term window. -922,VisitCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the short term window. -923,VisitConceptCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the long term window, stratified by visit concept ID." -924,VisitConceptCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the medium term window, stratified by visit concept ID." -925,VisitConceptCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the short term window, stratified by visit concept ID." +analysisId,analysisName,sqlFileName,startDay,endDay,subType,domainId,domainTable,domainConceptId,domainStartDate,domainEndDate,isDefault,description +1,DemographicsGender,DemographicsGender.sql,,,,Demographics,,,,,TRUE,Gender of the subject. +2,DemographicsAge,DemographicsAge.sql,,,,Demographics,,,,,FALSE,Age of the subject on the index date (in years). 
+3,DemographicsAgeGroup,DemographicsAgeGroup.sql,,,,Demographics,,,,,TRUE,Age of the subject on the index date (in 5 year age groups) +4,DemographicsRace,DemographicsRace.sql,,,,Demographics,,,,,TRUE,Race of the subject. +5,DemographicsEthnicity,DemographicsEthnicity.sql,,,,Demographics,,,,,TRUE,Ethnicity of the subject. +6,DemographicsIndexYear,DemographicsYear.sql,,,,Demographics,,,,,TRUE,Year of the index date. +7,DemographicsIndexMonth,DemographicsMonth.sql,,,,Demographics,,,,,TRUE,Month of the index date. +8,DemographicsPriorObservationTime,DemographicsTime.sql,,,priorObservation,Demographics,,,,,FALSE,Number of continuous days of observation time preceding the index date. +9,DemographicsPostObservationTime,DemographicsTime.sql,,,postObservation,Demographics,,,,,FALSE,Number of continuous days of observation time following the index date. +10,DemographicsTimeInCohort,DemographicsTime.sql,,,inCohort,Demographics,,,,,FALSE,Number of days of observation time during cohort period. +11,DemographicsIndexYearMonth,DemographicsYearMonth.sql,,,,Demographics,,,,,FALSE,Both calendar year and month of the index date in a single variable. +101,ConditionOccurrenceAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting any time prior to index. +102,ConditionOccurrenceLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the long term window. +103,ConditionOccurrenceMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the medium term window. 
+104,ConditionOccurrenceShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the short term window. +105,ConditionOccurrencePrimaryInpatientAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting any time prior to index. +106,ConditionOccurrencePrimaryInpatientLongTerm,DomainConcept.sql,longTermStartDays,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the long term window. +107,ConditionOccurrencePrimaryInpatientMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the medium term window. +108,ConditionOccurrencePrimaryInpatientShortTerm,DomainConcept.sql,shortTermStartDays,endDays,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the short term window. +201,ConditionEraAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any time prior to index. 
+202,ConditionEraLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the long term window. +203,ConditionEraMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the medium term window. +204,ConditionEraShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the short term window. +205,ConditionEraOverlapping,DomainConcept.sql,endDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with the end of the risk window. +206,ConditionEraStartLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the long term window. +207,ConditionEraStartMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the medium term window. +208,ConditionEraStartShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the short term window. 
+209,ConditionGroupEraAnyTimePrior,DomainConceptGroup.sql,anyTimePrior,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any time prior to index. +210,ConditionGroupEraLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,TRUE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any part of the long term window. +211,ConditionGroupEraMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any part of the medium term window. +212,ConditionGroupEraShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,TRUE,One covariate per condition era rolled up to groups in the condition_era table overlapping with any part of the short term window. +213,ConditionGroupEraOverlapping,DomainConceptGroup.sql,endDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table overlapping with the end of the risk window. +214,ConditionGroupEraStartLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table starting in the long term window. 
+215,ConditionGroupEraStartMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table starting in the medium term window. +216,ConditionGroupEraStartShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to groups in the condition_era table starting in the short term window. +301,DrugExposureAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting any time prior to index. +302,DrugExposureLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the long term window. +303,DrugExposureMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the medium term window. +304,DrugExposureShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the short term window. +401,DrugEraAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any time prior to index. 
+402,DrugEraLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the long term window. +403,DrugEraMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the medium term window. +404,DrugEraShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the short term window. +405,DrugEraOverlapping,DomainConcept.sql,endDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with the end of the risk window. +406,DrugEraStartLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the long term window. +407,DrugEraStartMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the medium term window. +408,DrugEraStartShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the short term window. +409,DrugGroupEraAnyTimePrior,DomainConceptGroup.sql,anyTimePrior,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any time prior to index. 
+410,DrugGroupEraLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the long term window. +411,DrugGroupEraMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the medium term window. +412,DrugGroupEraShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the short term window. +413,DrugGroupEraOverlapping,DomainConceptGroup.sql,endDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with the end of the risk window. +414,DrugGroupEraStartLongTerm,DomainConceptGroup.sql,longTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the long term window. +415,DrugGroupEraStartMediumTerm,DomainConceptGroup.sql,mediumTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the medium term window. +416,DrugGroupEraStartShortTerm,DomainConceptGroup.sql,shortTermStartDays,endDays,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the short term window. 
+501,ProcedureOccurrenceAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,One covariate per procedure in the procedure_occurrence table any time prior to index. +502,ProcedureOccurrenceLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,TRUE,One covariate per procedure in the procedure_occurrence table in the long term window. +503,ProcedureOccurrenceMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,One covariate per procedure in the procedure_occurrence table in the medium term window. +504,ProcedureOccurrenceShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,TRUE,One covariate per procedure in the procedure_occurrence table in the short term window. +601,DeviceExposureAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,FALSE,One covariate per device in the device exposure table starting any time prior to index. +602,DeviceExposureLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,TRUE,One covariate per device in the device exposure table starting in the long term window. +603,DeviceExposureMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,FALSE,One covariate per device in the device exposure table starting in the medium term window. 
+604,DeviceExposureShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,TRUE,One covariate per device in the device exposure table starting in the short term window. +701,MeasurementAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,One covariate per measurement in the measurement table any time prior to index. +702,MeasurementLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,TRUE,One covariate per measurement in the measurement table in the long term window. +703,MeasurementMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,One covariate per measurement in the measurement table in the medium term window. +704,MeasurementShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,TRUE,One covariate per measurement in the measurement table in the short term window. +705,MeasurementValueAnyTimePrior,MeasurementValue.sql,anyTimePrior,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination any time prior to index. +706,MeasurementValueLongTerm,MeasurementValue.sql,longTermStartDays,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination in the long term window. +707,MeasurementValueMediumTerm,MeasurementValue.sql,mediumTermStartDays,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination in the medium term window. +708,MeasurementValueShortTerm,MeasurementValue.sql,shortTermStartDays,endDays,,,,,,,FALSE,One covariate containing the value per measurement-unit combination in the short term window. 
+709,MeasurementRangeGroupAnyTimePrior,MeasurementRangeGroup.sql,anyTimePrior,endDays,,,,,,,FALSE,"Covariates indicating whether measurements are below, within, or above normal range any time prior to index." +710,MeasurementRangeGroupLongTerm,MeasurementRangeGroup.sql,longTermStartDays,endDays,,,,,,,TRUE,"Covariates indicating whether measurements are below, within, or above normal range in the long term window." +711,MeasurementRangeGroupMediumTerm,MeasurementRangeGroup.sql,mediumTermStartDays,endDays,,,,,,,FALSE,"Covariates indicating whether measurements are below, within, or above normal range in the medium term window." +712,MeasurementRangeGroupShortTerm,MeasurementRangeGroup.sql,shortTermStartDays,endDays,,,,,,,FALSE,"Covariates indicating whether measurements are below, within, or above normal range in the short term window." +801,ObservationAnyTimePrior,DomainConcept.sql,anyTimePrior,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,One covariate per observation in the observation table any time prior to index. +802,ObservationLongTerm,DomainConcept.sql,longTermStartDays,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,TRUE,One covariate per observation in the observation table in the long term window. +803,ObservationMediumTerm,DomainConcept.sql,mediumTermStartDays,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,One covariate per observation in the observation table in the medium term window. +804,ObservationShortTerm,DomainConcept.sql,shortTermStartDays,endDays,all,Observation,observation,observation_concept_id,observation_date,observation_date,TRUE,One covariate per observation in the observation table in the short term window. +901,CharlsonIndex,CharlsonIndex.sql,,endDays,,Condition,,,,,TRUE,The Charlson comorbidity index (Romano adaptation) using all conditions prior to the window end. 
+902,Dcsi,Dcsi.sql,,endDays,,Condition,,,,,TRUE,The Diabetes Comorbidity Severity Index (DCSI) using all conditions prior to the window end. +903,Chads2,Chads2.sql,,endDays,,Condition,,,,,TRUE,The CHADS2 score using all conditions prior to the window end. +904,Chads2Vasc,Chads2Vasc.sql,,endDays,,Condition,,,,,TRUE,The CHADS2VASc score using all conditions prior to the window end. +926,Hfrs,Hfrs.sql,,endDays,,Condition,,,,,FALSE,The Hospital Frailty Risk Score using all conditions prior to the window end. +927,Efi,Efi.sql,,endDays,,Condition,,,,,FALSE,Electronic frailty score +905,DistinctConditionCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the long term window. +906,DistinctConditionCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the medium term window. +907,DistinctConditionCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the short term window. +908,DistinctIngredientCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the long term window. +909,DistinctIngredientCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the medium term window. 
+910,DistinctIngredientCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the short term window. +911,DistinctProcedureCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the long term window. +912,DistinctProcedureCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the medium term window. +913,DistinctProcedureCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the short term window. +914,DistinctMeasurementCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the long term window. +915,DistinctMeasurementCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the medium term window. +916,DistinctMeasurementCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the short term window. +917,DistinctObservationCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations observed in the long term window. 
+918,DistinctObservationCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations observed in the medium term window. +919,DistinctObservationCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations observed in the short term window. +920,VisitCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the long term window. +921,VisitCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the medium term window. +922,VisitCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the short term window. +923,VisitConceptCountLongTerm,ConceptCounts.sql,longTermStartDays,endDays,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the long term window, stratified by visit concept ID." +924,VisitConceptCountMediumTerm,ConceptCounts.sql,mediumTermStartDays,endDays,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the medium term window, stratified by visit concept ID." +925,VisitConceptCountShortTerm,ConceptCounts.sql,shortTermStartDays,endDays,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the short term window, stratified by visit concept ID." 
diff --git a/inst/csv/PrespecTemporalAnalyses.csv b/inst/csv/PrespecTemporalAnalyses.csv index 88b9f2e3..f47f710c 100644 --- a/inst/csv/PrespecTemporalAnalyses.csv +++ b/inst/csv/PrespecTemporalAnalyses.csv @@ -1,41 +1,42 @@ -analysisId,analysisName,sqlFileName,subType,domainId,domainTable,domainConceptId,domainStartDate,domainEndDate,isDefault,description -1,DemographicsGender,DemographicsGender.sql,,Demographics,,,,,TRUE,Gender of the subject. -2,DemographicsAge,DemographicsAge.sql,,Demographics,,,,,FALSE,Age of the subject on the index date (in years). -3,DemographicsAgeGroup,DemographicsAgeGroup.sql,,Demographics,,,,,TRUE,Age of the subject on the index date (in 5 year age groups) -4,DemographicsRace,DemographicsRace.sql,,Demographics,,,,,TRUE,Race of the subject. -5,DemographicsEthnicity,DemographicsEthnicity.sql,,Demographics,,,,,TRUE,Ethnicity of the subject. -6,DemographicsIndexYear,DemographicsYear.sql,,Demographics,,,,,TRUE,Year of the index date. -7,DemographicsIndexMonth,DemographicsMonth.sql,,Demographics,,,,,TRUE,Month of the index date. -8,DemographicsPriorObservationTime,DemographicsTime.sql,priorObservation,Demographics,,,,,FALSE,Number of days of observation time preceding the index date. -9,DemographicsPostObservationTime,DemographicsTime.sql,postObservation,Demographics,,,,,FALSE,Number of days of observation time preceding the index date. -10,DemographicsTimeInCohort,DemographicsTime.sql,inCohort,Demographics,,,,,FALSE,Number of days of observation time preceding the index date. -11,DemographicsIndexYearMonth,DemographicsYearMonth.sql,,Demographics,,,,,FALSE,Calendar month of the index date. -101,ConditionOccurrence,DomainConcept.sql,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the time window. 
-102,ConditionOccurrencePrimaryInpatient,DomainConcept.sql,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the time window. -201,ConditionEraStart,DomainConcept.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the time window. -202,ConditionEraOverlap,DomainConcept.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the time window. -203,ConditionEraGroupStart,DomainConceptGroup.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to SNOMED groups in the condition_era table starting in the time window. -203,ConditionEraGroupOverlap,DomainConceptGroup.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,TRUE,One covariate per condition era rolled up to SNOMED groups in the condition_era table overlapping with any part of the time window. -301,DrugExposure,DomainConcept.sql,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the time window. -401,DrugEraStart,DomainConcept.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the time window. -402,DrugEraOverlap,DomainConcept.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the time window. 
-403,DrugEraGroupStart,DomainConceptGroup.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in thetime window. -403,DrugEraGroupOverlap,DomainConceptGroup.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of thetime window. -501,ProcedureOccurrence,DomainConcept.sql,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,TRUE,One covariate per procedure in the procedure_occurrence table in the time window. -601,DeviceExposure,DomainConcept.sql,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,TRUE,One covariate per device in the device exposure table starting in the timewindow. -701,Measurement,DomainConcept.sql,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,TRUE,One covariate per measurement in the measurement table in the time window. -702,MeasurementValue,MeasurementValue.sql,,Measurement,,,,,FALSE,"One covariate containing the value per measurement-unit combination in the time window. If multiple values are found, the last is taken." -703,MeasurementRangeGroup,MeasurementRangeGroup.sql,,Measurement,,,,,TRUE,"Covariates indicating whether measurements are below, within, or above normal range within the time period." -801,Observation,DomainConcept.sql,all,Observation,observation,observation_concept_id,observation_date,observation_date,TRUE,One covariate per observation in the observation table in the time window. -901,CharlsonIndex,CharlsonIndex.sql,,Condition,,,,,TRUE,The Charlson comorbidity index (Romano adaptation) using all conditions prior to the window end. -902,Dcsi,Dcsi.sql,,Condition,,,,,TRUE,The Diabetes Comorbidity Severity Index (DCSI) using all conditions prior to the window end. 
-903,Chads2,Chads2.sql,,Condition,,,,,TRUE,The CHADS2 score using all conditions prior to the window end. -904,Chads2Vasc,Chads2Vasc.sql,,Condition,,,,,TRUE,The CHADS2VASc score using all conditions prior to the window end. -926,Hfrs,Hfrs.sql,,Condition,,,,,FALSE,The Hospital Frailty Risk Score score using all conditions prior to the window end. -905,DistinctConditionCount,ConceptCounts.sql,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the time window. -906,DistinctIngredientCount,ConceptCounts.sql,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the time window. -907,DistinctProcedureCount,ConceptCounts.sql,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the time window. -908,DistinctMeasurementCount,ConceptCounts.sql,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the time window. -909,DistinctObservationCount,ConceptCounts.sql,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations in the time window. -910,VisitCount,ConceptCounts.sql,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the time window. -911,VisitConceptCount,ConceptCounts.sql,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the time window, stratified by visit concept ID." +analysisId,analysisName,sqlFileName,subType,domainId,domainTable,domainConceptId,domainStartDate,domainEndDate,isDefault,description +1,DemographicsGender,DemographicsGender.sql,,Demographics,,,,,TRUE,Gender of the subject. 
+2,DemographicsAge,DemographicsAge.sql,,Demographics,,,,,FALSE,Age of the subject on the index date (in years). +3,DemographicsAgeGroup,DemographicsAgeGroup.sql,,Demographics,,,,,TRUE,Age of the subject on the index date (in 5 year age groups). +4,DemographicsRace,DemographicsRace.sql,,Demographics,,,,,TRUE,Race of the subject. +5,DemographicsEthnicity,DemographicsEthnicity.sql,,Demographics,,,,,TRUE,Ethnicity of the subject. +6,DemographicsIndexYear,DemographicsYear.sql,,Demographics,,,,,TRUE,Year of the index date. +7,DemographicsIndexMonth,DemographicsMonth.sql,,Demographics,,,,,TRUE,Month of the index date. +8,DemographicsPriorObservationTime,DemographicsTime.sql,priorObservation,Demographics,,,,,FALSE,Number of days of observation time preceding the index date. +9,DemographicsPostObservationTime,DemographicsTime.sql,postObservation,Demographics,,,,,FALSE,Number of days of observation time following the index date. +10,DemographicsTimeInCohort,DemographicsTime.sql,inCohort,Demographics,,,,,FALSE,Number of days of observation time during the cohort period. +11,DemographicsIndexYearMonth,DemographicsYearMonth.sql,,Demographics,,,,,FALSE,Calendar month of the index date. +101,ConditionOccurrence,DomainConcept.sql,all,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition in the condition_occurrence table starting in the time window. +102,ConditionOccurrencePrimaryInpatient,DomainConcept.sql,inpatient,Condition,condition_occurrence,condition_concept_id,condition_start_date,condition_start_date,FALSE,One covariate per condition observed as a primary diagnosis in an inpatient setting in the condition_occurrence table starting in the time window. +201,ConditionEraStart,DomainConcept.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition in the condition_era table starting in the time window. 
+202,ConditionEraOverlap,DomainConcept.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,One covariate per condition in the condition_era table overlapping with any part of the time window. +203,ConditionEraGroupStart,DomainConceptGroup.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_start_date,FALSE,One covariate per condition era rolled up to SNOMED groups in the condition_era table starting in the time window. +203,ConditionEraGroupOverlap,DomainConceptGroup.sql,all,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,TRUE,One covariate per condition era rolled up to SNOMED groups in the condition_era table overlapping with any part of the time window. +301,DrugExposure,DomainConcept.sql,all,Drug,drug_exposure,drug_concept_id,drug_exposure_start_date,drug_exposure_start_date,FALSE,One covariate per drug in the drug_exposure table starting in the time window. +401,DrugEraStart,DomainConcept.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug in the drug_era table starting in the time window. +402,DrugEraOverlap,DomainConcept.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,One covariate per drug in the drug_era table overlapping with any part of the time window. +403,DrugEraGroupStart,DomainConceptGroup.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_start_date,FALSE,One covariate per drug rolled up to ATC groups in the drug_era table starting in the time window. +403,DrugEraGroupOverlap,DomainConceptGroup.sql,all,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,TRUE,One covariate per drug rolled up to ATC groups in the drug_era table overlapping with any part of the time window. 
+501,ProcedureOccurrence,DomainConcept.sql,all,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,TRUE,One covariate per procedure in the procedure_occurrence table in the time window. +601,DeviceExposure,DomainConcept.sql,all,Device,device_exposure,device_concept_id,device_exposure_start_date,device_exposure_start_date,TRUE,One covariate per device in the device exposure table starting in the time window. +701,Measurement,DomainConcept.sql,all,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,TRUE,One covariate per measurement in the measurement table in the time window. +702,MeasurementValue,MeasurementValue.sql,,Measurement,,,,,FALSE,"One covariate containing the value per measurement-unit combination in the time window. If multiple values are found, the last is taken." +703,MeasurementRangeGroup,MeasurementRangeGroup.sql,,Measurement,,,,,TRUE,"Covariates indicating whether measurements are below, within, or above normal range within the time period." +801,Observation,DomainConcept.sql,all,Observation,observation,observation_concept_id,observation_date,observation_date,TRUE,One covariate per observation in the observation table in the time window. +901,CharlsonIndex,CharlsonIndex.sql,,Condition,,,,,TRUE,The Charlson comorbidity index (Romano adaptation) using all conditions prior to the window end. +902,Dcsi,Dcsi.sql,,Condition,,,,,TRUE,The Diabetes Comorbidity Severity Index (DCSI) using all conditions prior to the window end. +903,Chads2,Chads2.sql,,Condition,,,,,TRUE,The CHADS2 score using all conditions prior to the window end. +904,Chads2Vasc,Chads2Vasc.sql,,Condition,,,,,TRUE,The CHADS2VASc score using all conditions prior to the window end. +926,Hfrs,Hfrs.sql,,Condition,,,,,FALSE,The Hospital Frailty Risk Score using all conditions prior to the window end. 
+927,Efi,Efi.sql,,Condition,,,,,FALSE,Electronic Frailty Index (eFI) score. +905,DistinctConditionCount,ConceptCounts.sql,distinct,Condition,condition_era,condition_concept_id,condition_era_start_date,condition_era_end_date,FALSE,The number of distinct condition concepts observed in the time window. +906,DistinctIngredientCount,ConceptCounts.sql,distinct,Drug,drug_era,drug_concept_id,drug_era_start_date,drug_era_end_date,FALSE,The number of distinct ingredients observed in the time window. +907,DistinctProcedureCount,ConceptCounts.sql,distinct,Procedure,procedure_occurrence,procedure_concept_id,procedure_date,procedure_date,FALSE,The number of distinct procedures observed in the time window. +908,DistinctMeasurementCount,ConceptCounts.sql,distinct,Measurement,measurement,measurement_concept_id,measurement_date,measurement_date,FALSE,The number of distinct measurements observed in the time window. +909,DistinctObservationCount,ConceptCounts.sql,distinct,Observation,observation,observation_concept_id,observation_date,observation_date,FALSE,The number of distinct observations in the time window. +910,VisitCount,ConceptCounts.sql,all,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,The number of visits observed in the time window. +911,VisitConceptCount,ConceptCounts.sql,stratified,Visit,visit_occurrence,visit_concept_id,visit_start_date,visit_end_date,FALSE,"The number of visits observed in the time window, stratified by visit concept ID." 
diff --git a/inst/sql/sql_server/Efi.sql b/inst/sql/sql_server/Efi.sql new file mode 100644 index 00000000..4d2269f8 --- /dev/null +++ b/inst/sql/sql_server/Efi.sql @@ -0,0 +1,1229 @@ +IF OBJECT_ID('tempdb..#efi_concepts', 'U') IS NOT NULL + DROP TABLE #efi_concepts; + +CREATE TABLE #efi_concepts ( + diag_category_id INT, + concept_id INT, + min_levels_of_separation INT, + domain_id VARCHAR(255) + ); + +IF OBJECT_ID('tempdb..#efi_scoring', 'U') IS NOT NULL + DROP TABLE #efi_scoring; + +CREATE TABLE #efi_scoring ( + diag_category_id INT, + diag_category_name VARCHAR(255), + weight INT + ); + +--Arthritis +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 1, + 'Arthritis', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 1, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4291025); + +--Atrial Fibrillation +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 2, + 'Atrial Fibrillation', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 2, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (313217); + +--Chronic Kidney Disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 3, + 'Chronic Kidney Disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 3, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (46271022); + +--Coronary Heart Disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 4, + 
'Coronary Heart Disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 4, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (317576); + +--Diabetes +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 5, + 'Diabetes', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 5, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (201826); + +--Foot Problems +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 6, + 'Foot problems', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 6, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4169905, 4182187); + +--Fragility fracture +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 7, + 'Fragility fracture', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 7, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (44791986); + +--Heart Failure +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 8, + 'Heart Failure', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 8, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (316139); + +--Heart valve 
disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 9, + 'Heart valve disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 9, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4281749); + +--Hypertension +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 10, + 'Hypertension', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 10, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (316866); + +--Hypotension, syncope +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 11, + 'Hypotension, syncope', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 11, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (443240, 4037508) +UNION +SELECT 11, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4151718) +; + +--Osteoporosis +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 12, + 'Osteoporosis', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 12, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (80502); + +--Parkinson’s Disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 13, + 'Parkinson’s 
Disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 13, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (381270); + +--Peptic Ulcer +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 14, + 'Peptic Ulcer', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 14, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4027663); + +--Peripheral Vascular Disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 15, + 'Peripheral Vascular Disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 15, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (321052); + +--Respiratory Disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 16, + 'Respiratory Disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 16, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (320136); + +--Skin ulcer +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 17, + 'Skin ulcer', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 17, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (135333) 
+UNION +SELECT 17, + descendant_concept_id, + min_levels_of_separation, + 'Procedure' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4080500) +; + +--Stroke and TIA +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 18, + 'Stroke and TIA', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 18, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (381316, 373503); + +--Thyroid Disorders +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 19, + 'Thyroid Disorders', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 19, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (141253); + +--Urinary System Disease +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 20, + 'Urinary system disease', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 20, + descendant_concept_id, + min_levels_of_separation, + 'Device' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4145656); + +--Dizziness +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 21, + 'Dizziness', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 21, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4012520); + +--Dyspnoea +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 22, + 
'Dyspnoea', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 22, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (312437); + +--Falls +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 23, + 'Falls', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 23, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (436583); + +--Memory and Cognitive Problems +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 24, + 'Memory and Cognitive Problems', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 24, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (443432); + +--Sleep Disturbance +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 26, + 'Sleep Disturbance', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 26, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4158489, 436962, 4156060); + +--Urinary Incontinence +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 27, + 'Urinary Incontinence', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 27, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE 
ancestor_concept_id IN (197672); + +--Weight Loss and Anorexia +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 28, + 'Weight Loss and Anorexia', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 28, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4275273, 40491502); + +--Activity Limitation +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 29, + 'Activity Limitation', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 29, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4058154); + +--Hearing Loss +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 30, + 'Hearing Loss', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 30, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4038030) +UNION +SELECT 30, + descendant_concept_id, + min_levels_of_separation, + 'Device' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4246497) +; + +--Housebound +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 31, + 'Housebound', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 31, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4052962); + +--Mobility and Transfer problems +INSERT INTO #efi_scoring ( + diag_category_id, 
+ diag_category_name, + weight + ) +VALUES ( + 32, + 'Mobility and Transfer problems', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 32, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4306934, 4052477, 4052468) +UNION +SELECT 32, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4012645) +; + +--Requirement for Care +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 33, + 'Requirement for Care', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 33, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4080054, 4052331, 44791364, 4081589, 4080053); + +--Social Vulnerability +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 34, + 'Social Vulnerability', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 34, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4309238, 4019835, 4307853, 4307117, 44791055, 4218604) +UNION +SELECT 34, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4022661, 4143188, 4053087, 36716273) +; + +--Vision Problems, Blindness +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 35, + 'Vision Problems, Blindness', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) 
+SELECT 35, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (44790784) +UNION +SELECT 35, + descendant_concept_id, + min_levels_of_separation, + 'Observation' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (4102251, 44791072, 4016895) +; + +--Anaemia & Haematinic Deficiency +INSERT INTO #efi_scoring ( + diag_category_id, + diag_category_name, + weight + ) +VALUES ( + 36, + 'Anaemia & Haematinic Deficiency', + 1 + ); + +INSERT INTO #efi_concepts ( + diag_category_id, + concept_id, + min_levels_of_separation, + domain_id + ) +SELECT 36, + descendant_concept_id, + min_levels_of_separation, + 'Condition' +FROM @cdm_database_schema.concept_ancestor +WHERE ancestor_concept_id IN (439777); + +-- Feature construction +{@aggregated} ? { +IF OBJECT_ID('tempdb..#efi_data', 'U') IS NOT NULL + DROP TABLE #efi_data; + +IF OBJECT_ID('tempdb..#efi_stats', 'U') IS NOT NULL + DROP TABLE #efi_stats; + +IF OBJECT_ID('tempdb..#efi_prep', 'U') IS NOT NULL + DROP TABLE #efi_prep; + +IF OBJECT_ID('tempdb..#efi_prep2', 'U') IS NOT NULL + DROP TABLE #efi_prep2; + +SELECT cohort_definition_id, + @row_id_field, + cohort_start_date, + SUM(weight) AS score +INTO #efi_data +} : { +SELECT CAST(1000 + @analysis_id AS BIGINT) AS covariate_id, +{@temporal} ? { + CAST(NULL AS INT) AS time_id, +} + row_id, + SUM(weight) AS covariate_value +INTO @covariate_table +} +FROM ( +-- Condition + SELECT DISTINCT efi_scoring.diag_category_id, + efi_scoring.weight, +{@aggregated} ? 
{ + cohort_definition_id, + cohort.@row_id_field, + cohort.cohort_start_date +} : { + cohort.@row_id_field AS row_id +} + FROM @cohort_table cohort + INNER JOIN @cdm_database_schema.condition_era condition_era + ON cohort.@row_id_field = condition_era.person_id + INNER JOIN #efi_concepts efi_concepts + ON condition_era.condition_concept_id = efi_concepts.concept_id + INNER JOIN #efi_scoring efi_scoring + ON efi_concepts.diag_category_id = efi_scoring.diag_category_id +{@temporal} ? { + WHERE condition_era_start_date <= cohort.cohort_start_date +} : { + WHERE condition_era_start_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date) +} +{@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} +AND efi_concepts.domain_id = 'Condition' +UNION +-- Observation + SELECT DISTINCT efi_scoring.diag_category_id, + efi_scoring.weight, +{@aggregated} ? { + cohort_definition_id, + cohort.@row_id_field, + cohort.cohort_start_date +} : { + cohort.@row_id_field AS row_id +} + FROM @cohort_table cohort + INNER JOIN @cdm_database_schema.observation observation + ON cohort.@row_id_field = observation.person_id + INNER JOIN #efi_concepts efi_concepts + ON observation.observation_concept_id = efi_concepts.concept_id + INNER JOIN #efi_scoring efi_scoring + ON efi_concepts.diag_category_id = efi_scoring.diag_category_id +{@temporal} ? { + WHERE observation_date <= cohort.cohort_start_date +} : { + WHERE observation_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date) +} +{@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} +AND efi_concepts.domain_id = 'Observation' +UNION +-- Procedure + SELECT DISTINCT efi_scoring.diag_category_id, + efi_scoring.weight, +{@aggregated} ? 
{ + cohort_definition_id, + cohort.@row_id_field, + cohort.cohort_start_date +} : { + cohort.@row_id_field AS row_id +} + FROM @cohort_table cohort + INNER JOIN @cdm_database_schema.procedure_occurrence procedure_occurrence + ON cohort.@row_id_field = procedure_occurrence.person_id + INNER JOIN #efi_concepts efi_concepts + ON procedure_occurrence.procedure_concept_id = efi_concepts.concept_id + INNER JOIN #efi_scoring efi_scoring + ON efi_concepts.diag_category_id = efi_scoring.diag_category_id +{@temporal} ? { + WHERE procedure_date <= cohort.cohort_start_date +} : { + WHERE procedure_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date) +} +{@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} +AND efi_concepts.domain_id = 'Procedure' +UNION +-- Device + SELECT DISTINCT efi_scoring.diag_category_id, + efi_scoring.weight, +{@aggregated} ? { + cohort_definition_id, + cohort.@row_id_field, + cohort.cohort_start_date +} : { + cohort.@row_id_field AS row_id +} + FROM @cohort_table cohort + INNER JOIN @cdm_database_schema.device_exposure device_exposure + ON cohort.@row_id_field = device_exposure.person_id + INNER JOIN #efi_concepts efi_concepts + ON device_exposure.device_concept_id = efi_concepts.concept_id + INNER JOIN #efi_scoring efi_scoring + ON efi_concepts.diag_category_id = efi_scoring.diag_category_id +{@temporal} ? { + WHERE device_exposure_start_date <= cohort.cohort_start_date +} : { + WHERE device_exposure_start_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date) +} +{@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} +AND efi_concepts.domain_id = 'Device' +UNION +-- Poly-pharmacy + SELECT 25 AS diag_category_id, + 1 AS weight, +{@aggregated} ? 
{ + cohort_definition_id, + @row_id_field, + cohort_start_date +} : { + @row_id_field AS row_id +} + FROM + ( + SELECT cohort.cohort_definition_id, + cohort.@row_id_field, + cohort.cohort_start_date, + COUNT (distinct drug_concept_id) AS ingrd_count + FROM @cohort_table cohort + INNER JOIN @cdm_database_schema.drug_era + ON cohort.@row_id_field = drug_era.person_id + WHERE drug_concept_id != 0 +{@temporal} ? { + AND drug_era_start_date <= cohort.cohort_start_date +} : { + AND drug_era_start_date <= DATEADD(DAY, @end_day, cohort.cohort_start_date) + AND drug_era_end_date >= DATEADD(DAY, @end_day, cohort.cohort_start_date) +} +{@cohort_definition_id != -1} ? { AND cohort.cohort_definition_id IN (@cohort_definition_id)} + GROUP BY cohort.cohort_definition_id, + cohort.@row_id_field, + cohort.cohort_start_date + ) q + WHERE ingrd_count>=5 + ) temp +{@aggregated} ? { +GROUP BY cohort_definition_id, + @row_id_field, + cohort_start_date +} : { +GROUP BY row_id +} +; + +{@aggregated} ? { +WITH t1 AS ( + SELECT cohort_definition_id, + COUNT(*) AS cnt + FROM @cohort_table +{@cohort_definition_id != -1} ? 
{ WHERE cohort_definition_id IN (@cohort_definition_id)} + GROUP BY cohort_definition_id + ), +t2 AS ( + SELECT cohort_definition_id, + COUNT(*) AS cnt, + MIN(score) AS min_score, + MAX(score) AS max_score, + SUM(score) AS sum_score, + SUM(score * score) as squared_score + FROM #efi_data + GROUP BY cohort_definition_id + ) +SELECT t1.cohort_definition_id, + CASE WHEN t2.cnt = t1.cnt THEN t2.min_score ELSE 0 END AS min_value, + t2.max_score AS max_value, + CAST(t2.sum_score / (1.0 * t1.cnt) AS FLOAT) AS average_value, + CAST(CASE WHEN t2.cnt = 1 THEN 0 ELSE SQRT((1.0 * t2.cnt*t2.squared_score - 1.0 * t2.sum_score*t2.sum_score) / (1.0 * t2.cnt*(1.0 * t2.cnt - 1))) END AS FLOAT) AS standard_deviation, + t2.cnt AS count_value, + t1.cnt - t2.cnt AS count_no_value, + t1.cnt AS population_size +INTO #efi_stats +FROM t1 +INNER JOIN t2 + ON t1.cohort_definition_id = t2.cohort_definition_id; + +SELECT cohort_definition_id, + score, + COUNT(*) AS total, + ROW_NUMBER() OVER (PARTITION BY cohort_definition_id ORDER BY score) AS rn +INTO #efi_prep +FROM #efi_data +GROUP BY cohort_definition_id, + score; + +SELECT s.cohort_definition_id, + s.score, + SUM(p.total) AS accumulated +INTO #efi_prep2 +FROM #efi_prep s +INNER JOIN #efi_prep p + ON p.rn <= s.rn + AND p.cohort_definition_id = s.cohort_definition_id +GROUP BY s.cohort_definition_id, + s.score; + +SELECT o.cohort_definition_id, + CAST(1000 + @analysis_id AS BIGINT) AS covariate_id, +{@temporal} ? 
{ + CAST(NULL AS INT) AS time_id, +} + o.count_value, + o.min_value, + o.max_value, + CAST(o.average_value AS FLOAT) average_value, + CAST(o.standard_deviation AS FLOAT) standard_deviation, + CASE + WHEN .50 * o.population_size < count_no_value THEN 0 + ELSE MIN(CASE WHEN p.accumulated + count_no_value >= .50 * o.population_size THEN score END) + END AS median_value, + CASE + WHEN .10 * o.population_size < count_no_value THEN 0 + ELSE MIN(CASE WHEN p.accumulated + count_no_value >= .10 * o.population_size THEN score END) + END AS p10_value, + CASE + WHEN .25 * o.population_size < count_no_value THEN 0 + ELSE MIN(CASE WHEN p.accumulated + count_no_value >= .25 * o.population_size THEN score END) + END AS p25_value, + CASE + WHEN .75 * o.population_size < count_no_value THEN 0 + ELSE MIN(CASE WHEN p.accumulated + count_no_value >= .75 * o.population_size THEN score END) + END AS p75_value, + CASE + WHEN .90 * o.population_size < count_no_value THEN 0 + ELSE MIN(CASE WHEN p.accumulated + count_no_value >= .90 * o.population_size THEN score END) + END AS p90_value +INTO @covariate_table +FROM #efi_prep2 p +INNER JOIN #efi_stats o + ON p.cohort_definition_id = o.cohort_definition_id +{@included_cov_table != ''} ? 
{WHERE 1000 + @analysis_id IN (SELECT id FROM @included_cov_table)} +GROUP BY o.count_value, + o.count_no_value, + o.min_value, + o.max_value, + o.average_value, + o.standard_deviation, + o.population_size, + o.cohort_definition_id; + +TRUNCATE TABLE #efi_data; +DROP TABLE #efi_data; + +TRUNCATE TABLE #efi_stats; +DROP TABLE #efi_stats; + +TRUNCATE TABLE #efi_prep; +DROP TABLE #efi_prep; + +TRUNCATE TABLE #efi_prep2; +DROP TABLE #efi_prep2; +} + +TRUNCATE TABLE #efi_concepts; + +DROP TABLE #efi_concepts; + +TRUNCATE TABLE #efi_scoring; + +DROP TABLE #efi_scoring; + diff --git a/inst/sql/sql_server/GetAttrCovariates.sql b/inst/sql/sql_server/GetAttrCovariates.sql index 2d138774..51e73ea8 100644 --- a/inst/sql/sql_server/GetAttrCovariates.sql +++ b/inst/sql/sql_server/GetAttrCovariates.sql @@ -1,7 +1,7 @@ /************************************************************************ @file GetAttrCovariates.sql -Copyright 2021 Observational Health Data Sciences and Informatics +Copyright 2022 Observational Health Data Sciences and Informatics This file is part of FeatureExtraction diff --git a/inst/sql/sql_server/GetHdpsCovariates.sql b/inst/sql/sql_server/GetHdpsCovariates.sql index 69e00431..ab1935f8 100644 --- a/inst/sql/sql_server/GetHdpsCovariates.sql +++ b/inst/sql/sql_server/GetHdpsCovariates.sql @@ -1,5 +1,5 @@ /************************************************************************ -Copyright 2021 Observational Health Data Sciences and Informatics +Copyright 2022 Observational Health Data Sciences and Informatics This file is part of FeatureExtraction diff --git a/inst/sql/sql_server/IncludeDescendants.sql b/inst/sql/sql_server/IncludeDescendants.sql index 701f2204..fb95e655 100644 --- a/inst/sql/sql_server/IncludeDescendants.sql +++ b/inst/sql/sql_server/IncludeDescendants.sql @@ -1,5 +1,5 @@ /************************************************************************ -Copyright 2021 Observational Health Data Sciences and Informatics +Copyright 2022 Observational 
Health Data Sciences and Informatics This file is part of FeatureExtraction diff --git a/inst/sql/sql_server/RemoveCovariateTempTables.sql b/inst/sql/sql_server/RemoveCovariateTempTables.sql index 58eac6bf..bee4c313 100644 --- a/inst/sql/sql_server/RemoveCovariateTempTables.sql +++ b/inst/sql/sql_server/RemoveCovariateTempTables.sql @@ -1,5 +1,5 @@ /************************************************************************ -Copyright 2021 Observational Health Data Sciences and Informatics +Copyright 2022 Observational Health Data Sciences and Informatics This file is part of FeatureExtraction diff --git a/java/org/ohdsi/featureExtraction/FeatureExtraction.java b/java/org/ohdsi/featureExtraction/FeatureExtraction.java index 0661aed7..4ae717ea 100644 --- a/java/org/ohdsi/featureExtraction/FeatureExtraction.java +++ b/java/org/ohdsi/featureExtraction/FeatureExtraction.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright 2021 Observational Health Data Sciences and Informatics + * Copyright 2022 Observational Health Data Sciences and Informatics * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/java/org/ohdsi/featureExtraction/ReadCSVFile.java b/java/org/ohdsi/featureExtraction/ReadCSVFile.java index 741af824..fdd6f0ca 100644 --- a/java/org/ohdsi/featureExtraction/ReadCSVFile.java +++ b/java/org/ohdsi/featureExtraction/ReadCSVFile.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright 2021 Observational Health Data Sciences and Informatics + * Copyright 2022 Observational Health Data Sciences and Informatics * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/java/org/ohdsi/featureExtraction/ReadCSVFileWithHeader.java b/java/org/ohdsi/featureExtraction/ReadCSVFileWithHeader.java index 8943c721..696ec543 100644 --- a/java/org/ohdsi/featureExtraction/ReadCSVFileWithHeader.java +++ b/java/org/ohdsi/featureExtraction/ReadCSVFileWithHeader.java @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright 2021 Observational Health Data Sciences and Informatics + * Copyright 2022 Observational Health Data Sciences and Informatics * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/tests/.DS_Store b/tests/.DS_Store new file mode 100644 index 00000000..585b524a Binary files /dev/null and b/tests/.DS_Store differ diff --git a/tests/testthat/test-CovariateData.R b/tests/testthat/test-CovariateData.R index 32b6a99f..6b3eb687 100644 --- a/tests/testthat/test-CovariateData.R +++ b/tests/testthat/test-CovariateData.R @@ -109,4 +109,4 @@ test_that("Test show method", { on.exit(rm(cvData)) }) -unlink(connectionDetails$server()) \ No newline at end of file +unlink(connectionDetails$server()) diff --git a/tests/testthat/test-FeatureExtractionInternal.R b/tests/testthat/test-FeatureExtractionInternal.R index fbc820c9..611d0751 100644 --- a/tests/testthat/test-FeatureExtractionInternal.R +++ b/tests/testthat/test-FeatureExtractionInternal.R @@ -16,4 +16,4 @@ test_that("Test JSON functions", { fromJsonResult <- FeatureExtraction:::.fromJson(expectedToJsonResult) expect_equal(fromJsonResult, expectedFromJsonResult) -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-GetCovariates.R b/tests/testthat/test-GetCovariates.R index 040322f2..14a61ce1 100644 --- a/tests/testthat/test-GetCovariates.R +++ b/tests/testthat/test-GetCovariates.R @@ -118,4 +118,4 @@ test_that("Custom covariate builder", { on.exit(DatabaseConnector::disconnect(connection)) }) 
-unlink(connectionDetails$server()) \ No newline at end of file +unlink(connectionDetails$server()) diff --git a/tests/testthat/test-GetCovariatesFromCohortAttributes.R b/tests/testthat/test-GetCovariatesFromCohortAttributes.R index c968a673..f15dcb81 100644 --- a/tests/testthat/test-GetCovariatesFromCohortAttributes.R +++ b/tests/testthat/test-GetCovariatesFromCohortAttributes.R @@ -104,4 +104,4 @@ test_that("createCohortAttrCovariateSettings check", { # Remove the Eunomia database: -unlink(connectionDetails$server()) \ No newline at end of file +unlink(connectionDetails$server()) diff --git a/tests/testthat/test-GetDefaultCovariates.R b/tests/testthat/test-GetDefaultCovariates.R index f8e299b0..a4b407ac 100644 --- a/tests/testthat/test-GetDefaultCovariates.R +++ b/tests/testthat/test-GetDefaultCovariates.R @@ -55,4 +55,4 @@ test_that("Test exit conditions", { # on.exit(DatabaseConnector::disconnect(connection)) # }) -unlink(connectionDetails$server()) \ No newline at end of file +unlink(connectionDetails$server()) diff --git a/tests/testthat/test-query-no-fail.R b/tests/testthat/test-query-no-fail.R index 3fecbb71..8db76df2 100644 --- a/tests/testthat/test-query-no-fail.R +++ b/tests/testthat/test-query-no-fail.R @@ -94,6 +94,7 @@ runExtractionPerPerson <- function(connectionDetails, cdmDatabaseSchema, ohdsiDa useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = TRUE, + useEfi = TRUE, useDistinctConditionCountLongTerm = FALSE, useDistinctConditionCountMediumTerm = FALSE, useDistinctConditionCountShortTerm = TRUE, @@ -288,6 +289,7 @@ runExtractionAggregated <- function(connectionDetails, cdmDatabaseSchema, ohdsiD useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = TRUE, + useEfi = TRUE, useDistinctConditionCountLongTerm = FALSE, useDistinctConditionCountMediumTerm = FALSE, useDistinctConditionCountShortTerm = TRUE, @@ -432,6 +434,7 @@ runExtractionTemporalPerPerson <- function(connectionDetails, cdmDatabaseSchema, useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = TRUE, 
+ useEfi = TRUE, useDistinctConditionCount = TRUE, useDistinctIngredientCount = TRUE, useDistinctProcedureCount = TRUE, @@ -558,6 +561,7 @@ runExtractionTemporalAggregated <- function(connectionDetails, cdmDatabaseSchema useChads2 = TRUE, useChads2Vasc = TRUE, useHfrs = TRUE, + useEfi = TRUE, useDistinctConditionCount = TRUE, useDistinctIngredientCount = TRUE, useDistinctProcedureCount = TRUE, diff --git a/tests/testthat/test-tidyCovariates.R b/tests/testthat/test-tidyCovariates.R index 94da809e..165562d1 100644 --- a/tests/testthat/test-tidyCovariates.R +++ b/tests/testthat/test-tidyCovariates.R @@ -80,4 +80,4 @@ test_that("tidyCovariateData on Temporal Data", { expect_equal(length(tidy$analysisRef$analysisId), length(covariateData$analysisRef$analysisId)) }) -unlink(connectionDetails$server()) \ No newline at end of file +unlink(connectionDetails$server())