Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# ROMOPMappingTools 2.0.5

- Fixed a bug in Usagi to STCM table conversion with respect to source parents
- Fixed the STCM to CDM table conversion through SQL, which had the `Subsumes` and `Is a` relationships flipped
- Added a self-reference row for each concept to the SQL that converts `concept_relationship` to `concept_ancestor`
- Updated test-databasesFromAndToCSV.R to ignore warnings in the DQD validation

# ROMOPMappingTools 2.0.4

- Added missing domainId combinations to the usagi file validation
Expand Down
4 changes: 2 additions & 2 deletions R/appendUsagiFileToSTCMtable.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ appendUsagiFileToSTCMtable <- function(
if (all(c("ADD_INFO:sourceParents", "ADD_INFO:sourceParentVocabulary") %in% usagiTibbleColumns) &&
any(!is.na(dplyr::pull(usagiTibble, `ADD_INFO:sourceParents`)))) {
validVocabularyConceptCodes <- usagiTibble |>
dplyr::transmute(vocabulary_id = "", concept_code = sourceCode, concept_id = `ADD_INFO:sourceConceptId`) |>
dplyr::transmute(vocabulary_id = NA_character_, concept_code = sourceCode, concept_id = `ADD_INFO:sourceConceptId`) |>
dplyr::distinct()

usedParentVocabularies <- usagiTibble |>
Expand All @@ -165,7 +165,7 @@ appendUsagiFileToSTCMtable <- function(
dplyr::pull(`ADD_INFO:sourceParentVocabulary`) |>
stringr::str_split("\\|") |>
purrr::flatten_chr() |>
unique()
unique()
if (length(usedParentVocabularies) > 0) {
parentVocabularyConceptCodes <- dplyr::tbl(connection, "CONCEPT") |>
dplyr::filter(vocabulary_id %in% usedParentVocabularies) |>
Expand Down
37 changes: 32 additions & 5 deletions inst/sql/sql_server/CONCEPT_RELATIONSHIPToANCESTOR.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ SELECT cr.concept_id_1, cr.concept_id_2
INTO #relationships
FROM @vocabularyDatabaseSchema.concept c
INNER JOIN @vocabularyDatabaseSchema.concept_relationship cr
ON cr.concept_id_1 = c.concept_id
ON cr.concept_id_1 = c.concept_id
WHERE c.vocabulary_id IN (@vocabularyList)
AND cr.relationship_id = 'Subsumes'
ORDER BY cr.concept_id_1, cr.concept_id_2;
Expand All @@ -38,9 +38,9 @@ WITH RECURSIVE ancestor_cte AS (
1 AS min_levels_of_separation,
1 AS max_levels_of_separation
FROM #relationships

UNION ALL

-- Recursive case: find descendant relationships
SELECT r.concept_id_1 AS ancestor_concept_id,
c.descendant_concept_id AS descendant_concept_id,
Expand All @@ -49,9 +49,36 @@ WITH RECURSIVE ancestor_cte AS (
FROM #relationships r
JOIN ancestor_cte c
ON r.concept_id_2 = c.ancestor_concept_id
), ancestor_cte_self_reference AS (
SELECT ancestor_concept_id,
descendant_concept_id,
min_levels_of_separation,
max_levels_of_separation
FROM ancestor_cte
UNION ALL
-- Add self reference for each ancestor_concept_id
SELECT r.concept_id_1 AS ancestor_concept_id,
r.concept_id_1 AS descendant_concept_id,
0 AS min_levels_of_separation,
0 AS max_levels_of_separation
FROM (
SELECT DISTINCT concept_id_1
FROM #relationships
) AS r
UNION ALL
-- Add self reference for each descendant_concept_id
SELECT r.concept_id_2 AS ancestor_concept_id,
r.concept_id_2 AS descendant_concept_id,
0 AS min_levels_of_separation,
0 AS max_levels_of_separation
FROM (
SELECT DISTINCT concept_id_2
FROM #relationships
) AS r
)
SELECT *
FROM ancestor_cte;
SELECT DISTINCT *
FROM ancestor_cte_self_reference;


-- 4- Remove the temporary table
DROP TABLE #relationships;
52 changes: 26 additions & 26 deletions inst/sql/sql_server/STCMExtendedToCDM.sql
Original file line number Diff line number Diff line change
Expand Up @@ -116,36 +116,36 @@ ORDER BY


-- 4. CONCEPT_RELATIONSHIP table
-- Delete previous rows with the same concept_id_1 or concept_id_2 as in the source_to_concept_map.source_concept_id
-- Delete previous rows with the same concept_id_1 or concept_id_2 as in the source_to_concept_map.source_concept_id
-- Insert one row for each relationship of type 'Maps to', 'Mapped from', 'Subsumes', 'Is a'
-- 'Maps to' as follows:
-- - concept_id_1 = source_to_concept_map.source_concept_id
-- - concept_id_2 = source_to_concept_map.target_concept_id
-- - relationship_id = 'Maps to'
-- - valid_start_date = if source_to_concept_map.valid_start_date is not NULL, use it, otherwise use '1970-01-01'
-- - valid_end_date = if source_to_concept_map.valid_end_date is not NULL, use it, otherwise use '2099-12-31'
-- - invalid_reason = NULL
-- - invalid_reason = NULL
-- 'Mapped from' as follows:
-- - concept_id_1 = source_to_concept_map.target_concept_id
-- - concept_id_2 = source_to_concept_map.source_concept_id
-- - relationship_id = 'Mapped from'
-- - valid_start_date = if source_to_concept_map.valid_start_date is not NULL, use it, otherwise use '1970-01-01'
-- - valid_end_date = if source_to_concept_map.valid_end_date is not NULL, use it, otherwise use '2099-12-31'
-- - invalid_reason = NULL
-- - invalid_reason = NULL
-- 'Subsumes' as follows:
-- - concept_id_1 = any concept_id in source_to_concept_map.source_parents_concept_ids
-- - concept_id_2 = source_to_concept_map.source_concept_id
-- - relationship_id = 'Subsumes'
-- - valid_start_date = if source_to_concept_map.valid_start_date is not NULL, use it, otherwise use '1970-01-01'
-- - valid_end_date = if source_to_concept_map.valid_end_date is not NULL, use it, otherwise use '2099-12-31'
-- - invalid_reason = NULL
-- - invalid_reason = NULL
-- 'Is a' as follows:
-- - concept_id_1 = source_to_concept_map.source_concept_id
-- - concept_id_2 = any concept_id in source_to_concept_map.source_parents_concept_ids
-- - relationship_id = 'Is a'
-- - valid_start_date = if source_to_concept_map.valid_start_date is not NULL, use it, otherwise use '1970-01-01'
-- - valid_end_date = if source_to_concept_map.valid_end_date is not NULL, use it, otherwise use '2099-12-31'
-- - invalid_reason = NULL
-- - invalid_reason = NULL
DELETE FROM
@vocabularyDatabaseSchema.CONCEPT_RELATIONSHIP
WHERE
Expand Down Expand Up @@ -218,7 +218,7 @@ WHERE
AND stcm.source_concept_id != 0;

-- is a and subsumes (both built from source_parents_concept_ids)
INSERT INTO
INSERT INTO
@vocabularyDatabaseSchema.CONCEPT_RELATIONSHIP (
concept_id_1,
concept_id_2,
Expand All @@ -229,43 +229,43 @@ INSERT INTO
)

WITH RECURSIVE split_parents AS (
SELECT
SELECT
source_concept_id,
valid_start_date,
valid_end_date,
-- Get the first value before the delimiter
SUBSTRING(source_parents_concept_ids, 1,
CASE
WHEN POSITION('|' IN source_parents_concept_ids) = 0
SUBSTRING(source_parents_concept_ids, 1,
CASE
WHEN POSITION('|' IN source_parents_concept_ids) = 0
THEN LENGTH(source_parents_concept_ids)
ELSE POSITION('|' IN source_parents_concept_ids) - 1
END) AS source_parents_concept_ids,
-- Get the remaining string after the delimiter
CASE
WHEN POSITION('|' IN source_parents_concept_ids) = 0
CASE
WHEN POSITION('|' IN source_parents_concept_ids) = 0
THEN NULL
ELSE SUBSTRING(source_parents_concept_ids,
ELSE SUBSTRING(source_parents_concept_ids,
POSITION('|' IN source_parents_concept_ids) + 1)
END AS remaining_string
FROM @vocabularyDatabaseSchema.@sourceToConceptMapTable AS stcm
WHERE stcm.source_parents_concept_ids IS NOT NULL

UNION ALL
SELECT

SELECT
source_concept_id,
valid_start_date,
valid_end_date,
SUBSTRING(remaining_string, 1,
CASE
WHEN POSITION('|' IN remaining_string) = 0
SUBSTRING(remaining_string, 1,
CASE
WHEN POSITION('|' IN remaining_string) = 0
THEN LENGTH(remaining_string)
ELSE POSITION('|' IN remaining_string) - 1
END),
CASE
WHEN POSITION('|' IN remaining_string) = 0
CASE
WHEN POSITION('|' IN remaining_string) = 0
THEN NULL
ELSE SUBSTRING(remaining_string,
ELSE SUBSTRING(remaining_string,
POSITION('|' IN remaining_string) + 1)
END
FROM split_parents
Expand All @@ -275,7 +275,7 @@ WITH RECURSIVE split_parents AS (
SELECT DISTINCT
CAST(sp.source_concept_id AS INTEGER) AS concept_id_1,
CAST(sp.source_parents_concept_ids AS INTEGER) AS concept_id_2,
'Subsumes' AS relationship_id,
'Is a' AS relationship_id,
CAST(
COALESCE(sp.valid_start_date, '1970-01-01') AS DATE
) AS valid_start_date,
Expand All @@ -287,11 +287,11 @@ FROM split_parents AS sp
WHERE sp.source_parents_concept_ids IS NOT NULL
UNION ALL

-- is a
-- Subsumes
SELECT DISTINCT
CAST(sp.source_parents_concept_ids AS INTEGER) AS concept_id_1,
CAST(sp.source_concept_id AS INTEGER) AS concept_id_2,
'Is a' AS relationship_id,
'Subsumes' AS relationship_id,
CAST(
COALESCE(sp.valid_start_date, '1970-01-01') AS DATE
) AS valid_start_date,
Expand All @@ -300,4 +300,4 @@ SELECT DISTINCT
) AS valid_end_date,
NULL AS invalid_reason
FROM split_parents AS sp
WHERE sp.source_parents_concept_ids IS NOT NULL
WHERE sp.source_parents_concept_ids IS NOT NULL
49 changes: 25 additions & 24 deletions tests/testthat/test-STCMToCDMTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ test_that("STCMToCDMTables creates CONCEPT entries from STCM Extended with corre
"code5", 2000000005, "TestVocab", "Test Code 5 unmapped", 0, "SNOMED", as.Date("2023-01-01"), as.Date("2099-12-31"), NA_character_, "Test concept class", "Condition", NA_character_,
# no dates
"code6", 2000000006, "TestVocab", "Test Code 6 no start date", 0, "SNOMED", as.Date(NA), as.Date("2099-12-31"), NA_character_, "Test concept class 2", "Condition", NA_character_,
"code7", 2000000007, "TestVocab", "Test Code 7 no end date", 0, "SNOMED", as.Date("2023-01-01"), as.Date(NA), NA_character_, "Test concept class 2", "Condition", NA_character_,
"code7", 2000000007, "TestVocab", "Test Code 7 no end date", 0, "SNOMED", as.Date("2023-01-01"), as.Date(NA), NA_character_, "Test concept class 2", "Condition", NA_character_,
# parent concept ids
"code8", 2000000008, "TestVocab", "Test Code 8 parent concept ids", 0, "SNOMED", as.Date("2023-01-01"), as.Date("2099-12-31"), NA_character_, "Test concept class 3", "Condition", "2000000001",
"code9", 2000000009, "TestVocab", "Test Code 9 parent concept ids", 0, "SNOMED", as.Date("2023-01-01"), as.Date("2099-12-31"), NA_character_, "Test concept class 3", "Condition", "2000000001|2000000002",
Expand Down Expand Up @@ -56,9 +56,9 @@ test_that("STCMToCDMTables creates CONCEPT entries from STCM Extended with corre
)

# CONCEPT
res <- dplyr::tbl(connection, "CONCEPT") |>
dplyr::filter(vocabulary_id == "TestVocab") |>
dplyr::arrange(concept_id) |>
res <- dplyr::tbl(connection, "CONCEPT") |>
dplyr::filter(vocabulary_id == "TestVocab") |>
dplyr::arrange(concept_id) |>
dplyr::collect()

# general
Expand All @@ -72,43 +72,44 @@ test_that("STCMToCDMTables creates CONCEPT entries from STCM Extended with corre

# CONCEPT_RELATIONSHIP
# maps to
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Maps to") |>
dplyr::filter(concept_id_1 > 2000000000) |>
dplyr::arrange(concept_id_1, concept_id_2) |>
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Maps to") |>
dplyr::filter(concept_id_1 > 2000000000) |>
dplyr::arrange(concept_id_1, concept_id_2) |>
dplyr::collect()
res |> nrow() |> expect_equal(6)
res |> dplyr::pull(relationship_id) |> expect_equal(rep("Maps to", 6))
res |> dplyr::pull(concept_id_2) |> expect_equal(c( 141797, 141797, 36713461, 141797, 36713461, 36713461))

# maps from
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Mapped from") |>
dplyr::filter(concept_id_2 > 2000000000) |>
dplyr::arrange(concept_id_2, concept_id_1) |>
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Mapped from") |>
dplyr::filter(concept_id_2 > 2000000000) |>
dplyr::arrange(concept_id_2, concept_id_1) |>
dplyr::collect()
res |> nrow() |> expect_equal(6)
res |> dplyr::pull(relationship_id) |> expect_equal(rep("Mapped from", 6))
res |> dplyr::pull(concept_id_1) |> expect_equal(c( 141797, 141797, 36713461, 141797, 36713461, 36713461))

# subsumes
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Subsumes") |>
dplyr::filter(concept_id_1 > 2000000000) |>
dplyr::arrange(concept_id_1, concept_id_2) |>
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Subsumes") |>
dplyr::filter(concept_id_1 > 2000000000) |>
dplyr::arrange(concept_id_1, concept_id_2) |>
dplyr::collect()
res |> nrow() |> expect_equal(6)
res |> dplyr::pull(concept_id_1) |> expect_equal(c(2000000008, 2000000009, 2000000009, 2000000010, 2000000010, 2000000010))
res |> dplyr::pull(concept_id_2) |> expect_equal(c(2000000001, 2000000001, 2000000002, 2000000001, 2000000002, 2000000003))
res |> dplyr::pull(concept_id_1) |> expect_equal(c(2000000001, 2000000001, 2000000002, 2000000001, 2000000002, 2000000003))
res |> dplyr::pull(concept_id_2) |> expect_equal(c(2000000008, 2000000009, 2000000009, 2000000010, 2000000010, 2000000010))


# is a
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Is a") |>
dplyr::filter(concept_id_1 > 2000000000) |>
dplyr::arrange(concept_id_1, concept_id_2) |>
res <- dplyr::tbl(connection, "CONCEPT_RELATIONSHIP") |>
dplyr::filter(relationship_id == "Is a") |>
dplyr::filter(concept_id_1 > 2000000000) |>
dplyr::arrange(concept_id_1, concept_id_2) |>
dplyr::collect()
res |> nrow() |> expect_equal(6)
res |> dplyr::pull(concept_id_1) |> expect_equal(c(2000000001, 2000000001, 2000000002, 2000000001, 2000000002, 2000000003))
res |> dplyr::pull(concept_id_2) |> expect_equal(c(2000000008, 2000000009, 2000000009, 2000000010, 2000000010, 2000000010))
res |> dplyr::pull(concept_id_1) |> expect_equal(c(2000000008, 2000000009, 2000000009, 2000000010, 2000000010, 2000000010))
res |> dplyr::pull(concept_id_2) |> expect_equal(c(2000000001, 2000000001, 2000000002, 2000000001, 2000000002, 2000000003))

})
51 changes: 46 additions & 5 deletions tests/testthat/test-appendUsagiFileToSTCMTable.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ test_that("test appendUsagiFileToSTCMTable appends the usagi file to the sourceT
connection = connection,
vocabularyDatabaseSchema = vocabularyDatabaseSchema,
sourceToConceptMapTable = sourceToConceptMapTable
)
)

stcmTable <- DBI::dbReadTable(connection, sourceToConceptMapTable) |> tibble::as_tibble()
stcmTable |>
Expand All @@ -74,18 +74,59 @@ test_that("test appendUsagiFileToSTCMTable appends the usagi file to the sourceT
dplyr::filter(TARGET_CONCEPT_ID != 0L) |>
dplyr::count() |> dplyr::pull(n) |>
expect_equal(nrowUsagiFileMapped)

stcmTable |>
names() |>
names() |>
stringr::str_to_lower() |>
expect_equal(c(
"source_code", "source_concept_id", "source_vocabulary_id", "source_code_description", "target_concept_id",
"target_vocabulary_id", "valid_start_date", "valid_end_date", "invalid_reason", "source_concept_class",
"target_vocabulary_id", "valid_start_date", "valid_end_date", "invalid_reason", "source_concept_class",
"source_domain", "source_parents_concept_ids"))

stcmTable |>
dplyr::filter(is.na(SOURCE_PARENTS_CONCEPT_IDS)) |>
nrow() |>
expect_equal(21)
expect_equal(0)

})

test_that("test appendUsagiFileToSTCMTable appends the ICD10fi usagi file to the sourceToConceptMapTable with ICD10 parent information", {
  # Verifies that the source parents declared in the ICD10fi Usagi file are
  # written to SOURCE_PARENTS_CONCEPT_IDS of the extended STCM table as
  # concept ids.
  pathToUsagiFile <- system.file("testdata/VOCABULARIES/ICD10fi/ICD10fi.usagi.csv", package = "ROMOPMappingTools")
  pathToOMOPVocabularyDuckDBfile <- helper_createATemporaryCopyOfTheOMOPVocabularyDuckDB()
  vocabularyDatabaseSchema <- "main"

  connection <- DatabaseConnector::connect(
    dbms = "duckdb",
    server = pathToOMOPVocabularyDuckDBfile
  )
  # add = TRUE so this cleanup never replaces another registered exit handler
  on.exit(DatabaseConnector::disconnect(connection), add = TRUE)

  # create an extended sourceToConceptMapTable
  sourceToConceptMapTable <- "source_to_concept_map_extended"
  createSourceToConceptMapExtended(connection, vocabularyDatabaseSchema, sourceToConceptMapTable)

  appendUsagiFileToSTCMtable(
    vocabularyId = "ICD10fi",
    pathToUsagiFile = pathToUsagiFile,
    connection = connection,
    vocabularyDatabaseSchema = vocabularyDatabaseSchema,
    sourceToConceptMapTable = sourceToConceptMapTable
  )

  stcmTable <- DBI::dbReadTable(connection, sourceToConceptMapTable) |>
    tibble::as_tibble()

  # For source code C18.62 the parent concept IDs should be ICD10 code C18.6
  # with concept_id 45552246
  stcmTable |>
    dplyr::filter(SOURCE_CODE == "C18.62") |>
    dplyr::pull(SOURCE_PARENTS_CONCEPT_IDS) |>
    expect_equal("45552246")

  # For source code Y94.1 the parent concept IDs should be ICD10fi code Y94
  # with concept_id 2000503725
  # NOTE(review): this comment previously cited concept_id 2000503727 while the
  # assertion checked "2000503725"; the comment now follows the assertion.
  # Confirm the id of Y94 against the ICD10fi test vocabulary.
  stcmTable |>
    dplyr::filter(SOURCE_CODE == "Y94.1") |>
    dplyr::pull(SOURCE_PARENTS_CONCEPT_IDS) |>
    expect_equal("2000503725")
})
Loading
Loading