Skip to content
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: ROMOPMappingTools
Title: Tools for Working with OMOP CDM Mappings
Version: 2.1.2
Version: 2.1.3
Authors@R:
person("Javier", "Gracia-Tabuenca", , "javier.graciatabuenca@tuni.fi", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-2455-0598"))
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# ROMOPMappingTools 2.1.3

- Added rule to validate usagi file: sourceConceptId must be unique, i.e. each sourceConceptId must belong to only one sourceCode

# ROMOPMappingTools 2.1.2

- Added rule to validate usagi file: sourceConceptCode must be less than 50 characters
Expand Down
45 changes: 45 additions & 0 deletions R/validateUsagiFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' - Check if sourceName is less than 255 characters
#' If usagi file has C&CR columns:
#' - Check if concept_id is not 0 for APPROVED mappingStatus
#' - Check if sourceConceptId is unique (each sourceConceptId belongs to only one sourceCode)
#' - Check codes with mapping to more than one domain are mapped to compatible domains
#' - Check if sourceValidStartDate is before sourceValidEndDate
#' - Check if ADD_INFO:sourceParents is a valid concept code in the ADD_INFO:sourceParentVocabulary
Expand Down Expand Up @@ -238,6 +239,50 @@ validateUsagiFile <- function(
usagiTibble <- result$fileTibble
validationLogR6 <- result$validationLogR6

# Check that sourceConceptId is unique: each sourceConceptId must belong to only one sourceCode.
# Rows are first deduplicated by (sourceCode, sourceConceptId) so that valid multi-mapped
# codes (one sourceCode → many conceptIds, all sharing the same sourceConceptId) are not
# incorrectly flagged. Rows with a missing sourceConceptId are left out of this check.
usagiDistinct <- usagiTibble |>
  dplyr::filter(!is.na(`ADD_INFO:sourceConceptId`)) |>
  dplyr::distinct(sourceCode, `ADD_INFO:sourceConceptId`)

distinctValidationRules <- validate::validator(
  SourceConceptId.is.not.unique = is_unique(`ADD_INFO:sourceConceptId`)
)
distinctValidations <- validate::confront(usagiDistinct, distinctValidationRules)
distinctValidationSummary <- validate::summary(distinctValidations) |> tibble::as_tibble()

if (distinctValidationSummary$fails[1] > 0) {
  # Resolve the offending ids before logging. `fails` counts failing rows of the
  # deduplicated (sourceCode, sourceConceptId) table, so a single id shared by two
  # sourceCodes contributes two failures; the log must report distinct ids instead.
  notUniqueSourceConceptIds <- usagiDistinct[
    !validate::values(distinctValidations)[, "SourceConceptId.is.not.unique"],
  ] |>
    dplyr::pull(`ADD_INFO:sourceConceptId`) |>
    unique()

  validationLogR6$ERROR(
    "SourceConceptId is not unique",
    paste0(
      "Found ", length(notUniqueSourceConceptIds),
      " sourceConceptIds assigned to more than one sourceCode"
    )
  )

  # Append the error to every row (in the full, non-deduplicated tibble) that carries
  # one of the offending ids; all other rows keep their messages untouched.
  usagiTibble <- usagiTibble |>
    dplyr::mutate(
      tmpvalidationMessages = dplyr::if_else(
        `ADD_INFO:sourceConceptId` %in% notUniqueSourceConceptIds,
        paste0(tmpvalidationMessages, " | ", "ERROR: SourceConceptId is not unique"),
        tmpvalidationMessages
      )
    )
} else {
  # The step name is shared with the ERROR branch so the validation summary has one
  # entry for this rule regardless of the outcome.
  validationLogR6$SUCCESS("SourceConceptId is not unique", "")
}

# check if when the code maps to more than one concept the combined domain is valid
invalidDomainCombinations <- usagiTibble |>
dplyr::filter(mappingStatus != "INVALID_TARGET") |>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ A18.0+M01.2,[ConceptIds outdated][Updated conceptIds not found]Tuberculous arthr
A18.0+M01.5,[ConceptIds outdated][Updated conceptIds not found 2]Tuberculous arthritis,-1,,2000500901,Tuberkuloottinen nivelinfektio,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A18|A18.0|M01.1,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1669304412625,4071477,Fetal or neonatal effect of placental insufficiency,Condition,MAPS_TO,,TAYS,1623974400000,,,
A18.0+M01.5,[ConceptIds outdated][Updated conceptIds not found 2]Tuberculous arthritis,-1,,2000500901,Tuberkuloottinen nivelinfektio,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A18|A18.0|M01.1,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1669304412625,4110778,Fetal or neonatal effect of placental insufficiency,Condition,MAPS_TO,,TAYS,1623974400000,,,
A18.0+M01.0,[ConceptIds outdated][Updated by usagi]Tuberculous arthritis,-1,,2000530124,Tuberkuloottinen nivelinfektio,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A18|A18.0|M01.1,ICD10|ICD10|ICD10,0,INVALID_TARGET,EQUAL,PKo,1669304412625,0,Unmapped,Condition,MAPS_TO,Invalid existing target: 4071477,TAYS,1623974400000,,,
A17.8+G63.1,[ConceptIds outdated][Updated by usagi 2 one invalid]Tuberculous polyneuropathy,-1,,2000500119,Tuberkuloottinen monihermosairaus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A17|A17.8|G63.0,ICD10|ICD10|ICD10,0,APPROVED,EQUIVALENT,PKo,1669563756643,4121541,Neutropenia,Condition,MAPS_TO,Invalid existing target: 3079174,PKo,1669563749083,,,
A17.8+G63.2,[ConceptIds outdated][Updated by usagi 2 2 invalid]Tuberculous polyneuropathy,-1,,2000500119,Tuberkuloottinen monihermosairaus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A17|A17.8|G63.0,ICD10|ICD10|ICD10,0,INVALID_TARGET,EQUIVALENT,PKo,1669563756643,0,Unmapped,Condition,MAPS_TO,Invalid existing target: 3079174,PKo,1669563731393,,,
A17.8+G63.1,[ConceptIds outdated][Updated by usagi 2 one invalid]Tuberculous polyneuropathy,-1,,2000599997,Tuberkuloottinen monihermosairaus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A17|A17.8|G63.0,ICD10|ICD10|ICD10,0,APPROVED,EQUIVALENT,PKo,1669563756643,4121541,Neutropenia,Condition,MAPS_TO,Invalid existing target: 3079174,PKo,1669563749083,,,
A17.8+G63.2,[ConceptIds outdated][Updated by usagi 2 2 invalid]Tuberculous polyneuropathy,-1,,2000599998,Tuberkuloottinen monihermosairaus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A17|A17.8|G63.0,ICD10|ICD10|ICD10,0,INVALID_TARGET,EQUIVALENT,PKo,1669563756643,0,Unmapped,Condition,MAPS_TO,Invalid existing target: 3079174,PKo,1669563731393,,,
A02.2+J17.0,[SourceConceptId is empty]Salmonella pneumonia,-1,,,Salmonellan aiheuttama keuhkokuume,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A02|A02.2|J17.0,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666803987773,258333,Salmonella pneumonia,Condition,MAPS_TO,,TAYS,1623974400000,,,
A02.2+M01.3,[SourceConceptId is not a number on the range]Salmonella arthritis,-1,,5,Salmonellan aiheuttama niveltulehdus,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A02|A02.2|M01.3,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666803996141,80316,Salmonella arthritis,Condition,MAPS_TO,,TAYS,1623974400000,,,
A02.2+M90.2,[SourceConceptClass is empty]Salmonella osteomyelitis,-1,,2000500109,Salmonellan aiheuttama osteomyeliitti,,Condition,1900-01-01,2099-12-31,A02|A02.2|M90.2,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666803999237,134264,Salmonella osteomyelitis,Condition,MAPS_TO,,TAYS,1623974400000,,,
Expand Down Expand Up @@ -71,3 +71,4 @@ A18.7+E35.1,Tuberculosis of adrenal glands,-1,,2000500142,Lisämunuaistuberkuloo
A18.8+D77,Tuberculosis of spleen,-1,,2000500143,Pernan tuberkuloosi,ICD10fi Hierarchy,Condition,1998-08-19,2099-12-31,A18|A18.8|D77,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666805697461,195176,Tuberculosis of spleen,Condition,MAPS_TO,,TAYS,1623974400000,,,
A18.88,Tuberculosis of spleen,-1,,2000500200,Pernan tuberkuloosi,ICD10fi Hierarchy,Condition,1998-08-19,2099-12-31,A18,ICD10,0,APPROVED,EQUAL,PKo,1666805697461,195176,Tuberculosis of spleen,Condition,MAPS_TO,,TAYS,1623974400000,,,
A18.88+D77,Tuberculosis of spleen,-1,,2000500201,Pernan tuberkuloosi,ICD10fi Hierarchy,Condition,1998-08-19,2099-12-31,A18|A18.88|D77,ICD10||ICD10,0,APPROVED,EQUAL,PKo,1666805697461,195176,Tuberculosis of spleen,Condition,MAPS_TO,,TAYS,1623974400000,,,
A18.8+E35.0,[SourceConceptId is not unique]Test duplicate sourceConceptId,-1,,2000500124,Kilpirauhasen tuberkuloosi,ICD10fi Hierarchy,Condition,1900-01-01,2099-12-31,A18|A18.8|E35.0,ICD10|ICD10|ICD10,0,APPROVED,EQUAL,PKo,1666805704364,141777,Tuberculosis of thyroid gland,Condition,MAPS_TO,,TAYS,1623974400000,,,
8 changes: 8 additions & 0 deletions tests/testthat/test-validateUsagiFile.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,14 @@ test_that("test validateUsagiFile returns errors with the errored usagi file", {
validatedUsagiFile |> dplyr::filter(stringr::str_detect(sourceName, "SourceConceptId is not a number on the range")) |> dplyr::pull(mappingStatus) |>
expect_equal("FLAGGED")

# SourceConceptId is not unique
# The rule must appear exactly once in the validation summary.
notUniqueStep <- dplyr::filter(validationsSummary, step == "SourceConceptId is not unique")
expect_equal(nrow(notUniqueStep), 1)
# Select the fixture row(s) tagged with this rule once and reuse them below.
notUniqueRows <- dplyr::filter(
  validatedUsagiFile,
  stringr::str_detect(sourceName, "SourceConceptId is not unique")
)
expect_equal(nrow(notUniqueRows), 1)
expect_equal(
  dplyr::pull(notUniqueRows, `ADD_INFO:validationMessages`),
  "ERROR: SourceConceptId is not unique"
)
expect_equal(dplyr::pull(notUniqueRows, mappingStatus), "FLAGGED")

# SourceConceptClass is empty
validationsSummary |> dplyr::filter(step == "SourceConceptClass is empty") |> nrow() |> expect_equal(1)
validatedUsagiFile |> dplyr::filter(stringr::str_detect(sourceName, "SourceConceptClass is empty")) |> nrow() |> expect_equal(1)
Expand Down
Loading