mirror of
https://github.com/msberends/AMR.git
synced 2026-06-29 11:36:20 +02:00
When a genus+species abbreviation like "P. ovale" was used, the previous bypass (Issue #288) checked sum(sp_exact) == 1, which failed if the species also had subspecies sharing the epithet (ovale curtisi, ovale wallikeri). The fix extends the bypass to fire whenever all exact species matches belong to one genus, collapsing to the species-rank record (subspecies == "") for genus+species queries and preserving the chosen row for explicit subspecies queries. Also extends the data-invariant test to cover all taxonomic rank columns from domain to subspecies, not just the terminal three. Claude-Session: https://claude.ai/code/session_01M4fqQYQYJ3drdudkDYNqAY Co-authored-by: Claude <noreply@anthropic.com>
368 lines
15 KiB
R
368 lines
15 KiB
R
# ==================================================================== #
|
|
# TITLE: #
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data #
|
|
# #
|
|
# SOURCE CODE: #
|
|
# https://github.com/msberends/AMR #
|
|
# #
|
|
# PLEASE CITE THIS SOFTWARE AS: #
|
|
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
|
|
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
|
|
# Journal of Statistical Software, 104(3), 1-31. #
|
|
# https://doi.org/10.18637/jss.v104.i03 #
|
|
# #
|
|
# Developed at the University of Groningen and the University Medical #
|
|
# Center Groningen in The Netherlands, in collaboration with many #
|
|
# colleagues from around the world, see our website. #
|
|
# #
|
|
# This R package is free software; you can freely use and distribute #
|
|
# it for both personal and commercial purposes under the terms of the #
|
|
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
|
|
# the Free Software Foundation. #
|
|
# We created this package for both routine data analysis and academic #
|
|
# research and it was publicly released in the hope that it will be #
|
|
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
|
|
# #
|
|
# Visit our website for the full manual and a complete tutorial about #
|
|
# how to conduct AMR data analysis: https://amr-for-r.org #
|
|
# ==================================================================== #
|
|
|
|
test_that("test-mo.R", {
|
|
skip_on_cran()
|
|
|
|
MOs <- subset(microorganisms, !is.na(mo) & nchar(mo) > 3)
|
|
expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo, keep_synonyms = TRUE)))
|
|
|
|
expect_identical(
|
|
as.character(as.mo(c("E. coli", "H. influenzae"), keep_synonyms = FALSE)),
|
|
c("B_ESCHR_COLI", "B_HMPHL_INFL")
|
|
)
|
|
|
|
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
|
|
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
|
|
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
|
|
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
|
|
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
|
|
expect_equal(as.character(as.mo("Eschr spp.")), "B_ESCHR")
|
|
expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI")
|
|
expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter
|
|
expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN")
|
|
expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL")
|
|
expect_equal(as.character(as.mo("K. pneumo rhino")), "B_KLBSL_PNMN_RHNS") # K. pneumoniae subspp. rhinoscleromatis
|
|
expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
|
|
expect_equal(as.character(as.mo("C. difficile")), "B_CRDDS_DFFC")
|
|
expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNMP")
|
|
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
|
|
expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB")
|
|
expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB")
|
|
|
|
expect_equal(as.character(as.mo(c("GAS", "GBS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_HAEM"))
|
|
|
|
|
|
expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes
|
|
|
|
# GLIMS
|
|
expect_equal(as.character(as.mo("bctfgr")), "B_BCTRD_FRGL")
|
|
|
|
expect_equal(as.character(as.mo("MRSE")), "B_STPHY_EPDR")
|
|
expect_equal(as.character(as.mo("VRE")), "B_ENTRC")
|
|
expect_equal(as.character(as.mo("MRPA")), "B_PSDMN_AERG")
|
|
expect_equal(as.character(as.mo("PISP")), "B_STRPT_PNMN")
|
|
expect_equal(as.character(as.mo("PRSP")), "B_STRPT_PNMN")
|
|
expect_equal(as.character(as.mo("VISP")), "B_STRPT_PNMN")
|
|
expect_equal(as.character(as.mo("VRSP")), "B_STRPT_PNMN")
|
|
|
|
expect_equal(as.character(as.mo("CNS")), "B_STPHY_CONS")
|
|
expect_equal(as.character(as.mo("CoNS")), "B_STPHY_CONS")
|
|
expect_equal(as.character(as.mo("CPS")), "B_STPHY_COPS")
|
|
expect_equal(as.character(as.mo("CoPS")), "B_STPHY_COPS")
|
|
expect_equal(as.character(as.mo("VGS")), "B_STRPT_VIRI")
|
|
expect_equal(as.character(as.mo("streptococcus milleri")), "B_STRPT_MILL")
|
|
|
|
|
|
expect_equal(as.character(as.mo(c("Gram negative", "Gram positive"))), c("B_GRAMN", "B_GRAMP"))
|
|
|
|
# expect_warning(as.mo("Acinetobacter calcoaceticus/baumannii complex"))
|
|
|
|
# Issue #287: "X complex" fallback to "X" when complex is not a distinct taxon
|
|
expect_identical(as.character(suppressWarnings(as.mo("Proteus vulgaris complex"))), as.character(suppressWarnings(as.mo("Proteus vulgaris"))))
|
|
expect_identical(as.character(suppressWarnings(as.mo("Enterobacter cloacae complex"))), as.character(as.mo("Enterobacter cloacae complex")))
|
|
|
|
# Issue #288: abbreviated genus with exact species epithet match should win
|
|
expect_identical(
|
|
as.character(suppressWarnings(as.mo("S. apiospermum"))),
|
|
as.character(suppressWarnings(as.mo("Scedosporium apiospermum")))
|
|
)
|
|
|
|
# prevalent MO
|
|
expect_identical(
|
|
suppressWarnings(as.character(
|
|
as.mo(c(
|
|
"stau", # WHONET code
|
|
"STAU",
|
|
"staaur",
|
|
"S. aureus",
|
|
"S aureus",
|
|
"Sthafilokkockus aureus", # handles incorrect spelling
|
|
"Staphylococcus aureus (MRSA)",
|
|
"MRSA", # Methicillin Resistant S. aureus
|
|
"VISA", # Vancomycin Intermediate S. aureus
|
|
"VRSA", # Vancomycin Resistant S. aureus
|
|
115329001 # SNOMED CT code
|
|
))
|
|
)),
|
|
rep("B_STPHY_AURS", 11)
|
|
)
|
|
expect_identical(
|
|
as.character(
|
|
as.mo(c("EHEC", "EPEC", "EIEC", "STEC", "ATEC", "UPEC"))
|
|
),
|
|
rep("B_ESCHR_COLI", 6)
|
|
)
|
|
# unprevalent MO
|
|
expect_identical(
|
|
as.character(
|
|
as.mo(c(
|
|
"parnod",
|
|
"Paraburkholderia nodosa"
|
|
))
|
|
),
|
|
rep("B_PRBRK_NODS", 2)
|
|
)
|
|
|
|
# empty values
|
|
expect_identical(as.character(as.mo(c("", " ", NA, NaN))), rep(NA_character_, 4))
|
|
expect_identical(as.character(as.mo(" ")), NA_character_)
|
|
# too few characters
|
|
# expect_warning(as.mo("ab"))
|
|
|
|
expect_identical(
|
|
suppressWarnings(as.character(as.mo(c("Qq species", "MRSA", "K. pneu rhino", "esco")))),
|
|
c("UNKNOWN", "B_STPHY_AURS", "B_KLBSL_PNMN_RHNS", "B_ESCHR_COLI")
|
|
)
|
|
|
|
# check for Becker classification
|
|
expect_identical(as.character(as.mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPDR")
|
|
expect_identical(as.character(as.mo("S. epidermidis", Becker = TRUE)), "B_STPHY_CONS")
|
|
expect_identical(as.character(as.mo("STAEPI", Becker = TRUE)), "B_STPHY_CONS")
|
|
expect_identical(as.character(as.mo("Sta intermedius", Becker = FALSE)), "B_STPHY_INTR")
|
|
expect_identical(as.character(as.mo("Sta intermedius", Becker = TRUE)), "B_STPHY_COPS")
|
|
expect_identical(as.character(as.mo("STAINT", Becker = TRUE)), "B_STPHY_COPS")
|
|
# aureus must only be influenced if Becker = "all"
|
|
expect_identical(as.character(as.mo("STAAUR", Becker = FALSE)), "B_STPHY_AURS")
|
|
expect_identical(as.character(as.mo("STAAUR", Becker = TRUE)), "B_STPHY_AURS")
|
|
expect_identical(as.character(as.mo("STAAUR", Becker = "all")), "B_STPHY_COPS")
|
|
|
|
# check for Lancefield classification
|
|
expect_identical(as.character(as.mo("S. pyogenes", Lancefield = FALSE)), "B_STRPT_PYGN")
|
|
expect_identical(as.character(as.mo("S. pyogenes", Lancefield = TRUE)), "B_STRPT_GRPA")
|
|
expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA") # group A
|
|
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC")
|
|
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B
|
|
expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB")
|
|
expect_identical(as.character(as.mo("S. equi", Lancefield = FALSE)), "B_STRPT_EQUI")
|
|
expect_identical(as.character(as.mo("S. equi", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
|
|
# Enterococci must only be influenced if Lancefield = "all"
|
|
expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM")
|
|
expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM")
|
|
expect_identical(as.character(as.mo("E. faecium", Lancefield = "all")), "B_STRPT_GRPD") # group D
|
|
expect_identical(as.character(as.mo("S. anginosus", Lancefield = FALSE)), "B_STRPT_ANGN")
|
|
expect_identical(as.character(as.mo("S. anginosus", Lancefield = TRUE)), "B_STRPT_GRPF") # group F
|
|
expect_identical(as.character(as.mo("S. sanguinis", Lancefield = FALSE)), "B_STRPT_SNGN")
|
|
expect_identical(as.character(as.mo("S. sanguinis", Lancefield = TRUE)), "B_STRPT_GRPH") # group H
|
|
expect_identical(as.character(as.mo("S. salivarius", Lancefield = FALSE)), "B_STRPT_SLVR")
|
|
expect_identical(as.character(as.mo("S. salivarius", Lancefield = TRUE)), "B_STRPT_GRPK") # group K
|
|
|
|
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
|
|
# select with one column
|
|
expect_identical(
|
|
example_isolates %>%
|
|
slice(1:10) %>%
|
|
left_join_microorganisms() %>%
|
|
select(genus) %>%
|
|
as.mo() %>%
|
|
as.character(),
|
|
c(
|
|
"B_ESCHR", "B_ESCHR", "B_STPHY", "B_STPHY", "B_STPHY",
|
|
"B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY"
|
|
)
|
|
)
|
|
|
|
# select with two columns
|
|
expect_identical(
|
|
example_isolates %>%
|
|
slice(1:10) %>%
|
|
pull(mo),
|
|
example_isolates %>%
|
|
slice(1:10) %>%
|
|
left_join_microorganisms() %>%
|
|
select(genus, species) %>%
|
|
as.mo()
|
|
)
|
|
|
|
# too many columns
|
|
expect_error(example_isolates %>% select(1:3) %>% as.mo())
|
|
|
|
# test pull
|
|
expect_equal(
|
|
nrow(example_isolates %>% mutate(mo = as.mo(mo))),
|
|
2000
|
|
)
|
|
expect_true(example_isolates %>% pull(mo) %>% is.mo())
|
|
}
|
|
|
|
# print
|
|
expect_output(print(as.mo(c("B_ESCHR_COLI", NA))))
|
|
|
|
# test data.frame
|
|
expect_equal(
|
|
nrow(data.frame(test = as.mo("B_ESCHR_COLI"))),
|
|
1
|
|
)
|
|
|
|
# check empty values
|
|
expect_equal(
|
|
as.character(as.mo("")),
|
|
NA_character_
|
|
)
|
|
|
|
# check less prevalent MOs
|
|
expect_equal(as.character(as.mo("Actinosynnema pretiosum auranticum")), "B_ANNMA_PRTS_ARNT")
|
|
# expect_equal(as.character(as.mo("Actinosynnema preti aura")), "B_ANNMA_PRTS_ARNT")
|
|
# expect_equal(as.character(as.mo("A pre aur")), "B_ANNMA_PRTS_ARNT")
|
|
expect_equal(as.character(as.mo("Actinosynnema pretiosum")), "B_ANNMA_PRTS")
|
|
expect_equal(as.character(as.mo("Actinosynnema")), "B_ANNMA")
|
|
expect_equal(as.character(as.mo(" B_ANNMA_PRTS ")), "B_ANNMA_PRTS")
|
|
|
|
# check old names
|
|
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT")
|
|
expect_equal(suppressMessages(as.character(as.mo(c("E. coli", "Chlamydo psittaci")))), c("B_ESCHR_COLI", "B_CHLMY_PSTT"))
|
|
expect_equal(suppressMessages(mo_name("eubcom")), "Clostridium combesii")
|
|
|
|
# predefined reference_df
|
|
expect_equal(
|
|
as.character(as.mo("TestingOwnID",
|
|
reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI")
|
|
)),
|
|
"B_ESCHR_COLI"
|
|
)
|
|
expect_equal(
|
|
as.character(as.mo(c("TestingOwnID", "E. coli"),
|
|
reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI")
|
|
)),
|
|
c("B_ESCHR_COLI", "B_ESCHR_COLI")
|
|
)
|
|
# # expect_warning(as.mo("TestingOwnID", reference_df = NULL))
|
|
expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID")))
|
|
|
|
# combination of existing mo and other code
|
|
expect_identical(
|
|
suppressWarnings(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL")))),
|
|
c("B_ESCHR_COLI", "B_ESCHR_COLI")
|
|
)
|
|
|
|
# from different sources
|
|
expect_equal(
|
|
as.character(as.mo(
|
|
c("PRTMIR", "bclcer", "B_ESCHR_COLI")
|
|
)),
|
|
c("B_PROTS_MRBL", "B_BCLLS_CERS", "B_ESCHR_COLI")
|
|
)
|
|
|
|
# hard to find
|
|
expect_equal(
|
|
as.character(suppressMessages(as.mo(
|
|
c(
|
|
"Microbacterium paraoxidans",
|
|
"Streptococcus suis (bovis gr)",
|
|
"Raoultella (here some text) terrigena"
|
|
)
|
|
))),
|
|
c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_KLBSL_TRRG")
|
|
)
|
|
|
|
x <- as.mo("Sta. aur")
|
|
|
|
# no viruses
|
|
expect_equal(suppressWarnings(as.mo("Virus")), as.mo("UNKNOWN"))
|
|
|
|
# summary
|
|
expect_equal(length(summary(example_isolates$mo)), 6)
|
|
|
|
# WHONET codes and NA/NaN
|
|
expect_true(all(is.na(as.mo(c("xxx", "na", "nan")))))
|
|
expect_equal(as.character(as.mo(c("con", "eco"))), c("UNKNOWN", "B_ESCHR_COLI"))
|
|
expect_equal(
|
|
as.character(suppressWarnings(as.mo(c("other", "none", "unknown")))),
|
|
rep("UNKNOWN", 3)
|
|
)
|
|
|
|
# ..coccus
|
|
expect_equal(
|
|
as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))),
|
|
c("B_NESSR_MNNG", "B_NESSR_GNRR", "B_STRPT_PNMN")
|
|
)
|
|
# yeasts and fungi
|
|
expect_equal(
|
|
suppressWarnings(as.character(as.mo(c("yeasts", "fungi")))),
|
|
c("F_YEAST", "F_FUNGUS")
|
|
)
|
|
|
|
if (AMR:::pkg_is_available("tibble")) {
|
|
# print tibble
|
|
expect_output(print(tibble::tibble(mo = as.mo("B_ESCHR_COLI"))))
|
|
}
|
|
|
|
# assigning and subsetting
|
|
x <- example_isolates$mo
|
|
expect_inherits(x[1], "mo")
|
|
expect_inherits(x[[1]], "mo")
|
|
expect_inherits(c(x[1], x[9]), "mo")
|
|
# expect_warning(x[1] <- "invalid code")
|
|
# expect_warning(x[[1]] <- "invalid code")
|
|
# expect_warning(c(x[1], "test"))
|
|
|
|
# ignoring patterns
|
|
expect_true(is.na(as.mo("E. coli ignorethis", ignore_pattern = "this")))
|
|
|
|
# frequency tables
|
|
if (AMR:::pkg_is_available("cleaner")) {
|
|
expect_inherits(cleaner::freq(example_isolates$mo), "freq")
|
|
}
|
|
|
|
# skimr
|
|
if (AMR:::pkg_is_available("skimr", min_version = "2.0.0", also_load = TRUE)) {
|
|
expect_named(
|
|
skim(example_isolates$mo),
|
|
c("skim_type", "skim_variable", "n_missing", "complete_rate", "mo.n_unique", "mo.gram_negative", "mo.gram_positive", "mo.yeast", "mo.top_genus", "mo.top_species")
|
|
)
|
|
}
|
|
})
|
|
|
|
test_that("as.mo() resolves abbreviated genus when species carries subspecies (#288 follow-up)", {
|
|
# "P. ovale" must resolve to Plasmodium ovale, not a Pseudomonas species,
|
|
# even though P. ovale has subspecies (curtisi, wallikeri) sharing the epithet.
|
|
expect_identical(
|
|
as.mo("P. ovale", keep_synonyms = TRUE, info = FALSE),
|
|
as.mo("Plasmodium ovale", keep_synonyms = TRUE, info = FALSE)
|
|
)
|
|
expect_identical(
|
|
mo_name("P. ovale", keep_synonyms = TRUE, language = NULL),
|
|
"Plasmodium ovale"
|
|
)
|
|
|
|
# Non-regression: the original #288 example must still work.
|
|
expect_identical(
|
|
mo_genus("S. apiospermum", keep_synonyms = TRUE, language = NULL),
|
|
"Scedosporium"
|
|
)
|
|
|
|
# Explicit subspecies must not be collapsed to species rank.
|
|
if (any(microorganisms$fullname == "Plasmodium ovale curtisi")) {
|
|
expect_identical(
|
|
mo_name("P. ovale curtisi", keep_synonyms = TRUE, language = NULL),
|
|
"Plasmodium ovale curtisi"
|
|
)
|
|
}
|
|
})
|