1
0
mirror of https://github.com/msberends/AMR.git synced 2026-06-29 11:36:20 +02:00
Files
AMR/tests/testthat/test-mo.R
Matthijs Berends 518425311e (v3.0.1.9080) fix(as.mo): resolve abbreviated genus when species has subspecies (#288 follow-up) (#301)
When a genus+species abbreviation like "P. ovale" was used, the previous
bypass (Issue #288) checked sum(sp_exact) == 1, which failed if the species
also had subspecies sharing the epithet (ovale curtisi, ovale wallikeri).
The fix extends the bypass to fire whenever all exact species matches belong
to one genus, collapsing to the species-rank record (subspecies == "") for
genus+species queries and preserving the chosen row for explicit subspecies
queries.

Also extends the data-invariant test to cover all taxonomic rank columns
from domain to subspecies, not just the terminal three.


Claude-Session: https://claude.ai/code/session_01M4fqQYQYJ3drdudkDYNqAY

Co-authored-by: Claude <noreply@anthropic.com>
2026-06-27 15:20:38 +02:00

368 lines
15 KiB
R

# ==================================================================== #
# TITLE: #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE CODE: #
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
# https://doi.org/10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://amr-for-r.org #
# ==================================================================== #
test_that("test-mo.R", {
skip_on_cran()
MOs <- subset(microorganisms, !is.na(mo) & nchar(mo) > 3)
expect_identical(as.character(MOs$mo), as.character(as.mo(MOs$mo, keep_synonyms = TRUE)))
expect_identical(
as.character(as.mo(c("E. coli", "H. influenzae"), keep_synonyms = FALSE)),
c("B_ESCHR_COLI", "B_HMPHL_INFL")
)
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR")
expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
expect_equal(as.character(as.mo("Eschr spp.")), "B_ESCHR")
expect_equal(as.character(as.mo(" B_ESCHR_COLI ")), "B_ESCHR_COLI")
expect_equal(as.character(as.mo("e coli")), "B_ESCHR_COLI") # not Campylobacter
expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNMN")
expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL")
expect_equal(as.character(as.mo("K. pneumo rhino")), "B_KLBSL_PNMN_RHNS") # K. pneumoniae subspp. rhinoscleromatis
expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
expect_equal(as.character(as.mo("C. difficile")), "B_CRDDS_DFFC")
expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNMP")
expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
expect_equal(as.character(as.mo("Estreptococos grupo B")), "B_STRPT_GRPB")
expect_equal(as.character(as.mo("Group B Streptococci")), "B_STRPT_GRPB")
expect_equal(as.character(as.mo(c("GAS", "GBS", "haemoly strep"))), c("B_STRPT_GRPA", "B_STRPT_GRPB", "B_STRPT_HAEM"))
expect_equal(as.character(as.mo("S. pyo")), "B_STRPT_PYGN") # not Actinomyces pyogenes
# GLIMS
expect_equal(as.character(as.mo("bctfgr")), "B_BCTRD_FRGL")
expect_equal(as.character(as.mo("MRSE")), "B_STPHY_EPDR")
expect_equal(as.character(as.mo("VRE")), "B_ENTRC")
expect_equal(as.character(as.mo("MRPA")), "B_PSDMN_AERG")
expect_equal(as.character(as.mo("PISP")), "B_STRPT_PNMN")
expect_equal(as.character(as.mo("PRSP")), "B_STRPT_PNMN")
expect_equal(as.character(as.mo("VISP")), "B_STRPT_PNMN")
expect_equal(as.character(as.mo("VRSP")), "B_STRPT_PNMN")
expect_equal(as.character(as.mo("CNS")), "B_STPHY_CONS")
expect_equal(as.character(as.mo("CoNS")), "B_STPHY_CONS")
expect_equal(as.character(as.mo("CPS")), "B_STPHY_COPS")
expect_equal(as.character(as.mo("CoPS")), "B_STPHY_COPS")
expect_equal(as.character(as.mo("VGS")), "B_STRPT_VIRI")
expect_equal(as.character(as.mo("streptococcus milleri")), "B_STRPT_MILL")
expect_equal(as.character(as.mo(c("Gram negative", "Gram positive"))), c("B_GRAMN", "B_GRAMP"))
# expect_warning(as.mo("Acinetobacter calcoaceticus/baumannii complex"))
# Issue #287: "X complex" fallback to "X" when complex is not a distinct taxon
expect_identical(as.character(suppressWarnings(as.mo("Proteus vulgaris complex"))), as.character(suppressWarnings(as.mo("Proteus vulgaris"))))
expect_identical(as.character(suppressWarnings(as.mo("Enterobacter cloacae complex"))), as.character(as.mo("Enterobacter cloacae complex")))
# Issue #288: abbreviated genus with exact species epithet match should win
expect_identical(
as.character(suppressWarnings(as.mo("S. apiospermum"))),
as.character(suppressWarnings(as.mo("Scedosporium apiospermum")))
)
# prevalent MO
expect_identical(
suppressWarnings(as.character(
as.mo(c(
"stau", # WHONET code
"STAU",
"staaur",
"S. aureus",
"S aureus",
"Sthafilokkockus aureus", # handles incorrect spelling
"Staphylococcus aureus (MRSA)",
"MRSA", # Methicillin Resistant S. aureus
"VISA", # Vancomycin Intermediate S. aureus
"VRSA", # Vancomycin Resistant S. aureus
115329001 # SNOMED CT code
))
)),
rep("B_STPHY_AURS", 11)
)
expect_identical(
as.character(
as.mo(c("EHEC", "EPEC", "EIEC", "STEC", "ATEC", "UPEC"))
),
rep("B_ESCHR_COLI", 6)
)
# unprevalent MO
expect_identical(
as.character(
as.mo(c(
"parnod",
"Paraburkholderia nodosa"
))
),
rep("B_PRBRK_NODS", 2)
)
# empty values
expect_identical(as.character(as.mo(c("", " ", NA, NaN))), rep(NA_character_, 4))
expect_identical(as.character(as.mo(" ")), NA_character_)
# too few characters
# expect_warning(as.mo("ab"))
expect_identical(
suppressWarnings(as.character(as.mo(c("Qq species", "MRSA", "K. pneu rhino", "esco")))),
c("UNKNOWN", "B_STPHY_AURS", "B_KLBSL_PNMN_RHNS", "B_ESCHR_COLI")
)
# check for Becker classification
expect_identical(as.character(as.mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPDR")
expect_identical(as.character(as.mo("S. epidermidis", Becker = TRUE)), "B_STPHY_CONS")
expect_identical(as.character(as.mo("STAEPI", Becker = TRUE)), "B_STPHY_CONS")
expect_identical(as.character(as.mo("Sta intermedius", Becker = FALSE)), "B_STPHY_INTR")
expect_identical(as.character(as.mo("Sta intermedius", Becker = TRUE)), "B_STPHY_COPS")
expect_identical(as.character(as.mo("STAINT", Becker = TRUE)), "B_STPHY_COPS")
# aureus must only be influenced if Becker = "all"
expect_identical(as.character(as.mo("STAAUR", Becker = FALSE)), "B_STPHY_AURS")
expect_identical(as.character(as.mo("STAAUR", Becker = TRUE)), "B_STPHY_AURS")
expect_identical(as.character(as.mo("STAAUR", Becker = "all")), "B_STPHY_COPS")
# check for Lancefield classification
expect_identical(as.character(as.mo("S. pyogenes", Lancefield = FALSE)), "B_STRPT_PYGN")
expect_identical(as.character(as.mo("S. pyogenes", Lancefield = TRUE)), "B_STRPT_GRPA")
expect_identical(as.character(as.mo("STCPYO", Lancefield = TRUE)), "B_STRPT_GRPA") # group A
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = FALSE)), "B_STRPT_AGLC")
expect_identical(as.character(as.mo("S. agalactiae", Lancefield = TRUE)), "B_STRPT_GRPB") # group B
expect_identical(as.character(suppressWarnings(as.mo("estreptococos grupo B"))), "B_STRPT_GRPB")
expect_identical(as.character(as.mo("S. equi", Lancefield = FALSE)), "B_STRPT_EQUI")
expect_identical(as.character(as.mo("S. equi", Lancefield = TRUE)), "B_STRPT_GRPC") # group C
# Enterococci must only be influenced if Lancefield = "all"
expect_identical(as.character(as.mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_FACM")
expect_identical(as.character(as.mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_FACM")
expect_identical(as.character(as.mo("E. faecium", Lancefield = "all")), "B_STRPT_GRPD") # group D
expect_identical(as.character(as.mo("S. anginosus", Lancefield = FALSE)), "B_STRPT_ANGN")
expect_identical(as.character(as.mo("S. anginosus", Lancefield = TRUE)), "B_STRPT_GRPF") # group F
expect_identical(as.character(as.mo("S. sanguinis", Lancefield = FALSE)), "B_STRPT_SNGN")
expect_identical(as.character(as.mo("S. sanguinis", Lancefield = TRUE)), "B_STRPT_GRPH") # group H
expect_identical(as.character(as.mo("S. salivarius", Lancefield = FALSE)), "B_STRPT_SLVR")
expect_identical(as.character(as.mo("S. salivarius", Lancefield = TRUE)), "B_STRPT_GRPK") # group K
if (AMR:::pkg_is_available("dplyr", min_version = "1.0.0", also_load = TRUE)) {
# select with one column
expect_identical(
example_isolates %>%
slice(1:10) %>%
left_join_microorganisms() %>%
select(genus) %>%
as.mo() %>%
as.character(),
c(
"B_ESCHR", "B_ESCHR", "B_STPHY", "B_STPHY", "B_STPHY",
"B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY"
)
)
# select with two columns
expect_identical(
example_isolates %>%
slice(1:10) %>%
pull(mo),
example_isolates %>%
slice(1:10) %>%
left_join_microorganisms() %>%
select(genus, species) %>%
as.mo()
)
# too many columns
expect_error(example_isolates %>% select(1:3) %>% as.mo())
# test pull
expect_equal(
nrow(example_isolates %>% mutate(mo = as.mo(mo))),
2000
)
expect_true(example_isolates %>% pull(mo) %>% is.mo())
}
# print
expect_output(print(as.mo(c("B_ESCHR_COLI", NA))))
# test data.frame
expect_equal(
nrow(data.frame(test = as.mo("B_ESCHR_COLI"))),
1
)
# check empty values
expect_equal(
as.character(as.mo("")),
NA_character_
)
# check less prevalent MOs
expect_equal(as.character(as.mo("Actinosynnema pretiosum auranticum")), "B_ANNMA_PRTS_ARNT")
# expect_equal(as.character(as.mo("Actinosynnema preti aura")), "B_ANNMA_PRTS_ARNT")
# expect_equal(as.character(as.mo("A pre aur")), "B_ANNMA_PRTS_ARNT")
expect_equal(as.character(as.mo("Actinosynnema pretiosum")), "B_ANNMA_PRTS")
expect_equal(as.character(as.mo("Actinosynnema")), "B_ANNMA")
expect_equal(as.character(as.mo(" B_ANNMA_PRTS ")), "B_ANNMA_PRTS")
# check old names
expect_equal(suppressMessages(as.character(as.mo("Escherichia blattae"))), "B_SHMWL_BLTT")
expect_equal(suppressMessages(as.character(as.mo(c("E. coli", "Chlamydo psittaci")))), c("B_ESCHR_COLI", "B_CHLMY_PSTT"))
expect_equal(suppressMessages(mo_name("eubcom")), "Clostridium combesii")
# predefined reference_df
expect_equal(
as.character(as.mo("TestingOwnID",
reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI")
)),
"B_ESCHR_COLI"
)
expect_equal(
as.character(as.mo(c("TestingOwnID", "E. coli"),
reference_df = data.frame(mycol = "TestingOwnID", mo = "B_ESCHR_COLI")
)),
c("B_ESCHR_COLI", "B_ESCHR_COLI")
)
# # expect_warning(as.mo("TestingOwnID", reference_df = NULL))
expect_error(as.mo("E. coli", reference_df = data.frame(mycol = "TestingOwnID")))
# combination of existing mo and other code
expect_identical(
suppressWarnings(as.character(as.mo(c("B_ESCHR_COL", "ESCCOL")))),
c("B_ESCHR_COLI", "B_ESCHR_COLI")
)
# from different sources
expect_equal(
as.character(as.mo(
c("PRTMIR", "bclcer", "B_ESCHR_COLI")
)),
c("B_PROTS_MRBL", "B_BCLLS_CERS", "B_ESCHR_COLI")
)
# hard to find
expect_equal(
as.character(suppressMessages(as.mo(
c(
"Microbacterium paraoxidans",
"Streptococcus suis (bovis gr)",
"Raoultella (here some text) terrigena"
)
))),
c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_KLBSL_TRRG")
)
x <- as.mo("Sta. aur")
# no viruses
expect_equal(suppressWarnings(as.mo("Virus")), as.mo("UNKNOWN"))
# summary
expect_equal(length(summary(example_isolates$mo)), 6)
# WHONET codes and NA/NaN
expect_true(all(is.na(as.mo(c("xxx", "na", "nan")))))
expect_equal(as.character(as.mo(c("con", "eco"))), c("UNKNOWN", "B_ESCHR_COLI"))
expect_equal(
as.character(suppressWarnings(as.mo(c("other", "none", "unknown")))),
rep("UNKNOWN", 3)
)
# ..coccus
expect_equal(
as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))),
c("B_NESSR_MNNG", "B_NESSR_GNRR", "B_STRPT_PNMN")
)
# yeasts and fungi
expect_equal(
suppressWarnings(as.character(as.mo(c("yeasts", "fungi")))),
c("F_YEAST", "F_FUNGUS")
)
if (AMR:::pkg_is_available("tibble")) {
# print tibble
expect_output(print(tibble::tibble(mo = as.mo("B_ESCHR_COLI"))))
}
# assigning and subsetting
x <- example_isolates$mo
expect_inherits(x[1], "mo")
expect_inherits(x[[1]], "mo")
expect_inherits(c(x[1], x[9]), "mo")
# expect_warning(x[1] <- "invalid code")
# expect_warning(x[[1]] <- "invalid code")
# expect_warning(c(x[1], "test"))
# ignoring patterns
expect_true(is.na(as.mo("E. coli ignorethis", ignore_pattern = "this")))
# frequency tables
if (AMR:::pkg_is_available("cleaner")) {
expect_inherits(cleaner::freq(example_isolates$mo), "freq")
}
# skimr
if (AMR:::pkg_is_available("skimr", min_version = "2.0.0", also_load = TRUE)) {
expect_named(
skim(example_isolates$mo),
c("skim_type", "skim_variable", "n_missing", "complete_rate", "mo.n_unique", "mo.gram_negative", "mo.gram_positive", "mo.yeast", "mo.top_genus", "mo.top_species")
)
}
})
test_that("as.mo() resolves abbreviated genus when species carries subspecies (#288 follow-up)", {
# "P. ovale" must resolve to Plasmodium ovale, not a Pseudomonas species,
# even though P. ovale has subspecies (curtisi, wallikeri) sharing the epithet.
expect_identical(
as.mo("P. ovale", keep_synonyms = TRUE, info = FALSE),
as.mo("Plasmodium ovale", keep_synonyms = TRUE, info = FALSE)
)
expect_identical(
mo_name("P. ovale", keep_synonyms = TRUE, language = NULL),
"Plasmodium ovale"
)
# Non-regression: the original #288 example must still work.
expect_identical(
mo_genus("S. apiospermum", keep_synonyms = TRUE, language = NULL),
"Scedosporium"
)
# Explicit subspecies must not be collapsed to species rank.
if (any(microorganisms$fullname == "Plasmodium ovale curtisi")) {
expect_identical(
mo_name("P. ovale curtisi", keep_synonyms = TRUE, language = NULL),
"Plasmodium ovale curtisi"
)
}
})