mirror of
https://github.com/msberends/AMR.git
synced 2026-06-29 08:56:20 +02:00
When a genus+species abbreviation like "P. ovale" was used, the previous bypass (Issue #288) checked sum(sp_exact) == 1, which failed if the species also had subspecies sharing the epithet (ovale curtisi, ovale wallikeri). The fix extends the bypass to fire whenever all exact species matches belong to one genus, collapsing to the species-rank record (subspecies == "") for genus+species queries and preserving the chosen row for explicit subspecies queries. Also extends the data-invariant test to cover all taxonomic rank columns from domain to subspecies, not just the terminal three. Claude-Session: https://claude.ai/code/session_01M4fqQYQYJ3drdudkDYNqAY Co-authored-by: Claude <noreply@anthropic.com>
151 lines
7.1 KiB
R
151 lines
7.1 KiB
R
# ==================================================================== #
# TITLE: #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE CODE: #
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
# https://doi.org/10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://amr-for-r.org #
# ==================================================================== #

test_that("test-data.R", {
  # Integrity checks on the data sets shipped with the package: unique IDs,
  # valid cross references between tables, coercible breakpoints, ASCII-only
  # content and complete CoNS/CoPS categorisation. Not run on CRAN.
  skip_on_cran()

  # IDs should always be unique
  expect_identical(nrow(microorganisms), length(unique(microorganisms$mo)))
  expect_identical(class(microorganisms$mo), c("mo", "character"))
  expect_identical(nrow(antimicrobials), length(unique(AMR::antimicrobials$ab)))
  expect_identical(class(AMR::antimicrobials$ab), c("ab", "character"))
  # `cid` may be NA; only the non-NA values are required to be unique
  expect_identical(
    nrow(antimicrobials[!is.na(antimicrobials$cid), ]),
    length(unique(AMR::antimicrobials$cid[!is.na(antimicrobials$cid)]))
  )

  # check cross table reference
  expect_true(all(microorganisms.codes$mo %in% microorganisms$mo))
  expect_true(all(example_isolates$mo %in% microorganisms$mo))
  expect_true(all(microorganisms.groups$mo %in% microorganisms$mo))
  expect_true(all(microorganisms.groups$mo_group %in% microorganisms$mo))
  expect_true(all(clinical_breakpoints$mo %in% microorganisms$mo))
  expect_true(all(clinical_breakpoints$ab %in% AMR::antimicrobials$ab))
  expect_true(all(intrinsic_resistant$mo %in% microorganisms$mo))
  expect_true(all(intrinsic_resistant$ab %in% AMR::antimicrobials$ab))
  expect_false(anyNA(microorganisms.codes$code))
  expect_false(anyNA(microorganisms.codes$mo))
  expect_true(all(dosage$ab %in% AMR::antimicrobials$ab))
  expect_true(all(dosage$name %in% AMR::antimicrobials$name))
  # `and_these_antibiotics` is split on ", +" into individual codes, each of
  # which must exist in `antimicrobials`
  interpretive_abx <- AMR:::INTERPRETIVE_RULES_DF$and_these_antibiotics
  interpretive_abx <- unique(unlist(strsplit(interpretive_abx[!is.na(interpretive_abx)], ", +")))
  expect_true(all(interpretive_abx %in% AMR::antimicrobials$ab),
    info = paste0(
      "Missing in `antimicrobials` data set: ",
      toString(interpretive_abx[which(!interpretive_abx %in% AMR::antimicrobials$ab)])
    )
  )

  # check valid disks/MICs (rows with ref_tbl "ECOFF" are left out)
  mic_rows <- which(clinical_breakpoints$method == "MIC" & clinical_breakpoints$ref_tbl != "ECOFF")
  disk_rows <- which(clinical_breakpoints$method == "DISK" & clinical_breakpoints$ref_tbl != "ECOFF")
  # every S breakpoint must be coercible; for R breakpoints the tests assert
  # that at least one value becomes NA
  # NOTE(review): presumably some rows have no R breakpoint - confirm
  expect_false(anyNA(as.mic(clinical_breakpoints[mic_rows, "breakpoint_S", drop = TRUE])))
  expect_true(anyNA(as.mic(clinical_breakpoints[mic_rows, "breakpoint_R", drop = TRUE])))
  expect_false(anyNA(as.disk(clinical_breakpoints[disk_rows, "breakpoint_S", drop = TRUE])))
  expect_true(anyNA(as.disk(clinical_breakpoints[disk_rows, "breakpoint_R", drop = TRUE])))

  # antibiotic names must always be coercible to their original AB code
  expect_identical(as.ab(AMR::antimicrobials$name), AMR::antimicrobials$ab)

  if (AMR:::pkg_is_available("tibble")) {
    # there should be no diacritics (i.e. non ASCII) characters in the datasets (CRAN policy)
    datasets <- data(package = "AMR", envir = asNamespace("AMR"))$results[, "Item", drop = TRUE]
    # the item named "antibiotics" is skipped
    # NOTE(review): presumably a deprecated alias of `antimicrobials` - confirm
    datasets <- datasets[datasets != "antibiotics"]
    for (dataset_name in datasets) {
      dataset <- get(dataset_name, envir = asNamespace("AMR"))
      # conversion to ASCII must be a no-op
      expect_identical(AMR:::dataset_UTF8_to_ASCII(dataset), dataset, info = dataset_name)
    }
  }

  # NOTE(review): presumably this call populates `AMR_env$MO_lookup`, which is
  # read on the next line - confirm against the helper
  AMR:::add_MO_lookup_to_AMR_env()
  df <- AMR:::AMR_env$MO_lookup
  expect_true(all(c(
    "mo", "fullname", "status", "kingdom", "phylum", "class", "order",
    "family", "genus", "species", "subspecies", "rank", "ref", "source",
    "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence",
    "snomed", "domain_index", "fullname_lower", "full_first", "species_first"
  ) %in% colnames(df)))

  expect_inherits(AMR:::MO_CONS, "mo")

  # every Staphylococcus species other than "aureus" must be listed in either
  # MO_CONS or MO_COPS
  uncategorised <- subset(
    microorganisms,
    genus == "Staphylococcus" &
      !species %in% c("", "aureus") &
      !mo %in% c(AMR:::MO_CONS, AMR:::MO_COPS)
  )
  # scalar condition, so plain if/else instead of ifelse()
  uncategorised_info <- if (NROW(uncategorised) == 0) {
    "All staphylococcal species categorised as CoNS/CoPS."
  } else {
    paste0(
      "Staphylococcal species not categorised as CoNS/CoPS: S. ",
      uncategorised$species, " (", uncategorised$mo, ")",
      collapse = "\n"
    )
  }
  expect_true(NROW(uncategorised) == 0, info = uncategorised_info)

  # THIS WILL CHECK NON-ASCII STRINGS IN ALL FILES:

  # check_non_ascii <- function() {
  #   purrr::map_df(
  #     .id = "file",
  #     # list common text files
  #     .x = fs::dir_ls(
  #       recurse = TRUE,
  #       type = "file",
  #       # ignore images, compressed
  #       regexp = "\\.(png|ico|rda|ai|tar.gz|zip|xlsx|csv|pdf|psd)$",
  #       invert = TRUE
  #     ),
  #     .f = function(path) {
  #       x <- readLines(path, warn = FALSE)
  #       # from tools::showNonASCII()
  #       asc <- iconv(x, "latin1", "ASCII")
  #       ind <- is.na(asc) | asc != x
  #       # make data frame
  #       if (any(ind)) {
  #         tibble::tibble(
  #           row = which(ind),
  #           line = iconv(x[ind], "latin1", "ASCII", sub = "byte")
  #         )
  #       } else {
  #         tibble::tibble()
  #       }
  #     }
  #   )
  # }
  # x <- check_non_ascii() %>%
  #   filter(file %unlike% "^(data-raw|docs|git_)")
})
|
|
|
|
test_that("taxonomic name columns contain no NA (empty string is used instead)", {
  # Data invariant: every taxonomic rank column of `microorganisms` must be
  # free of NA values.
  taxonomic_cols <- c(
    "domain", "kingdom", "phylum", "class", "order",
    "family", "genus", "species", "subspecies"
  )
  for (col in taxonomic_cols) {
    # `[[` yields NULL for an absent column and anyNA(NULL) is FALSE, so
    # without this presence check a missing or misspelled column would pass
    # the NA check vacuously
    expect_true(col %in% colnames(microorganisms), info = col)
    expect_false(anyNA(microorganisms[[col]]), info = col)
  }
})
|