1
0
mirror of https://github.com/msberends/AMR.git synced 2025-01-15 23:21:37 +01:00

last unit tests fix?

This commit is contained in:
dr. M.S. (Matthijs) Berends 2022-10-04 21:33:04 +02:00
parent 37f6db5ccd
commit aa06aad4ea
22 changed files with 275 additions and 256 deletions

View File

@ -1,5 +1,5 @@
Package: AMR
Version: 1.8.2.9027
Version: 1.8.2.9028
Date: 2022-10-04
Title: Antimicrobial Resistance Data Analysis
Description: Functions to simplify and standardise antimicrobial resistance (AMR)

View File

@ -1,4 +1,4 @@
# AMR 1.8.2.9027
# AMR 1.8.2.9028
This version will eventually become v2.0! We're happy to reach a new major milestone soon!
@ -9,11 +9,12 @@ This version will eventually become v2.0! We're happy to reach a new major miles
* Chromista are almost never clinically relevant, thus lacking the secondary scope of this package
* The `microorganisms` no longer relies on the Catalogue of Life, but now primarily on the List of Prokaryotic names with Standing in Nomenclature (LPSN) and is supplemented with the Global Biodiversity Information Facility (GBIF). The structure of this data set has changed to include separate LPSN and GBIF identifiers. Almost all previous MO codes were retained. It contains over 1,000 taxonomic names from 2022 already.
* The `microorganisms.old` data set was removed, and all previously accepted names are now included in the `microorganisms` data set. A new column `status` contains `"accepted"` for currently accepted names and `"synonym"` for taxonomic synonyms; currently invalid names. All previously accepted names now have a microorganisms ID and - if available - an LPSN, GBIF and SNOMED CT identifier.
* The `mo_matching_score()` now count deletions and substitutions as 2 instead of 1, which impacts the outcome of `as.mo()` and any `mo_*()` function
### New
* EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations.
* All new algorithm for `as.mo()` (and thus internally all `mo_*()` functions) while still following our original set-up as described in our paper (DOI 10.18637/jss.v104.i03).
* A new argument `keep_synonyms` allows to *not* correct for updated taxonomy
* A new argument `keep_synonyms` allows to *not* correct for updated taxonomy, in favour of the now deleted argument `allow_uncertain`
* It has increased tremendously in speed and returns generally more consequent results
* Sequential coercion is now extremely fast as results are stored to the package environment, although coercion of unknown values must be run once per session. Previous results can be reset/removed with the new `mo_reset_session()` function.
* Function `mean_amr_distance()` to calculate the mean AMR distance. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand.

37
R/mo.R
View File

@ -371,9 +371,11 @@ as.mo <- function(x,
gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
AMR_env$mo_renamed <- list(
old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]
)
if (isFALSE(keep_synonyms)) {
out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
@ -475,11 +477,13 @@ mo_renamed <- function() {
ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]
df_renamed <- data.frame(old = mo_old,
df_renamed <- data.frame(
old = mo_old,
new = mo_new,
ref_old = ref_old,
ref_new = ref_new,
stringsAsFactors = FALSE)
stringsAsFactors = FALSE
)
df_renamed <- unique(df_renamed)
df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
@ -512,7 +516,8 @@ mo_cleaning_regex <- function() {
"|",
"([({]|\\[).+([})]|\\])",
"|",
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))")
"(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))"
)
}
# UNDOCUMENTED METHODS ----------------------------------------------------
@ -831,9 +836,12 @@ print.mo_uncertainties <- function(x, ...) {
ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
# Add note if result was coerced to accepted taxonomic name
ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
paste0(strrep(" ", nchar(x[i, ]$original_input) + 6),
font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)),
""),
paste0(
strrep(" ", nchar(x[i, ]$original_input) + 6),
font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)
),
""
),
candidates,
sep = "\n"
)
@ -879,20 +887,24 @@ convert_colloquial_input <- function(x) {
out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "^g[abcdfghkl]s$"],
perl = TRUE)
perl = TRUE
)
# Streptococci in different languages, like "estreptococos grupo B"
out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
perl = TRUE)
perl = TRUE
)
out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
"B_STRPT_GRP\\U\\1",
x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
perl = TRUE)
perl = TRUE
)
out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
out[x %like_case% "(viridans.* (strepto|^s).*|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
# CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
@ -1088,5 +1100,6 @@ synonym_mo_to_accepted_mo <- function(x) {
ifelse(is.na(x_lpsn),
AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)])
AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)]
)
}

View File

@ -80,7 +80,7 @@ mo_matching_score <- function(x, n) {
# only keep one space
x <- gsub(" +", " ", x)
# start with a capital letter
# force a capital letter, so this conversion will not count as a substitution
substr(x, 1, 1) <- toupper(substr(x, 1, 1))
# n is always a taxonomically valid full name
@ -100,7 +100,8 @@ mo_matching_score <- function(x, n) {
ignore.case = FALSE,
fixed = TRUE,
costs = c(insertions = 1, deletions = 2, substitutions = 2),
counts = FALSE))
counts = FALSE
))
}, x, n, USE.NAMES = FALSE))
l_n.lev[l_n < lev] <- l_n[l_n < lev]

Binary file not shown.

View File

@ -124,11 +124,14 @@ set_AMR_locale <- function(language) {
options(AMR_locale = language)
if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) {
# show which language to use now
message_("Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
message_(
"Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
ifelse(language != "en",
paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"),
""),
" for the AMR package for this session.")
""
),
" for the AMR package for this session."
)
}
}

View File

@ -104,7 +104,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
MO_staph[which(MO_staph$species %in% c(
"coagulase-negative", "argensis", "arlettae",
"auricularis", "borealis", "caeli", "capitis", "caprae",
"carnosus", "casei", "chromogenes", "cohnii", "condimenti",
"carnosus", "casei", "caseolyticus", "chromogenes", "cohnii", "condimenti",
"croceilyticus",
"debuckii", "devriesei", "edaphicus", "epidermidis",
"equorum", "felis", "fleurettii", "gallinarum",
@ -118,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
"vitulinus", "vitulus", "warneri", "xylosus",
"caledonicus", "canis",
"durrellii", "lloydii",
"ratti", "taiwanensis"
"ratti", "taiwanensis", "veratri", "urealyticus"
) |
# old, now renamed to S. schleiferi (but still as synonym in our data of course):
(MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),

View File

@ -1016,7 +1016,8 @@ taxonomy <- taxonomy %>%
# Animalia:
!genus %in% c("Lucilia", "Lumbricus"),
!(genus %in% c("Aedes", "Anopheles") & rank %in% c("species", "subspecies")), # only genus of the many hundreds of mosquitoes species
kingdom != "Plantae") # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi
kingdom != "Plantae"
) # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi
message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n")

View File

@ -68,10 +68,10 @@ df <- AMR:::MO_lookup
expect_true(nrow(df[which(df$prevalence == 1), , drop = FALSE]) < nrow(df[which(df$prevalence == 2), , drop = FALSE]))
expect_true(nrow(df[which(df$prevalence == 2), , drop = FALSE]) < nrow(df[which(df$prevalence == 3), , drop = FALSE]))
expect_true(all(c(
"mo", "fullname",
"kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
"rank", "ref", "lpsn", "gbif", "status", "source", "prevalence", "snomed",
"kingdom_index", "fullname_lower", "g_species"
"mo", "fullname", "status", "kingdom", "phylum", "class", "order",
"family", "genus", "species", "subspecies", "rank", "ref", "source",
"lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence",
"snomed", "kingdom_index", "fullname_lower", "full_first", "species_first"
) %in% colnames(df)))
expect_inherits(AMR:::MO_CONS, "mo")
@ -87,7 +87,8 @@ expect_true(NROW(uncategorised) == 0,
"All staphylococcal species categorised as CoNS/CoPS.",
paste0(
"Staphylococcal species not categorised as CoNS/CoPS: S. ",
uncategorised$species, " (", uncategorised$mo, ")"
uncategorised$species, " (", uncategorised$mo, ")",
collapse = "\n"
)
)
)

View File

@ -41,7 +41,7 @@ expect_equal(
)
MOs_mentioned <- unique(AMR:::EUCAST_RULES_DF$this_value)
MOs_mentioned <- sort(trimws(unlist(strsplit(MOs_mentioned[!AMR:::is_valid_regex(MOs_mentioned)], ",", fixed = TRUE))))
MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned)))
MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned, keep_synonyms = TRUE, language = NULL)))
expect_true(length(MOs_mentioned[MOs_test != MOs_mentioned]) == 0)
expect_error(suppressWarnings(eucast_rules(example_isolates, col_mo = "Non-existing")))

View File

@ -59,7 +59,7 @@ expect_inherits(mo_synonyms(c("Candida albicans", "Escherichia coli")), "list")
expect_equal(names(mo_info("Escherichia coli")), c(
"kingdom", "phylum", "class", "order",
"family", "genus", "species", "subspecies",
"synonyms", "gramstain", "url", "ref",
"status", "synonyms", "gramstain", "url", "ref",
"snomed"
))
expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list")
@ -73,7 +73,7 @@ expect_true(mo_url("Escherichia coli") %like% "lpsn.dsmz.de")
# test integrity
MOs <- microorganisms
expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en"))
expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en", keep_synonyms = TRUE))
# check languages
expect_equal(mo_type("Escherichia coli", language = "de"), "Bakterien")
@ -81,13 +81,13 @@ expect_equal(mo_gramstain("Escherichia coli", language = "nl"), "Gram-negatief")
gr <- mo_gramstain("Escherichia coli", language = NULL)
for (l in AMR:::LANGUAGES_SUPPORTED[-1]) {
expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in langauge", l))
expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in language", l))
}
expect_error(mo_gramstain("Escherichia coli", language = "UNKNOWN"))
dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")], language = "nl") # should be transformable to English again
expect_identical(mo_name(dutch, language = NULL),
microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")]) # gigantic test - will run ALL names
dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")], language = "nl", keep_synonyms = TRUE) # should be transformable to English again
expect_identical(mo_name(dutch, language = NULL, keep_synonyms = TRUE),
microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")]) # gigantic test - will run ALL names
# manual property function
expect_error(mo_property("Escherichia coli", property = c("genus", "fullname")))

View File

@ -76,4 +76,3 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) {
print(summary(out))
}
}