last unit tests fix?

2026-07-22 16:31:00 +02:00 · 2022-10-04 21:33:04 +02:00
parent 37f6db5ccd
commit aa06aad4ea
22 changed files with 275 additions and 256 deletions
--- a/2
+++ b/2
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 1.8.2.9027
+Version: 1.8.2.9028
 Date: 2022-10-04
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 1.8.2.9027
+# AMR 1.8.2.9028

 This version will eventually become v2.0! We're happy to reach a new major milestone soon!

@@ -9,11 +9,12 @@ This version will eventually become v2.0! We're happy to reach a new major miles
  * Chromista are almost never clinically relevant, thus lacking the secondary scope of this package
 * The `microorganisms` no longer relies on the Catalogue of Life, but now primarily on the List of Prokaryotic names with Standing in Nomenclature (LPSN) and is supplemented with the Global Biodiversity Information Facility (GBIF). The structure of this data set has changed to include separate LPSN and GBIF identifiers. Almost all previous MO codes were retained. It contains over 1,000 taxonomic names from 2022 already.
 * The `microorganisms.old` data set was removed, and all previously accepted names are now included in the `microorganisms` data set. A new column `status` contains `"accepted"` for currently accepted names and `"synonym"` for taxonomic synonyms; currently invalid names. All previously accepted names now have a microorganisms ID and - if available - an LPSN, GBIF and SNOMED CT identifier.
+* The `mo_matching_score()` function now counts deletions and substitutions as 2 instead of 1, which impacts the outcome of `as.mo()` and any `mo_*()` function

 ### New
 * EUCAST 2022 and CLSI 2022 guidelines have been added for `as.rsi()`. EUCAST 2022 is now the new default guideline for all MIC and disks diffusion interpretations.
 * All new algorithm for `as.mo()` (and thus internally all `mo_*()` functions) while still following our original set-up as described in our paper (DOI 10.18637/jss.v104.i03).
-  * A new argument `keep_synonyms` allows to *not* correct for updated taxonomy
+  * A new argument `keep_synonyms` allows you to *not* correct for updated taxonomy; it takes the place of the now deleted argument `allow_uncertain`
  * It has increased tremendously in speed and returns generally more consequent results
  * Sequential coercion is now extremely fast as results are stored to the package environment, although coercion of unknown values must be run once per session. Previous results can be reset/removed with the new `mo_reset_session()` function.
 * Function `mean_amr_distance()` to calculate the mean AMR distance. The mean AMR distance is a normalised numeric value to compare AMR test results and can help to identify similar isolates, without comparing antibiograms by hand.
--- a/R/mo.R
+++ b/R/mo.R
@@ -371,9 +371,11 @@ as.mo <- function(x,
  gbif_matches[!gbif_matches %in% AMR::microorganisms$gbif] <- NA
  lpsn_matches <- AMR::microorganisms$lpsn_renamed_to[match(out, AMR::microorganisms$mo)]
  lpsn_matches[!lpsn_matches %in% AMR::microorganisms$lpsn] <- NA
-  AMR_env$mo_renamed <- list(old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
+  AMR_env$mo_renamed <- list(
+    old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
    gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
-                             lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)])
+    lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]
+  )
  if (isFALSE(keep_synonyms)) {
    out[which(!is.na(gbif_matches))] <- AMR::microorganisms$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR::microorganisms$gbif)]
    out[which(!is.na(lpsn_matches))] <- AMR::microorganisms$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR::microorganisms$lpsn)]
@@ -475,11 +477,13 @@ mo_renamed <- function() {
  ref_old <- AMR::microorganisms$ref[match(x$old, AMR::microorganisms$mo)]
  ref_new <- AMR::microorganisms$ref[match(x$new, AMR::microorganisms$mo)]

-  df_renamed <- data.frame(old = mo_old,
+  df_renamed <- data.frame(
+    old = mo_old,
    new = mo_new,
    ref_old = ref_old,
    ref_new = ref_new,
-                           stringsAsFactors = FALSE)
+    stringsAsFactors = FALSE
+  )
  df_renamed <- unique(df_renamed)
  df_renamed <- df_renamed[order(df_renamed$old), , drop = FALSE]
  set_clean_class(df_renamed, new_class = c("mo_renamed", "data.frame"))
@@ -512,7 +516,8 @@ mo_cleaning_regex <- function() {
    "|",
    "([({]|\\[).+([})]|\\])",
    "|",
-    "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))")
+    "(^| )(e?spp|e?ssp|e?ss|e?sp|e?subsp|sube?species|biovar|biotype|serovar|e?species)[.]*( |$))"
+  )
 }

 # UNDOCUMENTED METHODS ----------------------------------------------------
@@ -831,9 +836,12 @@ print.mo_uncertainties <- function(x, ...) {
      ifelse(x[i, ]$original_input != x[i, ]$input, paste0(strrep(" ", nchar(x[i, ]$original_input) + 6), "Based on input \"", x[i, ]$input, "\""), ""),
      # Add note if result was coerced to accepted taxonomic name
      ifelse(x[i, ]$keep_synonyms == FALSE & x[i, ]$mo %in% AMR::microorganisms$mo[which(AMR::microorganisms$status == "synonym")],
-                        paste0(strrep(" ", nchar(x[i, ]$original_input) + 6),
-                               font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)),
-                        ""),
+        paste0(
+          strrep(" ", nchar(x[i, ]$original_input) + 6),
+          font_red(paste0("This old taxonomic name was converted to ", font_italic(AMR::microorganisms$fullname[match(synonym_mo_to_accepted_mo(x[i, ]$mo), AMR::microorganisms$mo)], collapse = NULL), " (", synonym_mo_to_accepted_mo(x[i, ]$mo), ")."), collapse = NULL)
+        ),
+        ""
+      ),
      candidates,
      sep = "\n"
    )
@@ -879,20 +887,24 @@ convert_colloquial_input <- function(x) {
  out[x %like_case% "^g[abcdfghkl]s$"] <- gsub("g([abcdfghkl])s",
    "B_STRPT_GRP\\U\\1",
    x[x %like_case% "^g[abcdfghkl]s$"],
-                                               perl = TRUE)
+    perl = TRUE
+  )
  # Streptococci in different languages, like "estreptococos grupo B"
  out[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"] <- gsub(".*e?strepto[ck]o[ck].* ([abcdfghkl])$",
    "B_STRPT_GRP\\U\\1",
    x[x %like_case% "strepto[ck]o[ck].* [abcdfghkl]$"],
-                                                               perl = TRUE)
+    perl = TRUE
+  )
  out[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"] <- gsub(".*group ([abcdfghkl]) strepto[ck]o[ck].*",
    "B_STRPT_GRP\\U\\1",
    x[x %like_case% "group [abcdfghkl] strepto[ck]o[ck]"],
-                                                                  perl = TRUE)
+    perl = TRUE
+  )
  out[x %like_case% "ha?emoly.*strep"] <- "B_STRPT_HAEM"
  out[x %like_case% "(strepto.* mil+er+i|^mgs[^a-z]*$)"] <- "B_STRPT_MILL"
  out[x %like_case% "mil+er+i gr"] <- "B_STRPT_MILL"
  out[x %like_case% "((strepto|^s).* viridans|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"
+  out[x %like_case% "(viridans.* (strepto|^s).*|^vgs[^a-z]*$)"] <- "B_STRPT_VIRI"

  # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese)
  out[x %like_case% "([ck]oagulas[ea].negatie?[vf]|^[ck]o?ns[^a-z]*$)"] <- "B_STPHY_CONS"
@@ -1088,5 +1100,6 @@ synonym_mo_to_accepted_mo <- function(x) {

  ifelse(is.na(x_lpsn),
    AMR::microorganisms$mo[match(x_gbif, AMR::microorganisms$gbif)],
-         AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)])
+    AMR::microorganisms$mo[match(x_lpsn, AMR::microorganisms$lpsn)]
+  )
 }
--- a/R/mo_matching_score.R
+++ b/R/mo_matching_score.R
@@ -80,7 +80,7 @@ mo_matching_score <- function(x, n) {
  # only keep one space
  x <- gsub(" +", " ", x)

-  # start with a capital letter
+  # force a capital letter, so this conversion will not count as a substitution
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))

  # n is always a taxonomically valid full name
@@ -100,7 +100,8 @@ mo_matching_score <- function(x, n) {
      ignore.case = FALSE,
      fixed = TRUE,
      costs = c(insertions = 1, deletions = 2, substitutions = 2),
-                           counts = FALSE))
+      counts = FALSE
+    ))
  }, x, n, USE.NAMES = FALSE))

  l_n.lev[l_n < lev] <- l_n[l_n < lev]
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/R/translate.R
+++ b/R/translate.R
@@ -124,11 +124,14 @@ set_AMR_locale <- function(language) {
  options(AMR_locale = language)
  if (interactive() || identical(Sys.getenv("IN_PKGDOWN"), "true")) {
    # show which language to use now
-    message_("Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
+    message_(
+      "Using ", LANGUAGES_SUPPORTED_NAMES[[language]]$exonym,
      ifelse(language != "en",
        paste0(" (", LANGUAGES_SUPPORTED_NAMES[[language]]$endonym, ")"),
-                    ""), 
-             " for the AMR package for this session.")
+        ""
+      ),
+      " for the AMR package for this session."
+    )
  }
 }

--- a/data-raw/_pre_commit_hook.R
+++ b/data-raw/_pre_commit_hook.R
@@ -104,7 +104,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
    MO_staph[which(MO_staph$species %in% c(
      "coagulase-negative", "argensis", "arlettae",
      "auricularis", "borealis", "caeli", "capitis", "caprae",
-      "carnosus", "casei", "chromogenes", "cohnii", "condimenti",
+      "carnosus", "casei", "caseolyticus", "chromogenes", "cohnii", "condimenti",
      "croceilyticus",
      "debuckii", "devriesei", "edaphicus", "epidermidis",
      "equorum", "felis", "fleurettii", "gallinarum",
@@ -118,7 +118,7 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
      "vitulinus", "vitulus", "warneri", "xylosus",
      "caledonicus", "canis",
      "durrellii", "lloydii",
-      "ratti", "taiwanensis"
+      "ratti", "taiwanensis", "veratri", "urealyticus"
    ) |
      # old, now renamed to S. schleiferi (but still as synonym in our data of course):
      (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
--- a/data-raw/reproduction_of_microorganisms.R
+++ b/data-raw/reproduction_of_microorganisms.R
@@ -1016,7 +1016,8 @@ taxonomy <- taxonomy %>%
    # Animalia:
    !genus %in% c("Lucilia", "Lumbricus"),
    !(genus %in% c("Aedes", "Anopheles") & rank %in% c("species", "subspecies")), # only genus of the many hundreds of mosquitoes species
-    kingdom != "Plantae") # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi
+    kingdom != "Plantae"
+  ) # this kingdom only contained Curvularia and Hymenolepis, which have coincidental twin names with Fungi

 message("\nCongratulations! The new taxonomic table will contain ", format(nrow(taxonomy), big.mark = ","), " rows.\n")

--- a/inst/tinytest/test-data.R
+++ b/inst/tinytest/test-data.R
@@ -68,10 +68,10 @@ df <- AMR:::MO_lookup
 expect_true(nrow(df[which(df$prevalence == 1), , drop = FALSE]) < nrow(df[which(df$prevalence == 2), , drop = FALSE]))
 expect_true(nrow(df[which(df$prevalence == 2), , drop = FALSE]) < nrow(df[which(df$prevalence == 3), , drop = FALSE]))
 expect_true(all(c(
-  "mo", "fullname",
-  "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
-  "rank", "ref", "lpsn", "gbif", "status", "source", "prevalence", "snomed",
-  "kingdom_index", "fullname_lower", "g_species"
+  "mo", "fullname", "status", "kingdom", "phylum", "class", "order", 
+  "family", "genus", "species", "subspecies", "rank", "ref", "source", 
+  "lpsn", "lpsn_parent", "lpsn_renamed_to", "gbif", "gbif_parent", "gbif_renamed_to", "prevalence", 
+  "snomed", "kingdom_index", "fullname_lower", "full_first", "species_first"
 ) %in% colnames(df)))

 expect_inherits(AMR:::MO_CONS, "mo")
@@ -87,7 +87,8 @@ expect_true(NROW(uncategorised) == 0,
    "All staphylococcal species categorised as CoNS/CoPS.",
    paste0(
      "Staphylococcal species not categorised as CoNS/CoPS: S. ",
-      uncategorised$species, " (", uncategorised$mo, ")"
+      uncategorised$species, " (", uncategorised$mo, ")",
+      collapse = "\n"
    )
  )
 )
--- a/inst/tinytest/test-eucast_rules.R
+++ b/inst/tinytest/test-eucast_rules.R
@@ -41,7 +41,7 @@ expect_equal(
 )
 MOs_mentioned <- unique(AMR:::EUCAST_RULES_DF$this_value)
 MOs_mentioned <- sort(trimws(unlist(strsplit(MOs_mentioned[!AMR:::is_valid_regex(MOs_mentioned)], ",", fixed = TRUE))))
-MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned)))
+MOs_test <- suppressWarnings(suppressMessages(mo_name(MOs_mentioned, keep_synonyms = TRUE, language = NULL)))
 expect_true(length(MOs_mentioned[MOs_test != MOs_mentioned]) == 0)

 expect_error(suppressWarnings(eucast_rules(example_isolates, col_mo = "Non-existing")))
--- a/inst/tinytest/test-mo_property.R
+++ b/inst/tinytest/test-mo_property.R
@@ -59,7 +59,7 @@ expect_inherits(mo_synonyms(c("Candida albicans", "Escherichia coli")), "list")
 expect_equal(names(mo_info("Escherichia coli")), c(
  "kingdom", "phylum", "class", "order",
  "family", "genus", "species", "subspecies",
-  "synonyms", "gramstain", "url", "ref",
+  "status", "synonyms", "gramstain", "url", "ref",
  "snomed"
 ))
 expect_inherits(mo_info(c("Escherichia coli", "Staphylococcus aureus")), "list")
@@ -73,7 +73,7 @@ expect_true(mo_url("Escherichia coli") %like% "lpsn.dsmz.de")

 # test integrity
 MOs <- microorganisms
-expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en"))
+expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en", keep_synonyms = TRUE))

 # check languages
 expect_equal(mo_type("Escherichia coli", language = "de"), "Bakterien")
@@ -81,13 +81,13 @@ expect_equal(mo_gramstain("Escherichia coli", language = "nl"), "Gram-negatief")

 gr <- mo_gramstain("Escherichia coli", language = NULL)
 for (l in AMR:::LANGUAGES_SUPPORTED[-1]) {
-  expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in langauge", l))
+  expect_false(mo_gramstain("Escherichia coli", language = l) == gr, info = paste("Gram-stain in language", l))
 }

 expect_error(mo_gramstain("Escherichia coli", language = "UNKNOWN"))
-dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")], language = "nl") # should be transformable to English again
-expect_identical(mo_name(dutch, language = NULL),
-                 microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase")]) # gigantic test - will run ALL names
+dutch <- mo_name(microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")], language = "nl", keep_synonyms = TRUE) # should be transformable to English again
+expect_identical(mo_name(dutch, language = NULL, keep_synonyms = TRUE),
+                 microorganisms$fullname[which(microorganisms$fullname %unlike% "unknown|coagulase|Fungi")]) # gigantic test - will run ALL names

 # manual property function
 expect_error(mo_property("Escherichia coli", property = c("genus", "fullname")))
--- a/tests/tinytest.R
+++ b/tests/tinytest.R
@@ -76,4 +76,3 @@ if (identical(Sys.getenv("R_RUN_TINYTEST"), "true")) {
    print(summary(out))
  }
 }
-