From 7f344836ea3d7ddefa521361fc33460a0cdbce58 Mon Sep 17 00:00:00 2001
From: Matthijs Berends <m.s.berends@umcg.nl>
Date: Tue, 16 Jul 2024 15:55:58 +0200
Subject: [PATCH] (v2.1.1.9064) unit tests

---
 .github/prehooks/pre-commit      | 38 ++++++++++++++++++++++++++++++++
 DESCRIPTION                      |  2 +-
 NEWS.md                          |  2 +-
 R/mo.R                           | 19 ++++++++++------
 data-raw/_pre_commit_checks.R    |  3 ++-
 inst/tinytest/test-mo.R          |  2 +-
 inst/tinytest/test-mo_property.R |  9 ++++----
 7 files changed, 60 insertions(+), 15 deletions(-)
 create mode 100755 .github/prehooks/pre-commit

diff --git a/.github/prehooks/pre-commit b/.github/prehooks/pre-commit
new file mode 100755
index 000000000..8eaf689fb
--- /dev/null
+++ b/.github/prehooks/pre-commit
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# ==================================================================== #
+# TITLE:                                                               #
+# AMR: An R Package for Working with Antimicrobial Resistance Data     #
+#                                                                      #
+# SOURCE CODE:                                                         #
+# https://github.com/msberends/AMR                                     #
+#                                                                      #
+# PLEASE CITE THIS SOFTWARE AS:                                        #
+# Berends MS, Luz CF, Friedrich AW, et al. (2022).                     #
+# AMR: An R Package for Working with Antimicrobial Resistance Data.    #
+# Journal of Statistical Software, 104(3), 1-31.                       #
+# https://doi.org/10.18637/jss.v104.i03                                #
+#                                                                      #
+# Developed at the University of Groningen and the University Medical  #
+# Center Groningen in The Netherlands, in collaboration with many      #
+# colleagues from around the world, see our website.                   #
+#                                                                      #
+# This R package is free software; you can freely use and distribute   #
+# it for both personal and commercial purposes under the terms of the  #
+# GNU General Public License version 2.0 (GNU GPL-2), as published by  #
+# the Free Software Foundation.                                        #
+# We created this package for both routine data analysis and academic  #
+# research and it was publicly released in the hope that it will be    #
+# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY.              #
+#                                                                      #
+# Visit our website for the full manual and a complete tutorial about  #
+# how to conduct AMR data analysis: https://msberends.github.io/AMR/   #
+# ==================================================================== #
+
+# always stage these files, so that they are included in every commit:
+git add data-raw/*
+git add man/*
+git add R/sysdata.rda
+git add NAMESPACE
+git add DESCRIPTION
+git add NEWS.md
diff --git a/DESCRIPTION b/DESCRIPTION
index 762161863..47e422324 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: AMR
-Version: 2.1.1.9063
+Version: 2.1.1.9064
 Date: 2024-07-16
 Title: Antimicrobial Resistance Data Analysis
 Description: Functions to simplify and standardise antimicrobial resistance (AMR)
diff --git a/NEWS.md b/NEWS.md
index fc1787797..aa5646a75 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,4 @@
-# AMR 2.1.1.9063
+# AMR 2.1.1.9064
 
 *(this beta version will eventually become v3.0. We're happy to reach a new major milestone soon, which will be all about the new One Health support! Install this beta using [the instructions here](https://msberends.github.io/AMR/#latest-development-version).)*
 
diff --git a/R/mo.R b/R/mo.R
index d5697dfd2..94352a5d0 100755
--- a/R/mo.R
+++ b/R/mo.R
@@ -420,17 +420,21 @@ as.mo <- function(x,
   # Keep or replace synonyms ----
   lpsn_matches <- AMR_env$MO_lookup$lpsn_renamed_to[match(out, AMR_env$MO_lookup$mo)]
   lpsn_matches[!lpsn_matches %in% AMR_env$MO_lookup$lpsn] <- NA
-  # GBIF only for non-bacteria, since we use LPSN as primary source for bacteria
+  mycobank_matches <- AMR_env$MO_lookup$mycobank_renamed_to[match(out, AMR_env$MO_lookup$mo)]
+  mycobank_matches[!mycobank_matches %in% AMR_env$MO_lookup$mycobank] <- NA
+  # GBIF is only used for non-bacteria and non-fungi, since LPSN is our primary source for bacteria and MycoBank is our primary source for fungi
   # (an example is Strep anginosus, renamed according to GBIF, not according to LPSN)
-  gbif_matches <- AMR_env$MO_lookup$gbif_renamed_to[AMR_env$MO_lookup$kingdom != "Bacteria"][match(out, AMR_env$MO_lookup$mo[AMR_env$MO_lookup$kingdom != "Bacteria"])]
+  gbif_matches <- AMR_env$MO_lookup$gbif_renamed_to[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")][match(out, AMR_env$MO_lookup$mo[!AMR_env$MO_lookup$kingdom %in% c("Bacteria", "Fungi")])]
   gbif_matches[!gbif_matches %in% AMR_env$MO_lookup$gbif] <- NA
   AMR_env$mo_renamed <- list(
-    old = out[!is.na(gbif_matches) | !is.na(lpsn_matches)],
-    gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)],
-    lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches)]
+    old = out[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
+    gbif_matches = gbif_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
+    mycobank_matches = mycobank_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)],
+    lpsn_matches = lpsn_matches[!is.na(gbif_matches) | !is.na(lpsn_matches) | !is.na(mycobank_matches)]
   )
   if (isFALSE(keep_synonyms)) {
     out[which(!is.na(gbif_matches))] <- AMR_env$MO_lookup$mo[match(gbif_matches[which(!is.na(gbif_matches))], AMR_env$MO_lookup$gbif)]
+    out[which(!is.na(mycobank_matches))] <- AMR_env$MO_lookup$mo[match(mycobank_matches[which(!is.na(mycobank_matches))], AMR_env$MO_lookup$mycobank)]
     out[which(!is.na(lpsn_matches))] <- AMR_env$MO_lookup$mo[match(lpsn_matches[which(!is.na(lpsn_matches))], AMR_env$MO_lookup$lpsn)]
     if (isTRUE(info) && length(AMR_env$mo_renamed$old) > 0) {
       print(mo_renamed(), extra_txt = " (use `keep_synonyms = TRUE` to leave uncorrected)")
@@ -1040,13 +1044,14 @@ convert_colloquial_input <- function(x) {
   out[x %like_case% "anaerob[a-z]+ .*gram[ -]?neg.*"] <- "B_ANAER-NEG"
   out[x %like_case% "anaerob[a-z]+ .*gram[ -]?pos.*"] <- "B_ANAER-POS"
   out[is.na(out) & x %like_case% "anaerob[a-z]+ (micro)?.*organism"] <- "B_ANAER"
+  out[is.na(out) & x %like_case% "anaerob[a-z]+ bacter"] <- "B_ANAER"
   
   # coryneform bacteria
   out[x %like_case% "^coryneform"] <- "B_CORYNF"
   
   # yeasts and fungi
-  out[x %like_case% "^yeast?"] <- "F_YEAST"
-  out[x %like_case% "^fung(us|i)"] <- "F_FUNGUS"
+  out[x %like_case% "(^| )yeast?"] <- "F_YEAST"
+  out[x %like_case% "(^| )fung(us|i)"] <- "F_FUNGUS"
   
   # trivial names known to the field
   out[x %like_case% "meningo[ck]o[ck]"] <- "B_NESSR_MNNG"
diff --git a/data-raw/_pre_commit_checks.R b/data-raw/_pre_commit_checks.R
index 5af76f869..5c6066e30 100644
--- a/data-raw/_pre_commit_checks.R
+++ b/data-raw/_pre_commit_checks.R
@@ -122,7 +122,8 @@ create_species_cons_cops <- function(type = c("CoNS", "CoPS")) {
         "vitulinus", "vitulus", "warneri", "xylosus",
         "caledonicus", "canis",
         "durrellii", "lloydii",
-        "ratti", "taiwanensis", "veratri", "urealyticus"
+        "ratti", "taiwanensis", "veratri", "urealyticus",
+        "americanisciuri", "marylandisciuri", "shinii", "brunensis"
       ) |
         # old, now renamed to S. schleiferi (but still as synonym in our data of course):
         (MO_staph$species == "schleiferi" & MO_staph$subspecies %in% c("schleiferi", ""))),
diff --git a/inst/tinytest/test-mo.R b/inst/tinytest/test-mo.R
index e7fdc607c..c3d386c17 100644
--- a/inst/tinytest/test-mo.R
+++ b/inst/tinytest/test-mo.R
@@ -265,7 +265,7 @@ expect_equal(
       "Raoultella (here some text) terrigena"
     )
   ))),
-  c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_RLTLL_TRRG")
+  c("B_MCRBC_PRXY", "B_STRPT_SUIS", "B_KLBSL_TRRG")
 )
 expect_stdout(print(mo_uncertainties()))
 x <- as.mo("Sta. aur")
diff --git a/inst/tinytest/test-mo_property.R b/inst/tinytest/test-mo_property.R
index e4c5dd48f..9d2659e0f 100644
--- a/inst/tinytest/test-mo_property.R
+++ b/inst/tinytest/test-mo_property.R
@@ -104,10 +104,10 @@ expect_true(length(mo_group_members("B_HACEK")) > 1)
 expect_inherits(mo_group_members(c("Candida albicans", "Escherichia coli")), "list")
 
 expect_identical(mo_oxygen_tolerance(c("Klebsiella pneumoniae", "Clostridioides difficile")),
-                 c("aerobe", "anaerobe"))
+                 c("facultative anaerobe", "anaerobe"))
 
 expect_equal(as.character(table(mo_pathogenicity(example_isolates$mo))),
-             c("1874", "109", "1", "16"))
+             c("1911", "72", "1", "16"))
 
 expect_equal(mo_ref("Escherichia coli"), "Castellani et al., 1919")
 expect_equal(mo_authors("Escherichia coli"), "Castellani et al.")
@@ -116,8 +116,9 @@ expect_equal(mo_year("Escherichia coli"), 1919)
 expect_true(mo_url("Candida albicans") %like% "gbif.org")
 expect_true(mo_url("Escherichia coli") %like% "lpsn.dsmz.de")
 
-# test integrity
-expect_identical(microorganisms$fullname, mo_fullname(microorganisms$fullname, language = "en", keep_synonyms = TRUE))
+# test integrity: full names must be returned unchanged by mo_fullname() (names matching the pattern "(Fungi|{)" are excluded)
+expect_identical(microorganisms$fullname[microorganisms$fullname %unlike% "(Fungi|{)"],
+                 suppressWarnings(mo_fullname(microorganisms$fullname[microorganisms$fullname %unlike% "(Fungi|{)"], language = "en", keep_synonyms = TRUE)))
 
 # check languages
 expect_equal(mo_type("Escherichia coli", language = "de"), "Bakterien")