(v1.1.0.9020) updated taxonomy

2026-07-24 10:30:57 +02:00 · 2020-05-27 16:37:49 +02:00
parent ae1969b941
commit 86d44054f0
55 changed files with 68063 additions and 70233 deletions
--- a/4
+++ b/4
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 1.1.0.9019
-Date: 2020-05-25
+Version: 1.1.0.9020
+Date: 2020-05-27
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(role = c("aut", "cre"), 
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,5 @@
-# AMR 1.1.0.9019
-## <small>Last updated: 25-May-2020</small>
+# AMR 1.1.0.9020
+## <small>Last updated: 27-May-2020</small>

 ### Breaking 
 * Removed code dependency on all other R packages, making this package fully independent of the development process of others. This is a major code change, but will probably not be noticeable by most users.
@@ -13,8 +13,17 @@
  * For developers: classes `mo` and `ab` now both also inherit class `character`, to support any data transformation. This change invalidates code that checks for class length == 1.

 ### Changed
+* Taxonomy:
+  * Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)
+  * Removed the Catalogue of Life IDs (like 776351), since the Catalogue of Life now works with a species ID (hexadecimal string)
 * EUCAST rules:
-  * The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzym inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`.
+  * The `eucast_rules()` function no longer applies "other" rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for `rules` is now `c("breakpoints", "expert")` instead of `"all"`, but this can be changed by the user. To return to the old behaviour, set `options(AMR.eucast_rules = "all")`.
+  * Fixed a bug where antimicrobial results in the original data were not regarded as valid R/SI values when they were checked
+  * The "other" rules now apply these two rules to all drug combinations in the `antibiotics` data set:
+    1. A drug **with** enzyme inhibitor will be set to S if the drug **without** enzyme inhibitor is S
+    2. A drug **without** enzyme inhibitor will be set to R if the drug **with** enzyme inhibitor is R
+    
+    This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/avibactam, trimethoprim/sulfamethoxazole, etc.
  * Added official drug names to verbose output of `eucast_rules()`
 * Added function `ab_url()` to return the direct URL of an antimicrobial agent from the official WHO website
 * Improvements for algorithm in `as.ab()`, so that e.g. `as.ab("ampi sul")` and `ab_name("ampi sul")` work
@@ -22,6 +31,7 @@
 * Small fix for some text input that could not be coerced as valid MIC values 
 * Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)
 * Fix for `set_mo_source()` to make sure that column `mo` will always be the second column
+* Added abbreviation "cfsc" for Cefoxitin and "cfav" for Ceftazidime/avibactam

 ### Other
 * Removed previously deprecated function `p.symbol()` - it was replaced with `p_symbol()`
--- a/R/aa_helper_functions.R
+++ b/R/aa_helper_functions.R
@@ -78,8 +78,7 @@ check_dataset_integrity <- function() {
    check_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum",
                                  "class", "order", "family", "genus", 
                                  "species", "subspecies", "rank",
-                                  "col_id", "species_id", "source",
-                                  "ref", "prevalence", "snomed") %in% colnames(microorganisms),
+                                  "species_id", "source", "ref", "prevalence") %in% colnames(microorganisms),
                                na.rm = TRUE) & NROW(microorganisms) == NROW(MO_lookup)
    check_antibiotics <- all(c("ab", "atc", "cid", "name", "group", 
                               "atc_group1", "atc_group2", "abbreviations",
--- a/R/ab.R
+++ b/R/ab.R
@@ -347,7 +347,7 @@ is.ab <- function(x) {
 #' @export
 #' @noRd
 print.ab <- function(x, ...) {
-  cat("Class 'ab'\n")
+  cat("Class <ab>\n")
  print(as.character(x), quote = FALSE)
 }

--- a/R/catalogue_of_life.R
+++ b/R/catalogue_of_life.R
@@ -50,9 +50,9 @@
 #'
 #'
 #' # Get a note when a species was renamed
-#' mo_shortname("Chlamydia psittaci")
-#' # Note: 'Chlamydia psittaci' (Page, 1968) was renamed
-#' #       'Chlamydophila psittaci' (Everett et al., 1999)
+#' mo_shortname("Chlamydophila psittaci")
+#' # Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to
+#' #       'Chlamydia psittaci' (Page, 1968)
 #' # [1] "C. psittaci"
 #'
 #' # Get any property from the entire taxonomic tree for all included species
@@ -70,9 +70,9 @@
 #'
 #' # Do not get mistaken - this package is about microorganisms
 #' mo_kingdom("C. elegans")
-#' # [1] "Bacteria"                        # Bacteria?!
+#' # [1] "Fungi"                 # Fungi?!
 #' mo_name("C. elegans")
-#' # [1] "Chroococcus limneticus elegans"  # Because a microorganism was found
+#' # [1] "Cladosporium elegans"  # Because a microorganism was found
 NULL

 #' Version info of included Catalogue of Life
--- a/R/data.R
+++ b/R/data.R
@@ -82,7 +82,6 @@
 #' @inheritSection catalogue_of_life Catalogue of Life
 #' @format A [`data.frame`] with `r format(nrow(microorganisms), big.mark = ",")` observations and `r ncol(microorganisms)` variables:
 #' - `mo`\cr ID of microorganism as used by this package
-#' - `col_id`\cr Catalogue of Life ID
 #' - `fullname`\cr Full name, like `"Escherichia coli"`
 #' - `kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `subspecies`\cr Taxonomic rank of the microorganism
 #' - `rank`\cr Text of the taxonomic rank of the microorganism, like `"species"` or `"genus"`
@@ -114,17 +113,19 @@
 #' From: <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/complete-list-readme>
 #' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
 #' 
+#' Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
+#'
 #' Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date> (check included version with [catalogue_of_life_version()]).
 #' @inheritSection AMR Read more on our website!
 #' @seealso [as.mo()], [mo_property()], [microorganisms.codes]
 "microorganisms"

 catalogue_of_life <- list(
-  year = 2018,
+  year = 2019,
  version = "Catalogue of Life: {year} Annual Checklist",
-  url_CoL = "http://www.catalogueoflife.org/annual-checklist/{year}/",
-  url_DSMZ = "https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date/prokaryotic-nomenclature-up-to-date/genus-search",
-  yearmonth_DSMZ = "August 2019"
+  url_CoL = "http://www.catalogueoflife.org/col/",
+  url_DSMZ = "https://lpsn.dsmz.de",
+  yearmonth_DSMZ = "May 2020"
 )

 #' Data set with previously accepted taxonomic names
@@ -132,17 +133,18 @@ catalogue_of_life <- list(
 #' A data set containing old (previously valid or accepted) taxonomic names according to the Catalogue of Life. This data set is used internally by [as.mo()].
 #' @inheritSection catalogue_of_life Catalogue of Life
 #' @format A [`data.frame`] with `r format(nrow(microorganisms.old), big.mark = ",")` observations and `r ncol(microorganisms.old)` variables:
-#' - `col_id`\cr Catalogue of Life ID that was originally given
-#' - `col_id_new`\cr New Catalogue of Life ID that responds to an entry in the [microorganisms] data set
 #' - `fullname`\cr Old full taxonomic name of the microorganism
+#' - `fullname_new`\cr New full taxonomic name of the microorganism
 #' - `ref`\cr Author(s) and year of concerning scientific publication
 #' - `prevalence`\cr Prevalence of the microorganism, see [as.mo()]
 #' @source Catalogue of Life: Annual Checklist (public online taxonomic database), <http://www.catalogueoflife.org> (check included annual version with [catalogue_of_life_version()]).
+#' 
+#' Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
 #' @inheritSection AMR Read more on our website!
 #' @seealso [as.mo()] [mo_property()] [microorganisms]
 "microorganisms.old"

-#' Translation table for common microorganism codes
+#' Translation table with `r format(nrow(microorganisms.codes), big.mark = ",")` common microorganism codes
 #'
 #' A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with [set_mo_source()]. They will all be searched when using [as.mo()] and consequently all the [`mo_*`][mo_property()] functions.
 #' @format A [`data.frame`] with `r format(nrow(microorganisms.codes), big.mark = ",")` observations and `r ncol(microorganisms.codes)` variables:
--- a/R/disk.R
+++ b/R/disk.R
@@ -99,7 +99,7 @@ is.disk <- function(x) {
 #' @export
 #' @noRd
 print.disk <- function(x, ...) {
-  cat("Class 'disk'\n")
+  cat("Class <disk>\n")
  print(as.integer(x), quote = FALSE)
 }

--- a/R/eucast_rules.R
+++ b/R/eucast_rules.R
@@ -245,6 +245,7 @@ eucast_rules <- function(x,
  }
  
  warned <- FALSE
+  warn_lacking_rsi_class <- FALSE
  
  txt_error <- function() {
    if (info == TRUE) cat("", font_red_bg(font_white(" ERROR ")), "\n\n") 
@@ -410,6 +411,7 @@ eucast_rules <- function(x,
  RID <- cols_ab["RID"]
  RIF <- cols_ab["RIF"]
  RXT <- cols_ab["RXT"]
+  SAM <- cols_ab["SAM"]
  SIS <- cols_ab["SIS"]
  SXT <- cols_ab["SXT"]
  TCY <- cols_ab["TCY"]
@@ -440,7 +442,9 @@ eucast_rules <- function(x,
    cols <- unique(cols[!is.na(cols) & !is.null(cols)])
    if (length(rows) > 0 & length(cols) > 0) {
      before_df <- x_original
-      
+      if (any(!sapply(x[, cols, drop = FALSE], is.rsi), na.rm = TRUE)) {
+        warn_lacking_rsi_class <<- TRUE
+      }
      tryCatch(
        # insert into original table
        x_original[rows, cols] <<- to,
@@ -599,14 +603,79 @@ eucast_rules <- function(x,
    }
  }
  
-  if (info == TRUE & !any(c("other", "all") %in% rules, na.rm = TRUE)) {
-    cat(font_red("\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.\nUse eucast_rules(..., rules = \"all\") to also apply those rules.\n"))
-  }
-  
-  eucast_notification_shown <- FALSE
-  eucast_rules_df <- eucast_rules_file # internal data file
+  as.rsi_no_warning <- function(x) suppressWarnings(as.rsi(x))
  no_added <- 0
  no_changed <- 0
+  
+  # Other rules: enzyme inhibitors ------------------------------------------
+  if (any(c("all", "other") %in% rules)) {
+    if (info == TRUE) {
+      cat(font_bold(paste0("\nRules by this AMR package (",
+                           font_red(paste0("v", utils::packageVersion("AMR"), ", ", 
+                                           format(utils::packageDate("AMR"), "%Y"))), ")\n")))
+    }
+    
+    ab_enzyme <- subset(antibiotics, name %like% "/")[, c("ab", "name")]
+    ab_enzyme$base_name <- gsub("^([a-zA-Z0-9]+).*", "\\1", ab_enzyme$name)
+    ab_enzyme$base_ab <- as.ab(ab_enzyme$base_name)
+    for (i in seq_len(nrow(ab_enzyme))) {
+      if (all(c(ab_enzyme[i, ]$ab, ab_enzyme[i, ]$base_ab) %in% names(cols_ab), na.rm = TRUE)) {
+        ab_name_base <- ab_name(cols_ab[ab_enzyme[i, ]$base_ab], language = NULL, tolower = TRUE)
+        ab_name_enzyme <- ab_name(cols_ab[ab_enzyme[i, ]$ab], language = NULL, tolower = TRUE)
+        
+        # Set base to R where base + enzyme inhibitor is R
+        rule_current <- paste0("Set ", ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = R where ",
+                               ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = R")
+        if (info == TRUE) {
+          cat(rule_current)
+        }
+        run_changes <- edit_rsi(to = "R",
+                                rule = c(rule_current, "Other rules", ""),
+                                rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$ab]]) == "R"),
+                                cols = cols_ab[ab_enzyme[i, ]$base_ab])
+        no_added <- no_added + run_changes$added
+        no_changed <- no_changed + run_changes$changed
+        # Print number of new changes
+        if (info == TRUE) {
+          # print only on last one of rules in this group
+          txt_ok(no_added = no_added, no_changed = no_changed)
+          # and reset counters
+          no_added <- 0
+          no_changed <- 0
+        }
+        
+        # Set base + enzyme inhibitor to S where base is S
+        rule_current <- paste0("Set ", ab_name_enzyme, " (", cols_ab[ab_enzyme[i, ]$ab], ") = S where ",
+                               ab_name_base, " (", cols_ab[ab_enzyme[i, ]$base_ab], ") = S")
+        if (info == TRUE) {
+          cat(rule_current)
+        }
+        run_changes <- edit_rsi(to = "S",
+                                rule = c(rule_current, "Other rules", ""),
+                                rows = which(as.rsi_no_warning(x[, cols_ab[ab_enzyme[i, ]$base_ab]]) == "S"),
+                                cols = cols_ab[ab_enzyme[i, ]$ab])
+        no_added <- no_added + run_changes$added
+        no_changed <- no_changed + run_changes$changed
+        # Print number of new changes
+        if (info == TRUE) {
+          # print only on last one of rules in this group
+          txt_ok(no_added = no_added, no_changed = no_changed)
+          # and reset counters
+          no_added <- 0
+          no_changed <- 0
+        }
+      }
+    }
+    
+  } else {
+    if (info == TRUE) {
+      cat(font_red("\nSkipping inheritance rules defined by this package, such as setting trimethoprim (TMP) = R where trimethoprim/sulfamethoxazole (SXT) = R.\nUse eucast_rules(..., rules = \"all\") to also apply those rules.\n"))
+    }
+  }
+  
+  # Official EUCAST rules ---------------------------------------------------
+  eucast_notification_shown <- FALSE
+  eucast_rules_df <- eucast_rules_file # internal data file
  for (i in seq_len(nrow(eucast_rules_df))) {
    
    rule_previous <- eucast_rules_df[max(1, i - 1), "reference.rule"]
@@ -637,18 +706,14 @@ eucast_rules <- function(x,
    if (rule_group_current %like% "expert" & !any(c("all", "expert") %in% rules)) {
      next
    }
-    if (rule_group_current %like% "other" & !any(c("all", "other") %in% rules)) {
-      next
-    }
    
    if (info == TRUE & !rule_group_current %like% "other" & eucast_notification_shown == FALSE) {
-      cat(paste0(
-        "\n----\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"),
+      cat(paste0("\n", font_grey(strrep("-", options()$width - 1)),
+        "\nRules by the ", font_bold("European Committee on Antimicrobial Susceptibility Testing (EUCAST)"),
        "\n", font_blue("http://eucast.org/"), "\n"))
      eucast_notification_shown <- TRUE
    }
    
-    
    if (info == TRUE) {
      # Print rule (group) ------------------------------------------------------
      if (rule_group_current != rule_group_previous) {
@@ -662,7 +727,7 @@ eucast_rules <- function(x,
              rule_group_current %like% "expert",
              paste0("\nEUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (", 
                     font_red(paste0("v", EUCAST_VERSION_EXPERT_RULES)), ")\n"),
-              "\nOther rules by this AMR package\n"))))
+              ""))))
      }
      # Print rule  -------------------------------------------------------------
      if (rule_current != rule_previous) {
@@ -733,18 +798,18 @@ eucast_rules <- function(x,
        rows <- integer(0)
      } else if (length(source_antibiotics) == 1) {
        rows <-  tryCatch(which(x[, col_mo_property] %like% mo_value
-                                & x[, source_antibiotics[1L]] == source_value[1L]),
+                                & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]),
                          error = function(e) integer(0))
      } else if (length(source_antibiotics) == 2) {
        rows <-  tryCatch(which(x[, col_mo_property] %like% mo_value
-                                & x[, source_antibiotics[1L]] == source_value[1L]
-                                & x[, source_antibiotics[2L]] == source_value[2L]),
+                                & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
+                                & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]),
                          error = function(e) integer(0))
      } else if (length(source_antibiotics) == 3) {
        rows <-  tryCatch(which(x[, col_mo_property] %like% mo_value
-                                & x[, source_antibiotics[1L]] == source_value[1L]
-                                & x[, source_antibiotics[2L]] == source_value[2L]
-                                & x[, source_antibiotics[3L]] == source_value[3L]),
+                                & as.rsi_no_warning(x[, source_antibiotics[1L]]) == source_value[1L]
+                                & as.rsi_no_warning(x[, source_antibiotics[2L]]) == source_value[2L]
+                                & as.rsi_no_warning(x[, source_antibiotics[3L]]) == source_value[3L]),
                          error = function(e) integer(0))
      } else {
        stop("only 3 antibiotics supported for source_antibiotics ", call. = FALSE)
@@ -784,7 +849,7 @@ eucast_rules <- function(x,
      arrange(row, rule_group, rule_name, col)
    
    cat(paste0("\n", font_grey(strrep("-", options()$width - 1)), "\n"))
-    cat(font_bold(paste("EUCAST rules", paste0(wouldve, "affected"),
+    cat(font_bold(paste("The rules", paste0(wouldve, "affected"),
                   formatnr(n_distinct(verbose_info$row)),
                   "out of", formatnr(nrow(x_original)),
                   "rows, making a total of", formatnr(nrow(verbose_info)), "edits\n")))
@@ -846,6 +911,12 @@ eucast_rules <- function(x,
    }
  }
  
+  if (isTRUE(warn_lacking_rsi_class)) {
+    warning("Not all columns with antimicrobial results are of class <rsi>.\n",
+            "Transform eligible columns to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
+            call. = FALSE)
+  }
+  
  # Return data set ---------------------------------------------------------
  if (verbose == TRUE) {
    rownames(verbose_info) <- NULL
--- a/R/join_microorganisms.R
+++ b/R/join_microorganisms.R
@@ -154,7 +154,7 @@ joins_check_df <- function(x, by) {
        by <- "mo"
        x[, "mo"] <- as.mo(x[, "mo"])
      } else {
-        stop("Cannot join - no column found with name or class  `mo`.", call. = FALSE)
+        stop("Cannot join - no column found with name or class <mo>.", call. = FALSE)
      }
    }
    message('Joining, by = "', by, '"') # message same as dplyr::join functions
--- a/R/mic.R
+++ b/R/mic.R
@@ -174,7 +174,7 @@ droplevels.mic <- function(x, exclude = ifelse(anyNA(levels(x)), NULL, NA), ...)
 #' @export
 #' @noRd
 print.mic <- function(x, ...) {
-  cat("Class 'mic'\n")
+  cat("Class <mic>\n")
  print(as.character(x), quote = FALSE)
 }

--- a/R/mo.R
+++ b/R/mo.R
@@ -126,7 +126,6 @@
 #' as.mo("MRSA")    # Methicillin Resistant S. aureus
 #' as.mo("VISA")    # Vancomycin Intermediate S. aureus
 #' as.mo("VRSA")    # Vancomycin Resistant S. aureus
-#' as.mo(22242419)  # Catalogue of Life ID
 #' as.mo(115329001) # SNOMED CT code
 #' 
 #' # Dyslexia is no problem - these all work:
@@ -557,19 +556,43 @@ exec_as.mo <- function(x,
        progress$tick()
      }

-      if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
-        # empty and nonsense values, ignore without warning
-        x[i] <- lookup(mo == "UNKNOWN")
-        next
-      }
-      
-      # valid MO code ---
+      # valid MO code ----
      found <- lookup(mo == toupper(x_backup[i]))
      if (!is.na(found)) {
        x[i] <- found[1L]
        next
      }
      
+      # valid fullname ----
+      found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i]))))
+      # added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets
+      if (!is.na(found)) {
+        x[i] <- found[1L]
+        next
+      }
+      
+      # old fullname ----
+      found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])),
+                      column = NULL, # all columns
+                      haystack = MO.old_lookup)
+      if (!all(is.na(found))) {
+        # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
+        # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning)
+        # mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999"
+        if (property == "ref") {
+          x[i] <- found["ref"]
+        } else {
+          x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
+        }
+        options(mo_renamed_last_run = found["fullname"])
+        was_renamed(name_old = found["fullname"],
+                    name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
+                    ref_old = found["ref"],
+                    ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
+                    mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
+        next
+      }
+      
      # old mo code, used in previous versions of this package ----
      if (x_backup[i] %in% microorganisms.translation$mo_old) {
        old_mo_warning <- TRUE
@@ -582,10 +605,9 @@ exec_as.mo <- function(x,
        }
      }
      
-      found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])))
-      # most probable: is exact match in fullname
-      if (!is.na(found)) {
-        x[i] <- found[1L]
+      if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) {
+        # empty and nonsense values, ignore without warning
+        x[i] <- lookup(mo == "UNKNOWN")
        next
      }
      
@@ -614,13 +636,6 @@ exec_as.mo <- function(x,
        next
      }
      
-      # valid Catalogue of Life ID ---
-      found <- lookup(col_id == x_backup[i])
-      if (!is.na(found)) {
-        x[i] <- found[1L]
-        next
-      }
-      
      # WHONET and other common LIS codes ----
      found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])),
                      column = "mo", 
@@ -943,21 +958,20 @@ exec_as.mo <- function(x,
                        column = NULL, # all columns
                        haystack = data.old_to_check)
        if (!all(is.na(found))) {
-          col_id_new <- found["col_id_new"]
          # when property is "ref" (which is the case in mo_ref, mo_authors and mo_year), return the old value, so:
          # mo_ref() of "Chlamydia psittaci" will be "Page, 1968" (with warning)
          # mo_ref() of "Chlamydophila psittaci" will be "Everett et al., 1999"
          if (property == "ref") {
            x[i] <- found["ref"]
          } else {
-            x[i] <- lookup(col_id == found["col_id_new"], haystack = MO_lookup)
+            x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
          }
          options(mo_renamed_last_run = found["fullname"])
          was_renamed(name_old = found["fullname"],
-                      name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup),
+                      name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
                      ref_old = found["ref"],
-                      ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup),
-                      mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup))
+                      ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
+                      mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
          return(x[i])
        }
        
@@ -997,18 +1011,18 @@ exec_as.mo <- function(x,
                # mo_ref("Chlamydophila psittaci) = "Everett et al., 1999"
                x <- found["ref"]
              } else {
-                x <- lookup(col_id == found["col_id_new"], haystack = MO_lookup)
+                x <- lookup(fullname == found["fullname_new"], haystack = MO_lookup)
              }
              was_renamed(name_old = found["fullname"],
-                          name_new = lookup(col_id == found["col_id_new"], "fullname", haystack = MO_lookup),
+                          name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup),
                          ref_old = found["ref"],
-                          ref_new = lookup(col_id == found["col_id_new"], "ref", haystack = MO_lookup),
-                          mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup))
+                          ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup),
+                          mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))
              options(mo_renamed_last_run = found["fullname"])
              uncertainties <<- rbind(uncertainties,
                                      format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level,
                                                               input = a.x_backup,
-                                                               result_mo = lookup(col_id == found["col_id_new"], "mo", haystack = MO_lookup)))
+                                                               result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)))
              return(x)
            }
            
@@ -1366,6 +1380,10 @@ exec_as.mo <- function(x,
        failures <- c(failures, x_backup[i])
      }
    }
+    
+    if (initial_search == TRUE) {
+      close(progress)
+    }
  }
  
  # handling failures ----
@@ -1552,7 +1570,7 @@ format_uncertainty_as_df <- function(uncertainty_level,
 #' @export
 #' @noRd
 print.mo <- function(x, ...) {
-  cat("Class 'mo'\n")
+  cat("Class <mo>\n")
  x_names <- names(x)
  x <- as.character(x)
  names(x) <- x_names
@@ -1711,6 +1729,9 @@ print.mo_renamed <- function(x, ...) {
                             font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "", 
                                                                paste0(" (",  gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")),
                             " was renamed ", 
+                             ifelse(as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])),
+                                    font_bold("back to "),
+                                    ""),
                             font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "", 
                                                                paste0(" (",  gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")),
                             " [", x$mo[i], "]")))
@@ -1747,9 +1768,14 @@ translate_allow_uncertain <- function(allow_uncertain) {
 }

 get_mo_failures_uncertainties_renamed <- function() {
-  list(failures = getOption("mo_failures"),
+  remember <- list(failures = getOption("mo_failures"),
                   uncertainties = getOption("mo_uncertainties"),
                   renamed = getOption("mo_renamed"))
+  # empty them, otherwise mo_shortname("Chlamydophila psittaci") will give 3 notes
+  options("mo_failures" = NULL)
+  options("mo_uncertainties" = NULL)
+  options("mo_renamed" = NULL)
+  remember
 }

 load_mo_failures_uncertainties_renamed <- function(metadata) {
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -149,6 +149,7 @@ mo_fullname <- mo_name
 #' @export
 mo_shortname <- function(x, language = get_locale(), ...) {
  x.mo <- as.mo(x, ...)
+
  metadata <- get_mo_failures_uncertainties_renamed()

  replace_empty <- function(x) {
@@ -315,9 +316,9 @@ mo_synonyms <- function(x, ...) {
  x <- as.mo(x, ...)
  metadata <- get_mo_failures_uncertainties_renamed()

-  IDs <- mo_property(x = x, property = "col_id", language = NULL)
-  syns <- lapply(IDs, function(col_id) {
-    res <- sort(microorganisms.old[which(microorganisms.old$col_id_new == col_id), "fullname"])
+  IDs <- mo_name(x = x, language = NULL)
+  syns <- lapply(IDs, function(newname) {
+    res <- sort(microorganisms.old[which(microorganisms.old$fullname_new == newname), "fullname"])
    if (length(res) == 0) {
      NULL
    } else {
@@ -368,14 +369,9 @@ mo_url <- function(x, open = FALSE, ...) {
  df <- data.frame(mo, stringsAsFactors = FALSE) %>%
    left_join(select(microorganisms, mo, source, species_id), by = "mo")
  df$url <- ifelse(df$source == "CoL",
-                   paste0(gsub("{year}",
-                               catalogue_of_life$year, 
-                               catalogue_of_life$url_CoL,
-                               fixed = TRUE), 
-                          "details/species/id/",
-                          df$species_id),
+                   paste0(catalogue_of_life$url_CoL, "details/species/id/", df$species_id, "/"),
                   ifelse(df$source == "DSMZ",
-                          paste0(catalogue_of_life$url_DSMZ, "/", unlist(lapply(strsplit(mo_names, ""), function(x) x[1]))),
+                          paste0(catalogue_of_life$url_DSMZ, "/advanced_search?adv[taxon-name]=", gsub(" ", "+", mo_names), "/"),
                          NA_character_))
  u <- df$url
  names(u) <- mo_names
--- a/R/rsi.R
+++ b/R/rsi.R
@@ -533,7 +533,7 @@ is.rsi.eligible <- function(x, threshold = 0.05) {
 #' @export
 #' @noRd
 print.rsi <- function(x, ...) {
-  cat("Class 'rsi'\n")
+  cat("Class <rsi>\n")
  print(as.character(x), quote = FALSE)
 }

--- a/R/rsi_calc.R
+++ b/R/rsi_calc.R
@@ -128,7 +128,7 @@ rsi_calc <- function(...,
  }
  
  if (print_warning == TRUE) {
-    warning("Increase speed by transforming to class `rsi` on beforehand: df %>% mutate_if(is.rsi.eligible, as.rsi)",
+    warning("Increase speed by transforming to class <rsi> on beforehand: your_data %>% mutate_if(is.rsi.eligible, as.rsi)",
            call. = FALSE)
  }
  
@@ -177,7 +177,7 @@ rsi_calc_df <- function(type, # "proportion", "count" or "both"
  }
  
  if (!any(sapply(data, is.rsi), na.rm = TRUE)) {
-    stop("No columns with class 'rsi' found. See ?as.rsi.", call. = FALSE)
+    stop("No columns with class <rsi> found. See ?as.rsi.", call. = FALSE)
  }
  
  if (as.character(translate_ab) %in% c("TRUE", "official")) {
--- a/R/sysdata.rda
+++ b/R/sysdata.rda
--- a/data-raw/antibiotics.txt
+++ b/data-raw/antibiotics.txt
@@ -89,7 +89,7 @@
 "CTF"	"J01DC07"	43708	"Cefotiam"	"Cephalosporins (2nd gen.)"	"Other beta-lactam antibacterials"	"Second-generation cephalosporins"	""	"c(\"cefotiam\", \"cefotiam?\", \"cefotiamum\", \"ceradolan\", \"ceradon\", \"haloapor\")"	1.2	"g"	4	"g"	
 "CHE"		125846	"Cefotiam hexetil"	"Cephalosporins (3rd gen.)"			""	"c(\"cefotiam cilexetil\", \"pansporin t\")"					
 "FOV"		9578573	"Cefovecin"	"Cephalosporins (3rd gen.)"			""	""					
-"FOX"	"J01DC01"	441199	"Cefoxitin"	"Cephalosporins (2nd gen.)"	"Other beta-lactam antibacterials"	"Second-generation cephalosporins"	"c(\"cfox\", \"cfx\", \"cfxt\", \"cx\", \"fox\", \"fx\")"	"c(\"cefoxitin\", \"cefoxitina\", \"cefoxitine\", \"cefoxitinum\", \"cefoxotin\", \"cephoxitin\", \"mefoxin\", \"mefoxitin\", \"rephoxitin\")"			6	"g"	"c(\"25240-3\", \"3448-8\")"
+"FOX"	"J01DC01"	441199	"Cefoxitin"	"Cephalosporins (2nd gen.)"	"Other beta-lactam antibacterials"	"Second-generation cephalosporins"	"c(\"cfox\", \"cfsc\", \"cfx\", \"cfxt\", \"cx\", \"fox\", \"fx\")"	"c(\"cefoxitin\", \"cefoxitina\", \"cefoxitine\", \"cefoxitinum\", \"cefoxotin\", \"cephoxitin\", \"mefoxin\", \"mefoxitin\", \"rephoxitin\")"			6	"g"	"c(\"25240-3\", \"3448-8\")"
 "ZOP"		9571080	"Cefozopran"	"Cephalosporins (4th gen.)"			""	"cefozopran"					
 "CFZ"		68597	"Cefpimizole"	"Cephalosporins (3rd gen.)"			""	"c(\"cefpimizol\", \"cefpimizole\", \"cefpimizole sodium\", \"cefpimizolum\")"					
 "CPM"	"J01DD11"	636405	"Cefpiramide"	"Cephalosporins (3rd gen.)"	"Other beta-lactam antibacterials"	"Third-generation cephalosporins"	""	"c(\"cefpiramide\", \"cefpiramide acid\", \"cefpiramido\", \"cefpiramidum\")"			2	"g"	
@@ -105,7 +105,7 @@
 "CPT"	"J01DI02"	56841980	"Ceftaroline"	"Cephalosporins (5th gen.)"			"c(\"\", \"cfro\")"	"c(\"teflaro\", \"zinforo\")"					
 "CPA"			"Ceftaroline/avibactam"	"Cephalosporins (5th gen.)"			""	""					
 "CAZ"	"J01DD02"	5481173	"Ceftazidime"	"Cephalosporins (3rd gen.)"	"Other beta-lactam antibacterials"	"Third-generation cephalosporins"	"c(\"caz\", \"cefta\", \"cfta\", \"cftz\", \"taz\", \"tz\", \"xtz\")"	"c(\"ceftazidim\", \"ceftazidima\", \"ceftazidime\", \"ceftazidimum\", \"ceptaz\", \"fortaz\", \"fortum\", \"pentacef\", \"tazicef\", \"tazidime\")"			4	"g"	"c(\"21151-6\", \"3449-6\", \"80960-8\")"
-"CZA"			"Ceftazidime/avibactam"	"Cephalosporins (3rd gen.)"			""	""					
+"CZA"			"Ceftazidime/avibactam"	"Cephalosporins (3rd gen.)"			"c(\"\", \"cfav\")"	""					
 "CCV"	"J01DD52"	9575352	"Ceftazidime/clavulanic acid"	"Cephalosporins (3rd gen.)"	"Other beta-lactam antibacterials"	"Third-generation cephalosporins"	"c(\"czcl\", \"xtzl\")"	""					
 "CEM"		6537431	"Cefteram"	"Cephalosporins (3rd gen.)"			""	"c(\"cefteram\", \"cefterame\", \"cefteramum\", \"ceftetrame\")"					
 "CPL"		5362114	"Cefteram pivoxil"	"Cephalosporins (3rd gen.)"			""	"c(\"cefteram pivoxil\", \"tomiron\")"					
--- a/data-raw/data_dsmz.rds
+++ b/data-raw/data_dsmz.rds
--- a/data-raw/eucast_rules.tsv
+++ b/data-raw/eucast_rules.tsv
@@ -9,22 +9,6 @@
 # >>>>> IF YOU WANT TO IMPORT THIS FILE INTO YOUR OWN SOFTWARE, HAVE THE FIRST 10 LINES SKIPPED <<<<<
 # -------------------------------------------------------------------------------------------------------------------------------
 if_mo_property	like.is.one_of	this_value	and_these_antibiotics	have_these_values	then_change_these_antibiotics	to_value	reference.rule	reference.rule_group
-genus	like	.*	AMP	S	AMX	S	Non-EUCAST: inherit ampicillin results for unavailable amoxicillin	Other rules
-genus	like	.*	AMP	I	AMX	I	Non-EUCAST: inherit ampicillin results for unavailable amoxicillin	Other rules
-genus	like	.*	AMP	R	AMX	R	Non-EUCAST: inherit ampicillin results for unavailable amoxicillin	Other rules
-genus	like	.*	AMX	S	AMP	S	Non-EUCAST: inherit amoxicillin results for unavailable ampicillin	Other rules
-genus	like	.*	AMX	I	AMP	I	Non-EUCAST: inherit amoxicillin results for unavailable ampicillin	Other rules
-genus	like	.*	AMX	R	AMP	R	Non-EUCAST: inherit amoxicillin results for unavailable ampicillin	Other rules
-genus	like	.*	AMC	R	AMP, AMX	R	Non-EUCAST: set ampicillin = R where amoxicillin/clav acid = R	Other rules
-genus	like	.*	SAM	R	AMP, AMX	R	Non-EUCAST: set ampicillin = R where ampicillin/sulbactam = R	Other rules
-genus	like	.*	TZP	R	PIP	R	Non-EUCAST: set piperacillin = R where piperacillin/tazobactam = R	Other rules
-genus	like	.*	SXT	R	TMP	R	Non-EUCAST: set trimethoprim = R where trimethoprim/sulfa = R	Other rules
-genus	like	.*	AMP	S	AMC	S	Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S	Other rules
-genus	like	.*	AMX	S	AMC	S	Non-EUCAST: set amoxicillin/clav acid = S where ampicillin = S	Other rules
-genus	like	.*	AMP	S	SAM	S	Non-EUCAST: set ampicillin/sulbactam = S where ampicillin = S	Other rules
-genus	like	.*	AMX	S	SAM	S	Non-EUCAST: set ampicillin/sulbactam = S where ampicillin = S	Other rules
-genus	like	.*	PIP	S	TZP	S	Non-EUCAST: set piperacillin/tazobactam = S where piperacillin = S	Other rules
-genus	like	.*	TMP	S	SXT	S	Non-EUCAST: set trimethoprim/sulfa = S where trimethoprim = S	Other rules
 order	is	Enterobacterales	AMP	S	AMX	S	Enterobacterales (Order)	Breakpoints
 order	is	Enterobacterales	AMP	I	AMX	I	Enterobacterales (Order)	Breakpoints
 order	is	Enterobacterales	AMP	R	AMX	R	Enterobacterales (Order)	Breakpoints
--- a/data-raw/microorganisms.translation.rds
+++ b/data-raw/microorganisms.translation.rds
--- a/data-raw/microorganisms.txt
+++ b/data-raw/microorganisms.txt
--- a/data-raw/reproduction_of_antibiotics.R
+++ b/data-raw/reproduction_of_antibiotics.R
@@ -322,7 +322,7 @@ antibiotics[which(antibiotics$ab == as.ab("cefuroxim")), "abbreviations"][[1]] <
 antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefotaxim")), "abbreviations"][[1]], "cftx"))
 antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("ceftazidime")), "abbreviations"][[1]], "cftz"))
 antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefepime")), "abbreviations"][[1]], "cfpi"))
-antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt"))
+antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == as.ab("cefoxitin")), "abbreviations"][[1]], "cfxt", "cfsc"))
 # More GLIMS codes
 antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
 antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CRO"), "abbreviations"][[1]], "cftr"))
@@ -377,6 +377,7 @@ antibiotics[which(antibiotics$ab == "CTX"), "abbreviations"][[1]] <- list(c(anti
 antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CAZ"), "abbreviations"][[1]], "cftz"))
 antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CFM"), "abbreviations"][[1]], "cfxm"))
 antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "FOX"), "abbreviations"][[1]], "cfxt"))
+antibiotics[which(antibiotics$ab == "CZA"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZA"), "abbreviations"][[1]], "cfav"))
 antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZO"), "abbreviations"][[1]], "cfzl"))
 antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CZX"), "abbreviations"][[1]], "cfzx"))
 antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]] <- list(c(antibiotics[which(antibiotics$ab == "CHL"), "abbreviations"][[1]], "chlo"))
@@ -577,10 +578,10 @@ antibiotics <- antibiotics %>%

 # set as data.frame again
 antibiotics <- as.data.frame(antibiotics, stringsAsFactors = FALSE)
-class(antibiotics$ab) <- "ab"
+class(antibiotics$ab) <- c("ab", "character")
 antibiotics <- antibiotics %>% arrange(name)

-# make all abbreviations and synonyms lower case, unique and alphabetically sorted
+# make all abbreviations and synonyms lower case, unique and alphabetically sorted ----
 for (i in 1:nrow(antibiotics)) {
  abb <- sort(unique(tolower(antibiotics[i, "abbreviations"][[1]])))
  syn <- sort(unique(tolower(antibiotics[i, "synonyms"][[1]])))
--- a/data-raw/reproduction_of_microorganisms.R
+++ b/data-raw/reproduction_of_microorganisms.R
@@ -23,41 +23,81 @@

 # Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life:
 # https://opendata.eol.org/dataset/catalogue-of-life/
+# Data retrieved from the Global Biodiversity Information Facility (GBIF): 
 # https://doi.org/10.15468/rffz4x
-# (download the resource file with a name like "Catalogue of Life yyyy-mm-dd")
-# and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures
-# https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html
-# (download the latest "Complete List" as xlsx file)
+#
+# And from the Leibniz Institute: German Collection of Microorganisms and Cell Cultures (DSMZ)
+# (register first at https://bacdive.dsmz.de/api/pnu/registration/register/ and use API as done below)

 library(dplyr)
 library(AMR)
+# also needed: data.table, httr, jsonlite, cleaner, stringr

-# unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then:
-# data_col <- data.table::fread("data-raw/taxon.tab")
-data_col <- data.table::fread("data-raw/taxa.txt", quote = "")
+# unzip and extract taxon.tab (CoL) and taxa.txt (GBIF), both around 1.5 GB and 3.7-3.9M rows, then:
+data_col_raw <- data.table::fread("data-raw/taxon.tab", quote = "")
+data_gbif <- data.table::fread("data-raw/taxa.txt", quote = "")

-# read the xlsx file from DSMZ (only around 2.5 MB):
-data_dsmz <- readxl::read_xlsx("data-raw/DSMZ_bactnames.xlsx")
+# merge the two
+data_col <- data_gbif %>% 
+  rename(referenceID = identifier) %>% 
+  bind_rows(data_col_raw) %>% 
+  distinct(scientificName, kingdom, genus, specificEpithet, infraspecificEpithet, .keep_all = TRUE)
+rm(data_col_raw)
+rm(data_gbif)
+
+
+# read the data from the DSMZ API (around 19000 rows)
+dsmz_username <- ""
+dsmz_password <- ""
+GET_df <- function(url) {
+  result <- httr::GET(url, httr::authenticate(dsmz_username, dsmz_password))
+  httr::stop_for_status(result)
+  result %>%
+    httr::content(type = "text", encoding = "UTF-8") %>%
+    jsonlite::fromJSON(flatten = TRUE)
+}
+dsmz_first <- GET_df("https://bacdive.dsmz.de/api/pnu/species?page=1&format=json")
+data_dsmz <- dsmz_first$results
+# fetch the remaining pages and append them; this will take appr. `dsmz_first$count / 100 * 5 / 60` minutes
+for (i in seq_len(ceiling(dsmz_first$count / 100))[-1]) { # ceiling(): round(x + 0.5) rounds halves to even and can request a non-existent page
+  data_dsmz <- rbind(data_dsmz,
+                     GET_df(paste0("https://bacdive.dsmz.de/api/pnu/species/?page=", i, "&format=json"))$results)
+  cat(i, "-", AMR:::percentage(i / ceiling(dsmz_first$count / 100)), "\n")
+}
+rm(dsmz_first)

 # the CoL data is over 3.7M rows:
-data_col %>% freq(kingdom)
+data_col %>% cleaner::freq(kingdom)
 #      Item             Count   Percent   Cum. Count   Cum. Percent
 # ---  ----------  ----------  --------  -----------  -------------
-# 1    Animalia     2,225,627     59.1%    2,225,627          59.1%
-# 2    Plantae      1,177,412     31.3%    3,403,039          90.4%
-# 3    Fungi          290,145      7.7%    3,693,184          98.1%
-# 4    Chromista       47,126      1.3%    3,740,310          99.3%
-# 5    Bacteria        14,478      0.4%    3,754,788          99.7%
-# 6    Protozoa         6,060      0.2%    3,760,848          99.9%
-# 7    Viruses          3,827      0.1%    3,764,675         100.0%
-# 8    Archaea            610      0.0%    3,765,285         100.0%
+# 1    Animalia     2,494,992    55.43%    2,494,992         55.43%
+# 2    Plantae      1,379,674    30.65%    3,874,666         86.08%
+# 3    Fungi          547,619    12.17%    4,422,285         98.24%
+# 4    Chromista       51,475     1.14%    4,473,760         99.39%
+# 5    Bacteria        14,442     0.32%    4,488,202         99.71%
+# 6    Protozoa         8,750     0.19%    4,496,952         99.90%
+# 7    Viruses          3,805     0.08%    4,500,757         99.99%
+# 8    Archaea            609     0.01%    4,501,366        100.00%

 # clean data_col
-data_col <- data_col %>%
+data_col.bak <- data_col
+data_col_old <- data_col %>%
+  # filter: has new accepted name
+  filter(!is.na(acceptedNameUsageID)) %>% 
  as_tibble() %>%
-  select(col_id = taxonID,
-         col_id_new = acceptedNameUsageID,
-         fullname = scientificName,
+  transmute(fullname = trimws(stringr::str_replace(scientificName, 
+                                                   pattern = stringr::fixed(scientificNameAuthorship), 
+                                                   replacement = "")),
+            fullname_new = trimws(paste(ifelse(is.na(genus), "", genus), 
+                                        ifelse(is.na(specificEpithet), "", specificEpithet), 
+                                        ifelse(is.na(infraspecificEpithet), "", infraspecificEpithet))),
+            ref = scientificNameAuthorship,
+            prevalence = NA_integer_)
+data_col <- data_col %>%
+  # filter: has no new accepted name
+  filter(is.na(acceptedNameUsageID)) %>% 
+  as_tibble() %>%
+  transmute(fullname = "",
            kingdom,
            phylum,
            class,
@@ -68,49 +108,49 @@ data_col <- data_col %>%
            subspecies = infraspecificEpithet,
            rank = taxonRank,
            ref = scientificNameAuthorship,
-         species_id = references)
-data_col$source <- "CoL"
+            species_id = referenceID,
+            source = "CoL")

 # clean data_dsmz
-data_dsmz <- data_dsmz %>%
+data_dsmz.bak <- data_dsmz
+data_dsmz_old <- data_dsmz %>%
+  # filter: correct name is not NULL
+  filter(!sapply(correct_name, is.null)) %>% 
  as_tibble() %>%
-  transmute(col_id = NA_integer_,
-            col_id_new = NA_integer_,
-            fullname = "",
-            # kingdom = "",
-            # phylum = "",
-            # class = "",
-            # order = "",
-            # family = "",
-            genus = ifelse(is.na(GENUS), "", GENUS),
-            species = ifelse(is.na(SPECIES), "", SPECIES),
-            subspecies = ifelse(is.na(SUBSPECIES), "", SUBSPECIES),
+  transmute(fullname = trimws(paste(ifelse(is.na(genus), "", genus), 
+                                    ifelse(is.na(species_epithet), "", species_epithet), 
+                                    ifelse(is.na(subspecies_epithet), "", subspecies_epithet))),
+            fullname_new = sapply(correct_name, function(x) x[2L]),
+            ref = authors,
+            prevalence = NA_integer_)
+
+data_dsmz <- data_dsmz %>%
+  # filter: correct name is NULL
+  filter(sapply(correct_name, is.null)) %>% 
+  as_tibble() %>%
+  transmute(fullname = "",
+            kingdom = regio,
+            phylum,
+            class = classis,
+            # order = "", # does not contain order, will add later based on CoL
+            family = familia,
+            genus = ifelse(is.na(genus), "", genus),
+            species = ifelse(is.na(species_epithet), "", species_epithet),
+            subspecies = ifelse(is.na(subspecies_epithet), "", subspecies_epithet),
            rank = ifelse(species == "", "genus", "species"),
-            ref = AUTHORS,
-            species_id = as.character(RECORD_NO),
+            ref = authors,
+            species_id = as.character(pnu_no),
            source = "DSMZ")

 # DSMZ only contains genus/(sub)species, try to find taxonomic properties based on genus and data_col
 ref_taxonomy <- data_col %>%
-  filter(genus %in% data_dsmz$genus,
-         kingdom %in% c("Bacteria", "Chromista", "Archaea", "Protozoa", "Fungi"),
-         family != "") %>%
-  mutate(kingdom = factor(kingdom,
-                          # in the left_join following, try Bacteria first, then Chromista, ...
-                          levels = c("Bacteria", "Chromista", "Archaea", "Protozoa", "Fungi"),
-                          ordered = TRUE)) %>% 
+  filter(family %in% data_dsmz$family & family != "") %>% 
  arrange(kingdom) %>% 
-  distinct(genus, .keep_all = TRUE) %>%
-  select(kingdom, phylum, class, order, family, genus)
+  distinct(family, .keep_all = TRUE) %>% 
+  select(family, order)

 data_dsmz <- data_dsmz %>%
-  left_join(ref_taxonomy, by = "genus") %>%
-  mutate(kingdom = "Bacteria",
-         phylum = ifelse(is.na(phylum), "(unknown phylum)", phylum),
-         class = ifelse(is.na(class), "(unknown class)", class),
-         order = ifelse(is.na(order), "(unknown order)", order),
-         family = ifelse(is.na(family), "(unknown family)", family),
-  )
+  left_join(ref_taxonomy, by = "family") # NAs will later become "(unknown ...)"

 # combine everything
 data_total <- data_col %>%
@@ -119,6 +159,8 @@ data_total <- data_col %>%
 rm(data_col)
 rm(data_dsmz)
 rm(ref_taxonomy)
+rm(data_col.bak)
+rm(data_dsmz.bak)

 mo_found_in_NL <- c("Absidia", "Acremonium", "Actinotignum", "Aedes", "Alternaria", "Anaerosalibacter", "Ancylostoma", 
                    "Angiostrongylus", "Anisakis", "Anopheles", "Apophysomyces", "Arachnia", "Ascaris", "Aspergillus", 
@@ -158,8 +200,6 @@ MOs <- data_total %>%
    )
    # or the genus has to be one of the genera we found in our hospitals last decades (Northern Netherlands, 2002-2018)
    | genus %in% mo_found_in_NL
-    # or the taxonomic entry is old - the species was renamed
-    | !is.na(col_id_new)
  ) %>%
  # really no Plantae (e.g. Dracunculus exist both as worm and as plant)
  filter(kingdom != "Plantae") %>% 
@@ -174,59 +214,56 @@ MOs <- MOs %>% bind_rows(data_total %>%
                                  | (family %in% MOs$family & rank == "family")
                                  | (genus %in% MOs$genus & rank == "genus")))

-# filter old taxonomic names so only the ones with an existing reference will be kept
-MOs <- MOs %>%
-  filter(is.na(col_id_new) | (!is.na(col_id_new) & col_id_new %in% MOs$col_id))
+get_author_year <- function(ref) {
+  # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011'
  
-MOs <- MOs %>%
-  # remove text if it contains 'Not assigned' like phylum in viruses
-  mutate_all(~gsub("(Not assigned|\\[homonym\\]|\\[mistake\\])", "", ., ignore.case = TRUE))
-
-MOs <- MOs %>%
-  # Only keep first author, e.g. transform 'Smith, Jones, 2011' to 'Smith et al., 2011':
-  mutate(authors2 = iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT"),
+  authors2 <- iconv(ref, from = "UTF-8", to = "ASCII//TRANSLIT")
  # remove leading and trailing brackets
-         authors2 = gsub("^[(](.*)[)]$", "\\1", authors2),
+  authors2 <- gsub("^[(](.*)[)]$", "\\1", authors2)
  # only take part after brackets if there's a name
-         authors2 = ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
+  authors2 <- ifelse(grepl(".*[)] [a-zA-Z]+.*", authors2),
                     gsub(".*[)] (.*)", "\\1", authors2),
-                           authors2),
+                     authors2)
  # get year from last 4 digits
-         lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2)),
+  lastyear = as.integer(gsub(".*([0-9]{4})$", "\\1", authors2))
  # can never be later than now
  lastyear = ifelse(lastyear > as.integer(format(Sys.Date(), "%Y")),
                    NA,
-                           lastyear),
+                    lastyear)
  # get authors without last year
-         authors = gsub("(.*)[0-9]{4}$", "\\1", authors2),
+  authors <- gsub("(.*)[0-9]{4}$", "\\1", authors2)
  # remove nonsense characters from names
-         authors = gsub("[^a-zA-Z,'& -]", "", authors),
+  authors <- gsub("[^a-zA-Z,'& -]", "", authors)
  # remove trailing and leading spaces
-         authors = trimws(authors),
+  authors <- trimws(authors)
  # only keep first author and replace all others by 'et al'
-         authors = gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors),
+  authors <- gsub("(,| and| et| &| ex| emend\\.?) .*", " et al.", authors)
  # et al. always with ending dot
-         authors = gsub(" et al\\.?", " et al.", authors),
-         authors = gsub(" ?,$", "", authors),
+  authors <- gsub(" et al\\.?", " et al.", authors)
+  authors <- gsub(" ?,$", "", authors)
  # don't start with 'sensu' or 'ehrenb'
-         authors = gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE),
+  authors <- gsub("^(sensu|Ehrenb.?) ", "", authors, ignore.case = TRUE)
  # no initials, only surname
-         authors = gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE),
+  authors <- gsub("^([A-Z]+ )+", "", authors, ignore.case = FALSE)
  # combine author and year if year is available
-         ref = ifelse(!is.na(lastyear),
+  ref <- ifelse(!is.na(lastyear),
                paste0(authors, ", ", lastyear),
-                      authors),
+                authors)
  # fix beginning and ending
-         ref = gsub(", $", "", ref),
-         ref = gsub("^, ", "", ref),
-         ref = gsub("^(emend|et al.,?)", "", ref),
-         ref = trimws(ref)
-  )
-# a lot start with a lowercase character - fix that
-MOs$ref[!grepl("^d[A-Z]", MOs$ref)] <- gsub("^([a-z])", "\\U\\1", MOs$ref[!grepl("^d[A-Z]", MOs$ref)], perl = TRUE)
-# specific one for the French that are named dOrbigny 
-MOs$ref[grepl("^d[A-Z]", MOs$ref)] <- gsub("^d", "d'", MOs$ref[grepl("^d[A-Z]", MOs$ref)])
-MOs <- MOs %>% mutate(ref = gsub(" +", " ", ref))
+  ref <- gsub(", $", "", ref)
+  ref <- gsub("^, ", "", ref)
+  ref <- gsub("^(emend|et al.,?)", "", ref)
+  ref <- trimws(ref)
+  
+  # a lot start with a lowercase character - fix that
+  ref[!grepl("^d[A-Z]", ref)] <- gsub("^([a-z])", "\\U\\1", ref[!grepl("^d[A-Z]", ref)], perl = TRUE)
+  # specific one for the French that are named dOrbigny 
+  ref[grepl("^d[A-Z]", ref)] <- gsub("^d", "d'", ref[grepl("^d[A-Z]", ref)])
+  ref <- gsub(" +", " ", ref)
+  ref
+}
+
+MOs <- MOs %>% mutate(ref = get_author_year(ref))

 # Remove non-ASCII characters (these are not allowed by CRAN)
 MOs <- MOs %>%
@@ -235,53 +272,58 @@ MOs <- MOs %>%
  # remove invalid characters
  mutate_all(~gsub("[\"'`]+", "", .))

-# Split old taxonomic names - they refer in the original data to a new `taxonID` with `acceptedNameUsageID`
-MOs.old <- MOs %>%
-  filter(!is.na(col_id_new),
-         ref != "",
-         source != "DSMZ") %>%
-  transmute(col_id,
-            col_id_new,
-            fullname =
-              trimws(
-                gsub("(.*)[(].*", "\\1",
-                     stringr::str_replace(
-                       string = fullname,
-                       pattern = stringr::fixed(authors2),
-                       replacement = "")) %>%
-                  gsub(" (var|f|subsp)[.]", "", .)),
-            ref) %>%
-  filter(!is.na(fullname)) %>%
-  distinct(fullname, .keep_all = TRUE) %>%
-  arrange(col_id)
-
-MO.bak <- MOs
-
+# set new fullnames
 MOs <- MOs %>%
-  filter(is.na(col_id_new) | source == "DSMZ") %>%
-  transmute(col_id,
-            fullname = trimws(case_when(rank == "family" ~ family,
+  mutate(fullname = trimws(case_when(rank == "family" ~ family,
                                     rank == "order" ~ order,
                                     rank == "class" ~ class,
                                     rank == "phylum" ~ phylum,
                                     rank == "kingdom" ~ kingdom,
                                     TRUE ~ paste(genus, species, subspecies))),
+         fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>% 
+  # remove text if it contains 'Not assigned', etc.
+  mutate_all(function(x) ifelse(x %like% "(not assigned|homonym|mistake)", NA, x)) %>% 
+  # clean taxonomy
+  mutate(kingdom = ifelse(is.na(kingdom) | trimws(kingdom) == "", "(unknown kingdom)", trimws(kingdom)),
+         phylum = ifelse(is.na(phylum) | trimws(phylum) == "", "(unknown phylum)", trimws(phylum)),
+         class = ifelse(is.na(class) | trimws(class) == "", "(unknown class)", trimws(class)),
+         order = ifelse(is.na(order) | trimws(order) == "", "(unknown order)", trimws(order)),
+         family = ifelse(is.na(family) | trimws(family) == "", "(unknown family)", trimws(family)))
+
+# Split old taxonomic names
+MOs.old <- data_col_old %>% 
+  filter(!gsub(" (var|f|subsp)[.]", "", fullname_new) %in% data_dsmz_old$fullname) %>% 
+  bind_rows(data_dsmz_old) %>%
+  mutate(fullname_new = gsub(" (var|f|subsp)[.]", "", fullname_new),
+         fullname = gsub(" (var|f|subsp)[.]", "", fullname)) %>% 
+  # for cases like Chlamydia pneumoniae -> Chlamydophila pneumoniae -> Chlamydia pneumoniae:
+  filter(!fullname %in% fullname_new &
+           fullname_new %in% MOs$fullname &
+           !is.na(fullname) & 
+           fullname != fullname_new) %>%
+  distinct(fullname, .keep_all = TRUE) %>%
+  arrange(fullname) %>% 
+  mutate(ref = get_author_year(ref))
+
+MOs <- MOs %>%
+  # remove entries that are old and in MOs.old
+  filter(!fullname %in% MOs.old$fullname) %>% 
+  # mark up
+  transmute(fullname,
            kingdom,
            phylum,
            class,
            order,
            family,
-            genus = gsub(":", "", genus),
+            genus,
            species,
            subspecies,
            rank,
            ref,
-            species_id = gsub(".*/([a-f0-9]+)", "\\1", species_id),
+            species_id = gsub("[^a-zA-Z0-9].*", "", species_id),
            source) %>%
-  #distinct(fullname, .keep_all = TRUE) %>%
-  filter(!grepl("unassigned", fullname, ignore.case = TRUE)) %>% 
-  # prefer DSMZ over CoL, since that's more recent
-  arrange(desc(source)) %>% 
+  # prefer known taxonomy over unknown taxonomy, then DSMZ over CoL; desc() accepts one vector, so wrap each column
+  arrange(desc(kingdom), desc(genus), desc(species), desc(source)) %>%
  distinct(kingdom, fullname, .keep_all = TRUE)

 # remove all genera that have no species - they are irrelevant for microbiology and almost all from the kingdom of Animalia
@@ -296,43 +338,45 @@ to_remove <- MOs %>%
 MOs <- MOs %>% filter(!(paste(kingdom, genus) %in% to_remove))
 rm(to_remove)

-# add CoL's col_id, source and ref from MOs.bak, for the cases where DSMZ took preference
+# add all missing genera, families and orders
 MOs <- MOs %>% 
-  mutate(kingdom_fullname = paste(kingdom, fullname)) %>% 
-  left_join(MO.bak %>%
-              filter(is.na(col_id_new), !is.na(col_id)) %>%
-              transmute(col_id, species_id, source, ref, kingdom_fullname = trimws(paste(kingdom, genus, species, subspecies))), 
-            by = "kingdom_fullname",
-            suffix = c("_dsmz", "_col")) %>% 
-  mutate(col_id = col_id_col, 
-         species_id = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz, 
-                             gsub(".*/(.*)$", "\\1", species_id_col), 
-                             species_id_dsmz),
-         source = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz,
-                         source_col, 
-                         source_dsmz), 
-         ref = ifelse(!is.na(species_id_col) & ref_col == ref_dsmz,
-                      ref_col, 
-                      ref_dsmz)) %>% 
-  select(-matches("(_col|_dsmz|kingdom_fullname)"))
+  bind_rows(MOs %>% 
+              arrange(genus, species) %>%
+              distinct(genus, .keep_all = TRUE) %>%
+              filter(rank == "species") %>%
+              mutate(fullname = genus, 
+                     species = "", 
+                     rank = "genus", 
+                     species_id = "",
+                     ref = NA_character_)) %>% 
+  bind_rows(MOs %>% 
+              arrange(family, genus) %>%
+              distinct(family, .keep_all = TRUE) %>%
+              filter(rank == "genus") %>%
+              mutate(fullname = family, 
+                     genus = "",
+                     rank = "family", 
+                     species_id = "",
+                     ref = NA_character_)) %>% 
+  bind_rows(MOs %>% 
+              arrange(order, family) %>%
+              distinct(family, .keep_all = TRUE) %>%
+              filter(rank == "family") %>%
+              mutate(fullname = order, 
+                     family = "",
+                     rank = "order", 
+                     species_id = "",
+                     ref = NA_character_))

-
-MOs.old <- MOs.old %>%
-  # remove the ones that are in the MOs data set
-  filter(col_id_new %in% MOs$col_id) %>% 
-  # and remove the ones that have the exact same fullname in the MOs data set, like Moraxella catarrhalis
-  left_join(MOs, by = "fullname") %>%
-  filter(col_id_new != col_id.y | is.na(col_id.y)) %>% 
-  select(col_id = col_id.x, col_id_new, fullname, ref = ref.x)
-
-# remove the records that are in MOs.old
-sum(MOs.old$fullname %in% MOs$fullname)
-MOs <- MOs %>% filter(!fullname %in% MOs.old$fullname)
-sum(MOs.old$fullname %in% MOs$fullname)
+# remove the empty ones
+MOs <- MOs %>%
+  mutate(fullname = gsub(",.*", "", fullname)) %>% 
+  distinct(kingdom, fullname, .keep_all = TRUE) %>% 
+  filter(fullname != "")

 # what characters are in the fullnames?
 table(sort(unlist(strsplit(x = paste(MOs$fullname, collapse = ""), split = ""))))
-MOs %>% filter(!fullname %like% "^[a-z ]+$") %>% View()
+MOs %>% filter(!fullname %like% "^[a-z ]+$") %>% arrange(fullname) %>% View()

 table(MOs$kingdom, MOs$rank)
 table(AMR::microorganisms$kingdom, AMR::microorganisms$rank)
@@ -436,7 +480,6 @@ MOs <- MOs %>%
  bind_rows(
    # Unknowns
    data.frame(mo = "UNKNOWN",
-               col_id = NA_integer_,
               fullname = "(unknown name)",
               kingdom = "(unknown kingdom)",
               phylum = "(unknown phylum)",
@@ -453,7 +496,6 @@ MOs <- MOs %>%
               prevalence = 1,
               stringsAsFactors = FALSE),
    data.frame(mo = "B_GRAMN",
-               col_id = NA_integer_,
               fullname = "(unknown Gram-negatives)",
               kingdom = "Bacteria",
               phylum = "(unknown phylum)",
@@ -470,7 +512,6 @@ MOs <- MOs %>%
               prevalence = 1,
               stringsAsFactors = FALSE),
    data.frame(mo = "B_GRAMP",
-               col_id = NA_integer_,
               fullname = "(unknown Gram-positives)",
               kingdom = "Bacteria",
               phylum = "(unknown phylum)",
@@ -487,7 +528,6 @@ MOs <- MOs %>%
               prevalence = 1,
               stringsAsFactors = FALSE),
    data.frame(mo = "F_YEAST",
-               col_id = NA_integer_,
               fullname = "(unknown yeast)",
               kingdom = "Fungi",
               phylum = "(unknown phylum)",
@@ -504,7 +544,6 @@ MOs <- MOs %>%
               prevalence = 2,
               stringsAsFactors = FALSE),
    data.frame(mo = "F_FUNGUS",
-               col_id = NA_integer_,
               fullname = "(unknown fungus)",
               kingdom = "Fungi",
               phylum = "(unknown phylum)",
@@ -524,7 +563,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_CONS", mo),
-             col_id = NA_integer_,
             species = "coagulase-negative",
             fullname = "Coagulase-negative Staphylococcus (CoNS)",
             ref = NA_character_,
@@ -534,7 +572,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Staphylococcus", species == "epidermidis") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_COPS", mo),
-             col_id = NA_integer_,
             species = "coagulase-positive",
             fullname = "Coagulase-positive Staphylococcus (CoPS)",
             ref = NA_character_,
@@ -558,7 +595,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "dysgalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPC", mo),
-             col_id = NA_integer_,
             species = "group C" ,
             fullname = "Streptococcus group C",
             ref = NA_character_,
@@ -567,7 +603,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPD", mo),
-             col_id = NA_integer_,
             species = "group D" ,
             fullname = "Streptococcus group D",
             ref = NA_character_,
@@ -576,7 +611,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPF", mo),
-             col_id = NA_integer_,
             species = "group F" ,
             fullname = "Streptococcus group F",
             ref = NA_character_,
@@ -585,7 +619,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPG", mo),
-             col_id = NA_integer_,
             species = "group G" ,
             fullname = "Streptococcus group G",
             ref = NA_character_,
@@ -594,7 +627,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPH", mo),
-             col_id = NA_integer_,
             species = "group H" ,
             fullname = "Streptococcus group H",
             ref = NA_character_,
@@ -603,7 +635,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_GRPK", mo),
-             col_id = NA_integer_,
             species = "group K" ,
             fullname = "Streptococcus group K",
             ref = NA_character_,
@@ -613,7 +644,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_HAEM", mo),
-             col_id = NA_integer_,
             species = "beta-haemolytic" ,
             fullname = "Beta-haemolytic Streptococcus",
             ref = NA_character_,
@@ -623,7 +653,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_VIRI", mo),
-             col_id = NA_integer_,
             species = "viridans" ,
             fullname = "Viridans Group Streptococcus (VGS)",
             ref = NA_character_,
@@ -633,7 +662,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(genus == "Streptococcus", species == "agalactiae") %>% .[1,] %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_MILL", mo),
-             col_id = NA_integer_,
             species = "milleri" ,
             fullname = "Milleri Group Streptococcus (MGS)",
             ref = NA_character_,
@@ -646,7 +674,6 @@ MOs <- MOs %>%
      mutate(mo = paste0(mo, "_HMNS"),
             fullname = paste(fullname, "hominis"),
             species = "hominis",
-             col_id = NA,
             source = "manually added",
             ref = NA_character_,
             species_id = ""),
@@ -654,7 +681,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(fullname == "Dientamoeba") %>%
      mutate(mo = gsub("(.*?)_.*", "\\1_THMNS", mo),
-             col_id = NA,
             fullname = "Trichomonas",
             family = "Trichomonadidae",
             genus = "Trichomonas",
@@ -664,7 +690,6 @@ MOs <- MOs %>%
    MOs %>%
      filter(fullname == "Dientamoeba fragilis") %>%
      mutate(mo = gsub("(.*?)_.*", "\\1_THMNS_VAG", mo),
-             col_id = NA,
             fullname = "Trichomonas vaginalis",
             family = "Trichomonadidae",
             genus = "Trichomonas",
@@ -675,7 +700,6 @@ MOs <- MOs %>%
    MOs %>% # add family as such too
      filter(fullname == "Monocercomonadidae") %>%
      mutate(mo = gsub("(.*)_(.*)_.*", "\\1_\\2_TRCHMNDD", mo),
-             col_id = NA,
             fullname = "Trichomonadidae",
             family = "Trichomonadidae",
             rank = "family",
@@ -760,33 +784,37 @@ new_families <- MOs %>%
  filter(order == "Enterobacterales") %>%
  pull(family) %>%
  unique()
-class(MOs$mo) <- "character"
-MOs <- rbind(MOs %>% filter(!(rank == "family" & fullname %in% new_families)), 
-             AMR::microorganisms %>%
-               select(-snomed) %>% 
-               filter(family == "Enterobacteriaceae" & rank == "family") %>%
-               rbind(., ., ., ., ., ., .) %>% 
-               mutate(fullname = new_families,
-                      source = "manually added",
-                      ref = "Adeolu et al., 2016",
-                      family = fullname, mo = paste0("B_[FAM]_",
+
+MOs <- MOs %>% 
+  filter(!(rank == "family" & fullname %in% new_families)) %>% 
+  bind_rows(tibble(mo = paste0("B_[FAM]_",
                               toupper(abbreviate(new_families,
                                                  minlength = 8,
                                                  use.classes = TRUE,
                                                  method = "both.sides",
-                                                                        strict = FALSE)))))
+                                                  strict = FALSE))),
+                   fullname = new_families,
+                   kingdom = "Bacteria",
+                   phylum = "Proteobacteria",
+                   class = "Gammaproteobacteria",
+                   order = "Enterobacterales",
+                   family = new_families,
+                   genus = "",
+                   species = "",
+                   subspecies = "",
+                   rank = "family",
+                   ref = "Adeolu et al., 2016",
+                   species_id = NA_character_,
+                   source = "manually added",
+                   prevalence = 1))
+
 MOs[which(MOs$order == "Enterobacteriales"), "order"] <- "Enterobacterales"
 MOs[which(MOs$fullname == "Enterobacteriales"), "fullname"] <- "Enterobacterales"

-MOs <- MOs %>%
-  group_by(kingdom) %>%
-  distinct(fullname, .keep_all = TRUE) %>% 
-  ungroup() %>% 
-  filter(fullname != "")
-
 # add prevalence to old taxonomic names
 MOs.old <- MOs.old %>% 
-  left_join(MOs %>% select(col_id, prevalence), by = c("col_id_new" = "col_id"))
+  select(-prevalence) %>% 
+  left_join(MOs %>% select(fullname, prevalence), by = c("fullname_new" = "fullname"))

 # everything distinct?
 sum(duplicated(MOs$mo))
@@ -797,18 +825,105 @@ colnames(MOs)
 MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View()
 MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View()
 # and the ones we lost:
-AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
-AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View()             # based on mo
-AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View() 
+# AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname
+AMR::microorganisms %>% filter(!fullname %in% c(MOs$fullname, MOs.old$fullname)) %>% View() # excluding renamed ones
+# AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View()             # based on mo
+# AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View() 
 # and these IDs have changed:
 old_new <- MOs %>%
  mutate(kingdom_fullname = paste(kingdom, fullname)) %>% 
-  filter(kingdom_fullname %in% (AMR::microorganisms %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% pull(kingdom_fullname))) %>%
-  left_join(AMR::microorganisms %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>% 
+  filter(kingdom_fullname %in% (AMR::microorganisms %>% 
+                                  mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
+                                  pull(kingdom_fullname))) %>%
+  left_join(AMR::microorganisms %>% 
+              mutate(kingdom_fullname = paste(kingdom, fullname)) %>%
+              select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>% 
  filter(mo_new != mo_old) %>% 
  select(mo_old, mo_new, everything())
-
 View(old_new)
+
+# temporarily replace the MO codes in existing data sets by their full names (converted back with as.mo() after the new data sets are loaded)
+rsi_translation$mo <- mo_name(rsi_translation$mo, language = NULL)
+microorganisms.codes$mo <- mo_name(microorganisms.codes$mo, language = NULL)
+microorganisms.translation <- AMR:::microorganisms.translation %>%
+  bind_rows(tibble(mo_old = AMR:::microorganisms.translation$mo_new, mo_new = mo_old)) %>%
+  filter(!mo_old %in% MOs$mo) %>% 
+  mutate(mo_new = mo_name(mo_new, language = NULL)) %>% 
+  bind_rows(old_new %>% select(mo_old, mo_new)) %>% 
+  distinct(mo_old, .keep_all = TRUE)
+
+# arrange the data sets to save
+MOs <- MOs %>% arrange(fullname)
+MOs.old <- MOs.old %>% arrange(fullname)
+
+# transform
+MOs <- as.data.frame(MOs, stringsAsFactors = FALSE)
+MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
+microorganisms.codes <- as.data.frame(microorganisms.codes, stringsAsFactors = FALSE)
+class(MOs$mo) <- c("mo", "character")
+
+# SAVE
+### for same server
+microorganisms <- dataset_UTF8_to_ASCII(MOs)
+microorganisms.old <- dataset_UTF8_to_ASCII(MOs.old)
+### for other server
+saveRDS(MOs, "microorganisms.rds")
+saveRDS(MOs.old, "microorganisms.old.rds")
+saveRDS(microorganisms.codes, "microorganisms.codes.rds")
+
+# on the server, do:
+usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
+usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
+rm(microorganisms)
+rm(microorganisms.old)
+
+# load new data sets
+devtools::load_all(".")
+
+# reset previously changed mo codes
+rsi_translation$mo <- as.mo(rsi_translation$mo)
+microorganisms.codes$mo <- as.mo(microorganisms.codes$mo)
+class(microorganisms.codes$mo) <- c("mo", "character")
+microorganisms.translation <- microorganisms.translation %>%
+  left_join(microorganisms.old[, c("fullname", "fullname_new")], # microorganisms.old is now new and loaded
+            by = c("mo_new" = "fullname")) %>%
+  mutate(name = ifelse(!is.na(fullname_new), fullname_new, mo_new)) %>% 
+  left_join(microorganisms[, c("fullname", "mo")],               # as is microorganisms
+            by = c("name" = "fullname")) %>% 
+  select(mo_old, mo_new = mo) %>% 
+  filter(!is.na(mo_old), !is.na(mo_new))
+class(microorganisms.translation$mo_old) <- "character" # no class <mo> since those aren't valid MO codes
+class(microorganisms.translation$mo_new) <- c("mo", "character")
+# save those to the package
+usethis::use_data(rsi_translation, overwrite = TRUE, version = 2)
+usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
+saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2)
+# to save microorganisms.translation internally to the package
+source("data-raw/internals.R")
+
+# load new data sets again
+devtools::load_all(".")
+
+# and check: these codes should not be missing (will otherwise throw a unit test error):
+AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
+AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
+AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo)
+
+# update the example_isolates data set
+example_isolates$mo <- as.mo(example_isolates$mo)
+usethis::use_data(example_isolates, overwrite = TRUE)
+
+# Don't forget to add SNOMED codes! (data-raw/snomed.R)
+
+# run the unit tests
+testthat::test_file("tests/testthat/test-data.R")
+testthat::test_file("tests/testthat/test-mo.R")
+testthat::test_file("tests/testthat/test-mo_property.R")
+
+
+
+# OLD CODE ----------------------------------------------------------------
+
 # to keep all the old IDs:
 # MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>%
 #   rbind(microorganisms %>%
@@ -816,79 +931,32 @@ View(old_new)
 #           select(mo, fullname) %>%
 #           left_join(MOs %>%
 #                       select(-mo), by = "fullname"))
-
-# and these codes are now missing (which will throw a unit test error):
-AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo)
-AMR::rsi_translation %>% filter(!mo %in% MOs$mo)
-AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) %>% View()
 # this is how to fix it
-microorganisms.codes <- AMR::microorganisms.codes %>% 
-  left_join(MOs %>%
-              mutate(kingdom_fullname = paste(kingdom, fullname)) %>% 
-              left_join(AMR::microorganisms %>%
-                          transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
-                        by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
-              select(mo_old, mo_new),
-            by = c("mo" = "mo_old")) %>% 
-  select(code, mo = mo_new) %>% 
-  filter(!is.na(mo))
-microorganisms.codes %>% filter(!mo %in% MOs$mo)
-# and for microorganisms.translation:
-microorganisms.translation <- AMR:::microorganisms.translation %>% 
-  select(mo = mo_new) %>% 
-  left_join(AMR::microorganisms %>%
-              transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
-            by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
-  select(mo_old, mo_new)
-  left_join(MOs %>%
-              mutate(kingdom_fullname = paste(kingdom, fullname)) %>% 
-              left_join(AMR::microorganisms %>%
-                          transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
-                        by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
-              select(mo_old, mo_new),
-            by = c("mo" = "mo_old")) %>% 
-  select(code, mo = mo_new) %>% 
-  filter(!is.na(mo))
-microorganisms.codes %>% filter(!mo %in% MOs$mo)
-
-# arrange
-MOs <- MOs %>% arrange(fullname)
-MOs.old <- MOs.old %>% arrange(fullname)
-microorganisms.codes <- microorganisms.codes %>% arrange(code)
-
-# transform
-MOs <- as.data.frame(MOs, stringsAsFactors = FALSE)
-MOs.old <- as.data.frame(MOs.old, stringsAsFactors = FALSE)
-microorganisms.codes <- as.data.frame(microorganisms.codes, stringsAsFactors = FALSE)
-class(MOs$mo) <- "mo"
-class(microorganisms.codes$mo) <- "mo"
-MOs$col_id <- as.integer(MOs$col_id)
-MOs.old$col_id <- as.integer(MOs.old$col_id)
-MOs.old$col_id_new <- as.integer(MOs.old$col_id_new)
-
-# SAVE
-### for other server
-saveRDS(MOs, "microorganisms.rds")
-saveRDS(MOs.old, "microorganisms.old.rds")
-saveRDS(microorganisms.codes, "microorganisms.codes.rds")
-### for same server
-microorganisms <- MOs
-microorganisms.old <- MOs.old
-microorganisms.translation <- old_new %>% select(mo_old, mo_new) %>% as.data.frame()
-class(microorganisms.translation$mo_old) <- "mo"
-class(microorganisms.translation$mo_new) <- "mo"
-
-# on the server, do:
-usethis::use_data(microorganisms, overwrite = TRUE, version = 2)
-usethis::use_data(microorganisms.old, overwrite = TRUE, version = 2)
-usethis::use_data(microorganisms.codes, overwrite = TRUE, version = 2)
-saveRDS(microorganisms.translation, file = "data-raw/microorganisms.translation.rds", version = 2) # this one will be covered in data-raw/internals.R
-rm(microorganisms)
-rm(microorganisms.old)
-rm(microorganisms.codes)
-rm(microorganisms.translation)
-devtools::load_all(".")
-
-# TO DO AFTER THIS
-# * Rerun data-raw/reproduction_of_rsi_translation.R
-# * Run unit tests
+# microorganisms.codes <- AMR::microorganisms.codes %>% 
+#   left_join(MOs %>%
+#               mutate(kingdom_fullname = paste(kingdom, fullname)) %>% 
+#               left_join(AMR::microorganisms %>%
+#                           transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
+#                         by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
+#               select(mo_old, mo_new),
+#             by = c("mo" = "mo_old")) %>% 
+#   select(code, mo = mo_new) %>% 
+#   filter(!is.na(mo))
+# microorganisms.codes %>% filter(!mo %in% MOs$mo)
+# # and for microorganisms.translation:
+# microorganisms.translation <- AMR:::microorganisms.translation %>% 
+#   select(mo = mo_new) %>% 
+#   left_join(AMR::microorganisms %>%
+#               transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
+#             by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
+#   select(mo_old, mo_new)
+#   left_join(MOs %>%
+#               mutate(kingdom_fullname = paste(kingdom, fullname)) %>% 
+#               left_join(AMR::microorganisms %>%
+#                           transmute(mo, kingdom_fullname = paste(kingdom, fullname)),
+#                         by = "kingdom_fullname", suffix = c("_new", "_old")) %>%
+#               select(mo_old, mo_new),
+#             by = c("mo" = "mo_old")) %>% 
+#   select(code, mo = mo_new) %>% 
+#   filter(!is.na(mo))
+# microorganisms.codes %>% filter(!mo %in% MOs$mo)
--- a/data-raw/rsi_translation.txt
+++ b/data-raw/rsi_translation.txt
--- a/data-raw/snomed.R
+++ b/data-raw/snomed.R
@@ -22,16 +22,17 @@
 library(AMR)
 library(tidyverse)

-# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/
+# go to https://www.nictiz.nl/standaardisatie/terminologiecentrum/referentielijsten/micro-organismen/ (Ctrl/Cmd + A in table)
 # read the table from clipboard
-snomed <- clipr::read_clip_tbl()
-# snomed <- snomed %>%
-#   transmute(fullname = trimws(gsub("^genus", "", Omschrijving, ignore.case = TRUE)),
-#             snomed = as.integer(Id))
+snomed <- clipr::read_clip_tbl(skip = 2)
 snomed <- snomed %>%
-  transmute(fullname = mo_name(Omschrijving),
+  dplyr::filter(gsub("(^genus |^familie |^stam |ss.? |subsp.? |subspecies )", "", 
+              Omschrijving.,
+              ignore.case = TRUE) %in% c(microorganisms$fullname, 
+                                         microorganisms.old$fullname)) %>% 
+  dplyr::transmute(fullname = mo_name(Omschrijving.),
            snomed = as.integer(Id)) %>% 
-  filter(!fullname %like% "unknown")
+  dplyr::filter(!fullname %like% "unknown")
 snomed_trans <- snomed %>%
  group_by(fullname) %>%
  mutate(snomed_list = list(snomed)) %>%
@@ -51,59 +52,59 @@ rm(microorganisms)

 # OLD ---------------------------------------------------------------------

-baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
-edition <- 'MAIN'
-version <- '2019-07-31'
-
-microorganisms.snomed <- data.frame(conceptid = character(0),
-                                    mo = character(0),
-                                    stringsAsFactors = FALSE)
-microorganisms$snomed <- ""
-
-# for (i in 1:50) {
-for (i in 1:1000) {
-  
-  if (i %% 10 == 0) {
-    cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
-  }
-  
-  mo_data <- microorganisms %>% 
-    filter(mo == microorganisms$mo[i]) %>% 
-    as.list()
-  
-  if (!mo_data$rank %in% c("genus", "species")) {
-    next
-  }
-  
-  searchTerm <- paste0(
-    ifelse(mo_data$rank == "genus", "Genus ", ""),
-    mo_data$fullname, 
-    " (organism)")
-  
-  url <- paste0(baseUrl, '/browser/',
-                edition, '/', 
-                version, 
-                '/descriptions?term=', curl::curl_escape(searchTerm),
-                '&mode=fullText&activeFilter=true&limit=', 250)
-  results <- url %>% 
-    httr::GET() %>%
-    httr::content(type = "text", encoding = "UTF-8") %>% 
-    jsonlite::fromJSON(flatten = TRUE) %>% 
-    .$items
-  if (NROW(results) == 0) {
-    next
-  } else {
-    message("Adding ", crayon::italic(mo_data$fullname))
-  }
-  
-  tryCatch(
-    microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
-    error = function(e) invisible()
-  )
-  
-  if (nrow(results) > 1) {
-      microorganisms.snomed <- microorganisms.snomed %>% 
-        bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
-                         mo = as.character(mo_data$mo)))
-  }
-}
+# baseUrl <- 'https://browser.ihtsdotools.org/snowstorm/snomed-ct'
+# edition <- 'MAIN'
+# version <- '2019-07-31'
+# 
+# microorganisms.snomed <- data.frame(conceptid = character(0),
+#                                     mo = character(0),
+#                                     stringsAsFactors = FALSE)
+# microorganisms$snomed <- ""
+# 
+# # for (i in 1:50) {
+# for (i in 1:1000) {
+#   
+#   if (i %% 10 == 0) {
+#     cat(paste0(i, " - ", cleaner::percentage(i / nrow(microorganisms)), "\n"))
+#   }
+#   
+#   mo_data <- microorganisms %>% 
+#     filter(mo == microorganisms$mo[i]) %>% 
+#     as.list()
+#   
+#   if (!mo_data$rank %in% c("genus", "species")) {
+#     next
+#   }
+#   
+#   searchTerm <- paste0(
+#     ifelse(mo_data$rank == "genus", "Genus ", ""),
+#     mo_data$fullname, 
+#     " (organism)")
+#   
+#   url <- paste0(baseUrl, '/browser/',
+#                 edition, '/', 
+#                 version, 
+#                 '/descriptions?term=', curl::curl_escape(searchTerm),
+#                 '&mode=fullText&activeFilter=true&limit=', 250)
+#   results <- url %>% 
+#     httr::GET() %>%
+#     httr::content(type = "text", encoding = "UTF-8") %>% 
+#     jsonlite::fromJSON(flatten = TRUE) %>% 
+#     .$items
+#   if (NROW(results) == 0) {
+#     next
+#   } else {
+#     message("Adding ", crayon::italic(mo_data$fullname))
+#   }
+#   
+#   tryCatch(
+#     microorganisms$snomed[i] <- results %>% filter(term == searchTerm) %>% pull(concept.conceptId),
+#     error = function(e) invisible()
+#   )
+#   
+#   if (nrow(results) > 1) {
+#       microorganisms.snomed <- microorganisms.snomed %>% 
+#         bind_rows(tibble(conceptid = results %>% filter(term != searchTerm) %>% pull(concept.conceptId) %>% unique(),
+#                          mo = as.character(mo_data$mo)))
+#   }
+# }
--- a/data/antibiotics.rda
+++ b/data/antibiotics.rda
--- a/data/example_isolates.rda
+++ b/data/example_isolates.rda
--- a/data/microorganisms.codes.rda
+++ b/data/microorganisms.codes.rda
--- a/data/microorganisms.old.rda
+++ b/data/microorganisms.old.rda
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/data/rsi_translation.rda
+++ b/data/rsi_translation.rda
--- a/docs/404.html
+++ b/docs/404.html
@@ -81,7 +81,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="https://msberends.gitlab.io/AMR/index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/LICENSE-text.html
+++ b/docs/LICENSE-text.html
@@ -81,7 +81,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -81,7 +81,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/authors.html
+++ b/docs/authors.html
@@ -81,7 +81,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/index.html
+++ b/docs/index.html
@@ -43,7 +43,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -221,7 +221,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
 <a href="#what-can-you-do-with-this-package" class="anchor"></a>What can you do with this package?</h3>
 <p>This package can be used for:</p>
 <ul>
-<li>Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> (<a href="./reference/mo_property.html">manual</a>)</li>
+<li>Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the <a href="http://www.catalogueoflife.org">Catalogue of Life</a> and <a href="https://lpsn.dsmz.de">List of Prokaryotic names with Standing in Nomenclature</a> (<a href="./reference/mo_property.html">manual</a>)</li>
 <li>Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines (<a href="./reference/as.rsi.html">manual</a>)</li>
 <li>Determining first isolates to be used for AMR analysis (<a href="./reference/first_isolate.html">manual</a>)</li>
 <li>Calculating antimicrobial resistance (<a href="./articles/AMR.html">tutorial</a>)</li>
@@ -268,7 +268,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
 <div id="microbial-taxonomic-reference-data" class="section level4">
 <h4 class="hasAnchor">
 <a href="#microbial-taxonomic-reference-data" class="anchor"></a>Microbial (taxonomic) reference data</h4>
-<p>This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, <a href="http://www.catalogueoflife.org">www.catalogueoflife.org</a>), supplemented by data from the Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ, <a href="https://www.dsmz.de">www.dsmz.de</a>). This supplementation is needed until the <a href="https://github.com/Sp2000/colplus">CoL+ project</a> is finished, which we await. With <code><a href="reference/catalogue_of_life_version.html">catalogue_of_life_version()</a></code> can be checked which version of the CoL is included in this package.</p>
+<p>This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, <a href="http://www.catalogueoflife.org">www.catalogueoflife.org</a>), supplemented by data from the List of Prokaryotic names with Standing in Nomenclature (LPSN, <a href="https://lpsn.dsmz.de">lpsn.dsmz.de</a>). This supplementation is needed until the <a href="https://github.com/Sp2000/colplus">CoL+ project</a> is finished, which we await. Use <code><a href="reference/catalogue_of_life_version.html">catalogue_of_life_version()</a></code> to check which version of the CoL is included in this package.</p>
 <p>Read more about which data from the Catalogue of Life <a href="./reference/catalogue_of_life.html">in our manual</a>.</p>
 </div>
 <div id="antimicrobial-reference-data" class="section level4">
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -81,7 +81,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -229,13 +229,13 @@
      <small>Source: <a href='https://gitlab.com/msberends/AMR/blob/master/NEWS.md'><code>NEWS.md</code></a></small>
    </div>

-    <div id="amr-1-1-0-9019" class="section level1">
-<h1 class="page-header" data-toc-text="1.1.0.9019">
-<a href="#amr-1-1-0-9019" class="anchor"></a>AMR 1.1.0.9019<small> Unreleased </small>
+    <div id="amr-1-1-0-9020" class="section level1">
+<h1 class="page-header" data-toc-text="1.1.0.9020">
+<a href="#amr-1-1-0-9020" class="anchor"></a>AMR 1.1.0.9020<small> Unreleased </small>
 </h1>
-<div id="last-updated-25-may-2020" class="section level2">
+<div id="last-updated-27-may-2020" class="section level2">
 <h2 class="hasAnchor">
-<a href="#last-updated-25-may-2020" class="anchor"></a><small>Last updated: 25-May-2020</small>
+<a href="#last-updated-27-may-2020" class="anchor"></a><small>Last updated: 27-May-2020</small>
 </h2>
 <div id="breaking" class="section level3">
 <h3 class="hasAnchor">
@@ -258,9 +258,22 @@ Negative effects of this change are:
 <h3 class="hasAnchor">
 <a href="#changed" class="anchor"></a>Changed</h3>
 <ul>
+<li>Taxonomy:
+<ul>
+<li>Updated the taxonomy of microorganisms to May 2020, using the Catalogue of Life (CoL), the Global Biodiversity Information Facility (GBIF) and the List of Prokaryotic names with Standing in Nomenclature (LPSN, hosted by DSMZ since February 2020)</li>
+<li>Removed the Catalogue of Life IDs (like 776351), since the Catalogue of Life now works with a species ID (hexadecimal string)</li>
+</ul>
+</li>
 <li>EUCAST rules:
 <ul>
-<li>The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function no longer applies “other” rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzym inhibitor = R). The default input value for <code>rules</code> is now <code><a href="https://rdrr.io/r/base/c.html">c("breakpoints", "expert")</a></code> instead of <code>"all"</code>, but this can be changed by the user. To return to the old behaviour, set <code><a href="https://rdrr.io/r/base/options.html">options(AMR.eucast_rules = "all")</a></code>.</li>
+<li>The <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code> function no longer applies “other” rules at default that are made available by this package (like setting ampicillin = R when ampicillin + enzyme inhibitor = R). The default input value for <code>rules</code> is now <code><a href="https://rdrr.io/r/base/c.html">c("breakpoints", "expert")</a></code> instead of <code>"all"</code>, but this can be changed by the user. To return to the old behaviour, set <code><a href="https://rdrr.io/r/base/options.html">options(AMR.eucast_rules = "all")</a></code>.</li>
+<li>Fixed a bug where checking antimicrobial results in the original data were not regarded as valid R/SI values</li>
+<li>All “other” rules now apply these two rules to all drug combinations in the <code>antibiotics</code> data set:
+<ol>
+<li>A drug <strong>with</strong> enzyme inhibitor will be set to S if the drug <strong>without</strong> enzyme inhibitor is S</li>
+<li>A drug <strong>without</strong> enzyme inhibitor will be set to R if the drug <strong>with</strong> enzyme inhibitor is R</li>
+</ol>
+This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/avibactam, trimethoprim/sulfamethoxazole, etc.</li>
 <li>Added official drug names to verbose output of <code><a href="../reference/eucast_rules.html">eucast_rules()</a></code>
 </li>
 </ul>
@@ -271,6 +284,7 @@ Negative effects of this change are:
 <li>Small fix for some text input that could not be coerced as valid MIC values</li>
 <li>Fix for interpretation of generic CLSI interpretation rules (thanks to Anthony Underwood)</li>
 <li>Fix for <code><a href="../reference/mo_source.html">set_mo_source()</a></code> to make sure that column <code>mo</code> will always be the second column</li>
+<li>Added abbreviation “cfsc” for Cefoxitin and “cfav” for Ceftazidime/avibactam</li>
 </ul>
 </div>
 <div id="other" class="section level3">
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -10,7 +10,7 @@ articles:
  WHONET: WHONET.html
  benchmarks: benchmarks.html
  resistance_predict: resistance_predict.html
-last_built: 2020-05-24T22:55Z
+last_built: 2020-05-27T14:37Z
 urls:
  reference: https://msberends.gitlab.io/AMR/reference
  article: https://msberends.gitlab.io/AMR/articles
--- a/docs/reference/as.disk.html
+++ b/docs/reference/as.disk.html
@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/reference/as.mic.html
+++ b/docs/reference/as.mic.html
@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/reference/as.mo.html
+++ b/docs/reference/as.mo.html
@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -399,7 +399,6 @@ This package contains the complete taxonomic tree of almost all microorganisms (
 <span class='fu'>as.mo</span>(<span class='st'>"MRSA"</span>)    <span class='co'># Methicillin Resistant S. aureus</span>
 <span class='fu'>as.mo</span>(<span class='st'>"VISA"</span>)    <span class='co'># Vancomycin Intermediate S. aureus</span>
 <span class='fu'>as.mo</span>(<span class='st'>"VRSA"</span>)    <span class='co'># Vancomycin Resistant S. aureus</span>
-<span class='fu'>as.mo</span>(<span class='fl'>22242419</span>)  <span class='co'># Catalogue of Life ID</span>
 <span class='fu'>as.mo</span>(<span class='fl'>115329001</span>) <span class='co'># SNOMED CT code</span>

 <span class='co'># Dyslexia is no problem - these all work:</span>
--- a/docs/reference/as.rsi.html
+++ b/docs/reference/as.rsi.html
@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

--- a/docs/reference/catalogue_of_life.html
+++ b/docs/reference/catalogue_of_life.html
@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -275,9 +275,9 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel


 <span class='co'># Get a note when a species was renamed</span>
-<span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='st'>"Chlamydia psittaci"</span>)
-<span class='co'># Note: 'Chlamydia psittaci' (Page, 1968) was renamed</span>
-<span class='co'>#       'Chlamydophila psittaci' (Everett et al., 1999)</span>
+<span class='fu'><a href='mo_property.html'>mo_shortname</a></span>(<span class='st'>"Chlamydophila psittaci"</span>)
+<span class='co'># Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to</span>
+<span class='co'>#       'Chlamydia psittaci' (Page, 1968)</span>
 <span class='co'># [1] "C. psittaci"</span>

 <span class='co'># Get any property from the entire taxonomic tree for all included species</span>
@@ -295,9 +295,9 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel

 <span class='co'># Do not get mistaken - this package is about microorganisms</span>
 <span class='fu'><a href='mo_property.html'>mo_kingdom</a></span>(<span class='st'>"C. elegans"</span>)
-<span class='co'># [1] "Bacteria"                        # Bacteria?!</span>
+<span class='co'># [1] "Fungi"                 # Fungi?!</span>
 <span class='fu'><a href='mo_property.html'>mo_name</a></span>(<span class='st'>"C. elegans"</span>)
-<span class='co'># [1] "Chroococcus limneticus elegans"  # Because a microorganism was found</span></pre>
+<span class='co'># [1] "Cladosporium elegans"  # Because a microorganism was found</span></pre>
  </div>
  <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
    <nav id="toc" data-toggle="toc" class="sticky-top">
@@ -313,7 +313,7 @@ Function <code><a href='as.mo.html'>as.mo()</a></code> to use the data for intel
 </div>

 <div class="pkgdown">
-  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p>
+  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
 </div>

      </footer>
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -81,7 +81,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9019</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -472,7 +472,7 @@
        <td>
          <p><code><a href="microorganisms.html">microorganisms</a></code> </p>
        </td>
-        <td><p>Data set with 69,447 microorganisms</p></td>
+        <td><p>Data set with 67,107 microorganisms</p></td>
      </tr><tr>
        
        <td>
@@ -502,7 +502,7 @@
        <td>
          <p><code><a href="microorganisms.codes.html">microorganisms.codes</a></code> </p>
        </td>
-        <td><p>Translation table for common microorganism codes</p></td>
+        <td><p>Translation table with 5,582 common microorganism codes</p></td>
      </tr><tr>
        
        <td>
--- a/docs/reference/microorganisms.codes.html
+++ b/docs/reference/microorganisms.codes.html
@@ -6,7 +6,7 @@
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>Translation table for common microorganism codes — microorganisms.codes • AMR (for R)</title>
+<title>Translation table with 5,582 common microorganism codes — microorganisms.codes • AMR (for R)</title>

 <!-- favicons -->
 <link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@@ -48,7 +48,7 @@
  <link href="../extra.css" rel="stylesheet">
  <script src="../extra.js"></script>

-<meta property="og:title" content="Translation table for common microorganism codes — microorganisms.codes" />
+<meta property="og:title" content="Translation table with 5,582 common microorganism codes — microorganisms.codes" />
 <meta property="og:description" content="A data set containing commonly used codes for microorganisms, from laboratory systems and WHONET. Define your own with set_mo_source(). They will all be searched when using as.mo() and consequently all the mo_* functions." />
 <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" />

@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -226,7 +226,7 @@
 <div class="row">
  <div class="col-md-9 contents">
    <div class="page-header">
-    <h1>Translation table for common microorganism codes</h1>
+    <h1>Translation table with 5,582 common microorganism codes</h1>
    <small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small>
    <div class="hidden name"><code>microorganisms.codes.Rd</code></div>
    </div>
@@ -240,7 +240,7 @@

    <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>

-    <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 5,585 observations and 2 variables:</p><ul>
+    <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 5,582 observations and 2 variables:</p><ul>
 <li><p><code>code</code><br /> Commonly used code of a microorganism</p></li>
 <li><p><code>mo</code><br /> ID of the microorganism in the <a href='microorganisms.html'>microorganisms</a> data set</p></li>
 </ul>
@@ -276,7 +276,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
 </div>

 <div class="pkgdown">
-  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p>
+  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
 </div>

      </footer>
--- a/docs/reference/microorganisms.html
+++ b/docs/reference/microorganisms.html
@@ -6,7 +6,7 @@
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
 <meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>Data set with 69,447 microorganisms — microorganisms • AMR (for R)</title>
+<title>Data set with 67,107 microorganisms — microorganisms • AMR (for R)</title>

 <!-- favicons -->
 <link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png">
@@ -48,7 +48,7 @@
  <link href="../extra.css" rel="stylesheet">
  <script src="../extra.js"></script>

-<meta property="og:title" content="Data set with 69,447 microorganisms — microorganisms" />
+<meta property="og:title" content="Data set with 67,107 microorganisms — microorganisms" />
 <meta property="og:description" content="A data set containing the microbial taxonomy of six kingdoms from the Catalogue of Life. MO codes can be looked up using as.mo()." />
 <meta property="og:image" content="https://msberends.gitlab.io/AMR/logo.svg" />

@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9004</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -226,7 +226,7 @@
 <div class="row">
  <div class="col-md-9 contents">
    <div class="page-header">
-    <h1>Data set with 69,447 microorganisms</h1>
+    <h1>Data set with 67,107 microorganisms</h1>
    <small class="dont-index">Source: <a href='https://gitlab.com/msberends/AMR/blob/master/R/data.R'><code>R/data.R</code></a></small>
    <div class="hidden name"><code>microorganisms.Rd</code></div>
    </div>
@@ -240,9 +240,8 @@

    <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>

-    <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 69,447 observations and 17 variables:</p><ul>
+    <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 67,107 observations and 16 variables:</p><ul>
 <li><p><code>mo</code><br /> ID of microorganism as used by this package</p></li>
-<li><p><code>col_id</code><br /> Catalogue of Life ID</p></li>
 <li><p><code>fullname</code><br /> Full name, like <code>"Escherichia coli"</code></p></li>
 <li><p><code>kingdom</code>, <code>phylum</code>, <code>class</code>, <code>order</code>, <code>family</code>, <code>genus</code>, <code>species</code>, <code>subspecies</code><br /> Taxonomic rank of the microorganism</p></li>
 <li><p><code>rank</code><br /> Text of the taxonomic rank of the microorganism, like <code>"species"</code> or <code>"genus"</code></p></li>
@@ -256,6 +255,7 @@
    <h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>

    <p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
+<p>Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786</p>
 <p>Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, <a href='https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date'>https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date</a> (check included version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
    <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>

@@ -266,7 +266,7 @@
 <li><p>1 entry of <em>Blastocystis</em> (<em>Blastocystis hominis</em>), although it officially does not exist (Noel <em>et al.</em> 2005, PMID 15634993)</p></li>
 <li><p>5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)</p></li>
 <li><p>6 families under the Enterobacterales order, according to Adeolu <em>et al.</em> (2016, PMID 27620848), that are not (yet) in the Catalogue of Life</p></li>
-<li><p>12,600 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications</p></li>
+<li><p>7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications</p></li>
 </ul>
 <h3>Direct download</h3>

--- a/docs/reference/microorganisms.old.html
+++ b/docs/reference/microorganisms.old.html
@@ -82,7 +82,7 @@
      </button>
      <span class="navbar-brand">
        <a class="navbar-link" href="../index.html">AMR (for R)</a>
-        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0</span>
+        <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Latest development version">1.1.0.9020</span>
      </span>
    </div>

@@ -240,10 +240,9 @@

    <h2 class="hasAnchor" id="format"><a class="anchor" href="#format"></a>Format</h2>

-    <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 24,253 observations and 5 variables:</p><ul>
-<li><p><code>col_id</code><br /> Catalogue of Life ID that was originally given</p></li>
-<li><p><code>col_id_new</code><br /> New Catalogue of Life ID that responds to an entry in the <a href='microorganisms.html'>microorganisms</a> data set</p></li>
+    <p>A <code><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></code> with 12,709 observations and 4 variables:</p><ul>
 <li><p><code>fullname</code><br /> Old full taxonomic name of the microorganism</p></li>
+<li><p><code>fullname_new</code><br /> New full taxonomic name of the microorganism</p></li>
 <li><p><code>ref</code><br /> Author(s) and year of concerning scientific publication</p></li>
 <li><p><code>prevalence</code><br /> Prevalence of the microorganism, see <code><a href='as.mo.html'>as.mo()</a></code></p></li>
 </ul>
@@ -251,6 +250,7 @@
    <h2 class="hasAnchor" id="source"><a class="anchor" href="#source"></a>Source</h2>

    <p>Catalogue of Life: Annual Checklist (public online taxonomic database), <a href='http://www.catalogueoflife.org'>http://www.catalogueoflife.org</a> (check included annual version with <code><a href='catalogue_of_life_version.html'>catalogue_of_life_version()</a></code>).</p>
+<p>Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786</p>
    <h2 class="hasAnchor" id="catalogue-of-life"><a class="anchor" href="#catalogue-of-life"></a>Catalogue of Life</h2>

    
@@ -282,7 +282,7 @@ This package contains the complete taxonomic tree of almost all microorganisms (
 </div>

 <div class="pkgdown">
-  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.0.</p>
+  <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.5.1.</p>
 </div>

      </footer>
--- a/index.md
+++ b/index.md
@@ -36,7 +36,7 @@ The development of this package is part of, related to, or made possible by:

 This package can be used for:

-  * Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) ([manual](./reference/mo_property.html))
+  * Reference for the taxonomy of microorganisms, since the package contains all microbial (sub)species from the [Catalogue of Life](http://www.catalogueoflife.org) and [List of Prokaryotic names with Standing in Nomenclature](https://lpsn.dsmz.de) ([manual](./reference/mo_property.html))
  * Interpreting raw MIC and disk diffusion values, based on the latest CLSI or EUCAST guidelines ([manual](./reference/as.rsi.html))
  * Determining first isolates to be used for AMR analysis ([manual](./reference/first_isolate.html))
  * Calculating antimicrobial resistance ([tutorial](./articles/AMR.html))
@@ -82,7 +82,7 @@ To find out how to conduct AMR analysis, please [continue reading here to get st

 #### Microbial (taxonomic) reference data

-This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, [www.catalogueoflife.org](http://www.catalogueoflife.org)), supplemented by data from the Deutsche Sammlung von Mikroorganismen und Zellkulturen (DSMZ, [www.dsmz.de](https://www.dsmz.de)). This supplementation is needed until the [CoL+ project](https://github.com/Sp2000/colplus) is finished, which we await. With `catalogue_of_life_version()` can be checked which version of the CoL is included in this package. 
+This package contains the complete taxonomic tree of almost all ~70,000 microorganisms from the authoritative and comprehensive Catalogue of Life (CoL, [www.catalogueoflife.org](http://www.catalogueoflife.org)), supplemented by data from the List of Prokaryotic names with Standing in Nomenclature (LPSN, [lpsn.dsmz.de](https://lpsn.dsmz.de)). This supplementation is needed until the [CoL+ project](https://github.com/Sp2000/colplus) is finished, which we await. Use `catalogue_of_life_version()` to check which version of the CoL is included in this package.

 Read more about which data from the Catalogue of Life [in our manual](./reference/catalogue_of_life.html).

--- a/man/as.mo.Rd
+++ b/man/as.mo.Rd
@@ -166,7 +166,6 @@ as.mo("Zthafilokkoockus oureuz") # handles incorrect spelling
 as.mo("MRSA")    # Methicillin Resistant S. aureus
 as.mo("VISA")    # Vancomycin Intermediate S. aureus
 as.mo("VRSA")    # Vancomycin Resistant S. aureus
-as.mo(22242419)  # Catalogue of Life ID
 as.mo(115329001) # SNOMED CT code

 # Dyslexia is no problem - these all work:
--- a/man/catalogue_of_life.Rd
+++ b/man/catalogue_of_life.Rd
@@ -42,9 +42,9 @@ catalogue_of_life_version()


 # Get a note when a species was renamed
-mo_shortname("Chlamydia psittaci")
-# Note: 'Chlamydia psittaci' (Page, 1968) was renamed
-#       'Chlamydophila psittaci' (Everett et al., 1999)
+mo_shortname("Chlamydophila psittaci")
+# Note: 'Chlamydophila psittaci' (Everett et al., 1999) was renamed back to
+#       'Chlamydia psittaci' (Page, 1968)
 # [1] "C. psittaci"

 # Get any property from the entire taxonomic tree for all included species
@@ -62,9 +62,9 @@ mo_ref("E. coli")

 # Do not get mistaken - this package is about microorganisms
 mo_kingdom("C. elegans")
-# [1] "Bacteria"                        # Bacteria?!
+# [1] "Fungi"                 # Fungi?!
 mo_name("C. elegans")
-# [1] "Chroococcus limneticus elegans"  # Because a microorganism was found
+# [1] "Cladosporium elegans"  # Because a microorganism was found
 }
 \seealso{
 Data set \link{microorganisms} for the actual data. \cr
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@@ -3,12 +3,11 @@
 \docType{data}
 \name{microorganisms}
 \alias{microorganisms}
-\title{Data set with 69,447 microorganisms}
+\title{Data set with 67,107 microorganisms}
 \format{
-A \code{\link{data.frame}} with 69,447 observations and 17 variables:
+A \code{\link{data.frame}} with 67,107 observations and 16 variables:
 \itemize{
 \item \code{mo}\cr ID of microorganism as used by this package
-\item \code{col_id}\cr Catalogue of Life ID
 \item \code{fullname}\cr Full name, like \code{"Escherichia coli"}
 \item \code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{subspecies}\cr Taxonomic rank of the microorganism
 \item \code{rank}\cr Text of the taxonomic rank of the microorganism, like \code{"species"} or \code{"genus"}
@@ -22,6 +21,8 @@ A \code{\link{data.frame}} with 69,447 observations and 17 variables:
 \source{
 Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).

+Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
+
 Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures, Germany, Prokaryotic Nomenclature Up-to-Date, \url{https://www.dsmz.de/services/online-tools/prokaryotic-nomenclature-up-to-date} (check included version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
 }
 \usage{
@@ -39,7 +40,7 @@ Manually added were:
 \item 1 entry of \emph{Blastocystis} (\emph{Blastocystis hominis}), although it officially does not exist (Noel \emph{et al.} 2005, PMID 15634993)
 \item 5 other 'undefined' entries (unknown, unknown Gram negatives, unknown Gram positives, unknown yeast and unknown fungus)
 \item 6 families under the Enterobacterales order, according to Adeolu \emph{et al.} (2016, PMID 27620848), that are not (yet) in the Catalogue of Life
-\item 12,600 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contain the latest taxonomic information based on recent publications
+\item 7,368 species from the DSMZ (Deutsche Sammlung von Mikroorganismen und Zellkulturen) since the DSMZ contains the latest taxonomic information based on recent publications
 }
 \subsection{Direct download}{

--- a/man/microorganisms.codes.Rd
+++ b/man/microorganisms.codes.Rd
@@ -3,9 +3,9 @@
 \docType{data}
 \name{microorganisms.codes}
 \alias{microorganisms.codes}
-\title{Translation table for common microorganism codes}
+\title{Translation table with 5,582 common microorganism codes}
 \format{
-A \code{\link{data.frame}} with 5,585 observations and 2 variables:
+A \code{\link{data.frame}} with 5,582 observations and 2 variables:
 \itemize{
 \item \code{code}\cr Commonly used code of a microorganism
 \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set
--- a/man/microorganisms.old.Rd
+++ b/man/microorganisms.old.Rd
@@ -5,17 +5,18 @@
 \alias{microorganisms.old}
 \title{Data set with previously accepted taxonomic names}
 \format{
-A \code{\link{data.frame}} with 24,253 observations and 5 variables:
+A \code{\link{data.frame}} with 12,709 observations and 4 variables:
 \itemize{
-\item \code{col_id}\cr Catalogue of Life ID that was originally given
-\item \code{col_id_new}\cr New Catalogue of Life ID that responds to an entry in the \link{microorganisms} data set
 \item \code{fullname}\cr Old full taxonomic name of the microorganism
+\item \code{fullname_new}\cr New full taxonomic name of the microorganism
 \item \code{ref}\cr Author(s) and year of concerning scientific publication
 \item \code{prevalence}\cr Prevalence of the microorganism, see \code{\link[=as.mo]{as.mo()}}
 }
 }
 \source{
 Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}).
+
+Parte, A.C. (2018). LPSN — List of Prokaryotic names with Standing in Nomenclature (bacterio.net), 20 years on. International Journal of Systematic and Evolutionary Microbiology, 68, 1825-1829; doi: 10.1099/ijsem.0.002786
 }
 \usage{
 microorganisms.old
--- a/tests/testthat/test-data.R
+++ b/tests/testthat/test-data.R
@@ -56,11 +56,11 @@ test_that("creation of data sets is valid", {
  expect_lt(nrow(df[which(df$prevalence == 2), ]), nrow(df[which(df$prevalence == 3), ]))
  expect_true(all(c("mo", "fullname",
                    "kingdom", "phylum", "class", "order", "family", "genus", "species", "subspecies",
-                    "rank", "col_id", "species_id", "source", "ref", "prevalence",
+                    "rank", "ref", "species_id", "source", "prevalence", "snomed",
                    "kingdom_index", "fullname_lower", "g_species") %in% colnames(df)))

  olddf <- create_MO.old_lookup()
-  expect_true(all(c("col_id", "col_id_new", "fullname", "ref", "prevalence",
+  expect_true(all(c("fullname", "fullname_new", "ref", "prevalence",
                    "fullname_lower", "g_species") %in% colnames(olddf)))
  
  old <- make_trans_tbl()
--- a/tests/testthat/test-mo.R
+++ b/tests/testthat/test-mo.R
@@ -34,7 +34,6 @@ test_that("as.mo works", {
  
  expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COLI")
  expect_equal(as.character(as.mo("Escherichia  coli")), "B_ESCHR_COLI")
-  expect_equal(as.character(as.mo(22242416)), "B_ESCHR_COLI")
  expect_equal(as.character(as.mo(112283007)), "B_ESCHR_COLI")
  expect_equal(as.character(as.mo("Escherichia  species")), "B_ESCHR")
  expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR")
@@ -45,7 +44,7 @@ test_that("as.mo works", {
  expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL")
  expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNMN_RHNS") # K. pneumoniae subspp. rhinoscleromatis
  expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL")
-  expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DFFC")
+  expect_equal(as.character(as.mo("C. difficile")), "B_CRDDS_DFFC")
  expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNMP")
  expect_equal(as.character(as.mo("Strepto")), "B_STRPT")
  expect_equal(as.character(as.mo("Streptococcus")), "B_STRPT") # not Peptostreptoccus
@@ -99,11 +98,11 @@ test_that("as.mo works", {
  # unprevalent MO
  expect_identical(
    as.character(
-      as.mo(c("burnod",
-              "B. nodosa",
-              "B nodosa",
-              "Burkholderia nodosa"))),
-    rep("B_BRKHL_NODS", 4))
+      as.mo(c("parnod",
+              "P. nodosa",
+              "P nodosa",
+              "Paraburkholderia nodosa"))),
+    rep("B_PRBRK_NODS", 4))
  
  # empty values
  expect_identical(as.character(as.mo(c("", NA, NaN))), rep(NA_character_, 3))
@@ -239,7 +238,7 @@ test_that("as.mo works", {
  
  # Salmonella (City) are all actually Salmonella enterica spp (City)
  expect_equal(suppressWarnings(mo_name(c("Salmonella Goettingen", "Salmonella Typhimurium", "Salmonella Group A"))),
-               c("Salmonella enterica", "Salmonella typhimurium", "Salmonella"))
+               c("Salmonella enterica", "Salmonella enterica", "Salmonella"))
  
  # no virusses
  expect_equal(as.character(as.mo("Virus")), NA_character_)
--- a/tests/testthat/test-mo_property.R
+++ b/tests/testthat/test-mo_property.R
@@ -93,8 +93,7 @@ test_that("mo_property works", {
  expect_identical(suppressWarnings(mo_ref("Chlamydia psittaci")), "Page, 1968")
  expect_identical(mo_ref("Chlamydophila psittaci"), "Everett et al., 1999")

-  expect_equal(mo_snomed("Escherichia coli"), 
-               c(112283007, 116395006, 116396007, 103429008, 83285000, 116394005, 407166006, 457914007))
+  expect_equal(mo_snomed("Escherichia coli"), 112283007)
  
  # old codes must throw a warning in mo_* family
  expect_warning(mo_name(c("B_ESCHR_COL", "B_STPHY_AUR")))