fix for printing tibbles, improve guess_bactid

2025-12-17 11:40:19 +01:00 · 2018-06-08 12:06:54 +02:00
parent efdf5a3dc5
commit 0a5898b17d
12 changed files with 306 additions and 209 deletions
--- a/4
+++ b/4
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9002
+Version: 0.2.0.9003
-Date: 2018-05-31
+Date: 2018-06-08
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,13 +1,13 @@
 # 0.2.0.90xx (development version)
-#### New
+
 * Vignettes about frequency tables
 * Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
 #### Changed
 * Renamed `toConsole` parameter of `freq` to `as.data.frame`
 * Small translational improvements to the `septic_patients` dataset
 * Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"`
-* Fix for warning `hybrid evaluation forced for row_number` from the `dplyr` package v0.7.5 and above.
+* Fix for warning **hybrid evaluation forced for row_number** ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
 * Support for 1 or 2 columns as input for `guess_bactid`
 * Fix for printing tibbles where characters would be accidentally transformed to factors
 # 0.2.0 (latest stable version)
 #### New
--- a/R/atc.R
+++ b/R/atc.R
@@ -238,135 +238,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
  abcode
 }
 #' Find bacteria ID based on genus/species
 #'
 #' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
 #' @param x character vector to determine \code{bactid}
 #' @export
 #' @importFrom dplyr %>% filter slice pull
 #' @return Character (vector).
 #' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
 #' @examples
 #' # These examples all return "STAAUR", the ID of S. aureus:
 #' guess_bactid("stau")
 #' guess_bactid("STAU")
 #' guess_bactid("staaur")
 #' guess_bactid("S. aureus")
 #' guess_bactid("S aureus")
 #' guess_bactid("Staphylococcus aureus")
 #' guess_bactid("MRSA") # Methicillin-resistant S. aureus
 #' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
 guess_bactid <- function(x) {
  # remove dots and other non-text in case of "E. coli" except spaces
  x <- gsub("[^a-zA-Z ]+", "", x)
  # but spaces before and after should be omitted
  x <- trimws(x, which = "both")
  x.bak <- x
  # replace space by regex sign
  x <- gsub(" ", ".*", x, fixed = TRUE)
  # add start and stop
  x_species <- paste(x, 'species')
  x <- paste0('^', x, '$')
  for (i in 1:length(x)) {
    if (tolower(x[i]) == '^e.*coli$') {
      # avoid detection of Entamoeba coli in case of E. coli
      x[i] <- 'Escherichia coli'
    }
    if (tolower(x[i]) == '^h.*influenzae$') {
      # avoid detection of Haematobacter influenzae in case of H. influenzae
      x[i] <- 'Haemophilus influenzae'
    }
    if (tolower(x[i]) == '^st.*au$'
        | tolower(x[i]) == '^stau$'
        | tolower(x[i]) == '^staaur$') {
      # avoid detection of Staphylococcus auricularis in case of S. aureus
      x[i] <- 'Staphylococcus aureus'
    }
    if (tolower(x[i]) == '^p.*aer$') {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }
    # translate known trivial names to genus+species
    if (toupper(x.bak[i]) == 'MRSA'
        | toupper(x.bak[i]) == 'VISA'
        | toupper(x.bak[i]) == 'VRSA') {
      x[i] <- 'Staphylococcus aureus'
    }
    if (toupper(x.bak[i]) == 'MRSE') {
      x[i] <- 'Staphylococcus epidermidis'
    }
    if (toupper(x.bak[i]) == 'VRE') {
      x[i] <- 'Enterococcus'
    }
    if (toupper(x.bak[i]) == 'MRPA') {
      # multi resistant P. aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }
    if (toupper(x.bak[i]) == 'PISP'
        | toupper(x.bak[i]) == 'PRSP') {
      # peni resistant S. pneumoniae
      x[i] <- 'Streptococcus pneumoniae'
    }
    if (toupper(x.bak[i]) == 'VISP'
        | toupper(x.bak[i]) == 'VRSP') {
      # vanco resistant S. pneumoniae
      x[i] <- 'Streptococcus pneumoniae'
    }
    # let's try the ID's first
    found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
    if (nrow(found) == 0) {
      # now try exact match
      found <- AMR::microorganisms %>% filter(fullname == x[i])
    }
    if (nrow(found) == 0) {
      # try any match
      found <- AMR::microorganisms %>% filter(fullname %like% x[i])
    }
    if (nrow(found) == 0) {
      # try only genus, with 'species' attached
      found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
    }
    if (nrow(found) == 0) {
      # search for GLIMS code
      if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
      }
    }
    if (nrow(found) == 0) {
      # try splitting of characters and then find ID
      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
      x_split <- x
      x_length <- nchar(x.bak[i])
      x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
                     '.* ',
                     x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
    }
    if (nrow(found) == 0) {
      # try any match with text before and after original search string
      # so "negative rods" will be "GNR"
      if (x.bak[i] %like% "^Gram") {
        x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
        # remove leading and trailing spaces again
        x.bak[i] <- trimws(x.bak[i], which = "both")
      }
      found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
    }
    if (nrow(found) != 0) {
      x[i] <- found %>%
        slice(1) %>%
        pull(bactid)
    } else {
      x[i] <- ""
    }
  }
  x
 }
 #' Find ATC code based on antibiotic property
 #'
--- a/R/first_isolates.R
+++ b/R/first_isolates.R
@@ -22,7 +22,7 @@
 #' @param tbl a \code{data.frame} containing isolates.
 #' @param col_date column name of the result date (or date that is was received on the lab)
 #' @param col_patient_id column name of the unique IDs of the patients
-#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)
+#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.
 #' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
 #' @param col_specimen column name of the specimen type or group
 #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
@@ -291,15 +291,15 @@ first_isolate <- function(tbl,
    return(tbl %>% pull(real_first_isolate))
  }
-  scope.size <- tbl %>%
+  # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
-    filter(
+  suppressWarnings(
-      suppressWarnings(
+    scope.size <- tbl %>%
      filter(
        row_number() %>% between(row.start,
-                                 row.end)
+                                 row.end),
-      ),
+        genus != '') %>%
-      genus != '') %>%
+      nrow()
-    nrow()
+  )
  # Analysis of first isolate ----
  all_first <- tbl %>%
@@ -328,39 +328,44 @@ first_isolate <- function(tbl,
      }
    }
    type_param <- type
-    all_first <- all_first %>%
+    # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
-      mutate(key_ab_lag = lag(key_ab)) %>%
+    suppressWarnings(
-      mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
+      all_first <- all_first %>%
-                                                   y = key_ab_lag,
+        mutate(key_ab_lag = lag(key_ab)) %>%
-                                                   type = type_param,
+        mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
-                                                   ignore_I = ignore_I,
+                                                     y = key_ab_lag,
-                                                   points_threshold = points_threshold,
+                                                     type = type_param,
-                                                   info = info)) %>%
+                                                     ignore_I = ignore_I,
-      mutate(
+                                                     points_threshold = points_threshold,
-        real_first_isolate =
+                                                     info = info)) %>%
-          if_else(
+        mutate(
-            suppressWarnings(between(row_number(), row.start, row.end))
+          real_first_isolate =
-            & genus != ''
+            if_else(
-            & (other_pat_or_mo
+              between(row_number(), row.start, row.end)
-               | days_diff >= episode_days
+              & genus != ''
-               | key_ab_other),
+              & (other_pat_or_mo
-            TRUE,
+                 | days_diff >= episode_days
-            FALSE))
+                 | key_ab_other),
-
+              TRUE,
              FALSE))
    )
    if (info == TRUE) {
      cat('\n')
    }
  } else {
-    all_first <- all_first %>%
+    # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
-      mutate(
+    suppressWarnings(
-        real_first_isolate =
+      all_first <- all_first %>%
-          if_else(
+        mutate(
-            suppressWarnings(between(row_number(), row.start, row.end))
+          real_first_isolate =
-            & genus != ''
+            if_else(
-            & (other_pat_or_mo
+              between(row_number(), row.start, row.end)
-               | days_diff >= episode_days),
+              & genus != ''
-            TRUE,
+              & (other_pat_or_mo
-            FALSE))
+                 | days_diff >= episode_days),
              TRUE,
              FALSE))
    )
  }
  # first one as TRUE
@@ -402,8 +407,7 @@ first_isolate <- function(tbl,
 #' Key antibiotics based on bacteria ID
 #'
 #' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}.
-#' @param col_bactid column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
+#' @inheritParams first_isolate
 #' @param info print warnings
 #' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive
 #' @export
 #' @importFrom dplyr %>% mutate if_else
--- a/R/guess_bactid.R
+++ b/R/guess_bactid.R
@@ -0,0 +1,185 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Find bacteria ID based on genus/species
 #'
 #' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
 #' @param x character vector or a dataframe with one or two columns
 #' @export
 #' @importFrom dplyr %>% filter pull
 #' @return Character (vector).
 #' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
 #' @examples
 #' # These examples all return "STAAUR", the ID of S. aureus:
 #' guess_bactid("stau")
 #' guess_bactid("STAU")
 #' guess_bactid("staaur")
 #' guess_bactid("S. aureus")
 #' guess_bactid("S aureus")
 #' guess_bactid("Staphylococcus aureus")
 #' guess_bactid("MRSA") # Methicillin-resistant S. aureus
 #' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
 #'
 #' \dontrun{
 #' df$bactid <- guess_bactid(df$microorganism_name)
 #'
 #' # the select function of tidyverse is also supported:
 #' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid()
 #'
 #' # and can even contain 2 columns, which is convenient for genus/species combinations:
 #' df$bactid <- df %>% select(genus, species) %>% guess_bactid()
 #' # same result:
 #' df <- df %>% mutate(bactid = paste(genus, species) %>% guess_bactid())
 #' }
 guess_bactid <- function(x) {
  if (NCOL(x) == 2) {
    # support tidyverse selection like: df %>% select(colA, colB)
    # paste these columns together
    x_vector <- vector("character", NROW(x))
    for (i in 1:NROW(x)) {
      x_vector[i] <- paste(pull(x[i,], 1), pull(x[i,], 2), sep = " ")
    }
    x <- x_vector
  } else {
    if (NCOL(x) > 2) {
      stop('`x` can be 2 columns at most', call. = FALSE)
    }
    # support tidyverse selection like: df %>% select(colA)
    if (!is.vector(x)) {
      x <- pull(x, 1)
    }
  }
  # remove dots and other non-text in case of "E. coli" except spaces
  x <- gsub("[^a-zA-Z ]+", "", x)
  # but spaces before and after should be omitted
  x <- trimws(x, which = "both")
  x.bak <- x
  # replace space by regex sign
  x <- gsub(" ", ".*", x, fixed = TRUE)
  # add start and stop
  x_species <- paste(x, 'species')
  x <- paste0('^', x, '$')
  for (i in 1:length(x)) {
    if (tolower(x[i]) == '^e.*coli$') {
      # avoid detection of Entamoeba coli in case of E. coli
      x[i] <- 'Escherichia coli'
    }
    if (tolower(x[i]) == '^h.*influenzae$') {
      # avoid detection of Haematobacter influenzae in case of H. influenzae
      x[i] <- 'Haemophilus influenzae'
    }
    if (tolower(x[i]) == '^st.*au$'
        | tolower(x[i]) == '^stau$'
        | tolower(x[i]) == '^staaur$') {
      # avoid detection of Staphylococcus auricularis in case of S. aureus
      x[i] <- 'Staphylococcus aureus'
    }
    if (tolower(x[i]) == '^p.*aer$') {
      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
      x[i] <- 'Pseudomonas aeruginosa'
    }
    # translate known trivial names to genus+species
    if (!is.na(x.bak[i])) {
      if (toupper(x.bak[i]) == 'MRSA'
          | toupper(x.bak[i]) == 'VISA'
          | toupper(x.bak[i]) == 'VRSA') {
        x[i] <- 'Staphylococcus aureus'
      }
      if (toupper(x.bak[i]) == 'MRSE') {
        x[i] <- 'Staphylococcus epidermidis'
      }
      if (toupper(x.bak[i]) == 'VRE') {
        x[i] <- 'Enterococcus'
      }
      if (toupper(x.bak[i]) == 'MRPA') {
        # multi resistant P. aeruginosa
        x[i] <- 'Pseudomonas aeruginosa'
      }
      if (toupper(x.bak[i]) == 'PISP'
          | toupper(x.bak[i]) == 'PRSP') {
        # peni resistant S. pneumoniae
        x[i] <- 'Streptococcus pneumoniae'
      }
      if (toupper(x.bak[i]) == 'VISP'
          | toupper(x.bak[i]) == 'VRSP') {
        # vanco resistant S. pneumoniae
        x[i] <- 'Streptococcus pneumoniae'
      }
    }
    # let's try the ID's first
    found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
    if (nrow(found) == 0) {
      # now try exact match
      found <- AMR::microorganisms %>% filter(fullname == x[i])
    }
    if (nrow(found) == 0) {
      # try any match
      found <- AMR::microorganisms %>% filter(fullname %like% x[i])
    }
    if (nrow(found) == 0) {
      # try exact match of only genus, with 'species' attached
      # (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s")
      found <- AMR::microorganisms %>% filter(fullname == x_species[i])
    }
    if (nrow(found) == 0) {
      # try any match of only genus, with 'species' attached
      found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
    }
    if (nrow(found) == 0) {
      # search for GLIMS code
      if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
      }
    }
    if (nrow(found) == 0) {
      # try splitting of characters and then find ID
      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
      x_split <- x
      x_length <- nchar(x.bak[i])
      x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
                           '.* ',
                           x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
    }
    if (nrow(found) == 0) {
      # try any match with text before and after original search string
      # so "negative rods" will be "GNR"
      if (x.bak[i] %like% "^Gram") {
        x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
        # remove leading and trailing spaces again
        x.bak[i] <- trimws(x.bak[i], which = "both")
      }
      if (!is.na(x.bak[i])) {
        found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
      }
    }
    if (nrow(found) != 0) {
      x[i] <- as.character(found[1, 'bactid'])
    } else {
      x[i] <- ""
    }
  }
  x
 }
--- a/R/print.R
+++ b/R/print.R
@@ -192,7 +192,7 @@ prettyprint_df <- function(x,
  if (n + 1 < nrow(x)) {
    # remove in between part, 1 extra for ~~~~ between first and last part
    rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
-    x <- as.data.frame(x.bak[rows_list,])
+    x <- as.data.frame(x.bak[rows_list,], stringsAsFactors = FALSE)
    colnames(x) <- colnames(x.bak)
    rownames(x) <- rownames(x.bak)[rows_list]
    # set inbetweener between parts
@@ -204,12 +204,12 @@ prettyprint_df <- function(x,
    # class will be marked up per column
    if (NROW(x.bak) > 0) {
      rownames.x <- rownames(x)
-      x <- x %>%
+      # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
-        filter(
+      suppressWarnings(
-          suppressWarnings(
+        x <- x %>%
-            row_number() == 1)
+          filter(row_number() == 1) %>%
-        ) %>%
+          rbind(x, stringsAsFactors = FALSE)
-        rbind(x, stringsAsFactors = FALSE)
+      )
      rownames(x) <- c('*', rownames.x)
    }
@@ -252,12 +252,12 @@ prettyprint_df <- function(x,
  }
  # markup cols
  for (i in 1:ncol(x)) {
    if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
      class(x[, i]) <- class(x.bak[, i])
    }
    try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
    # replace NAs
    if (nchar(na) < 2) {
      # make as long as the text "NA"
@@ -297,16 +297,16 @@ prettyprint_df <- function(x,
      colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
    }
-    # strip columns that do not fit (3 chars as margin)
+    # strip columns that do not fit (width + 2 extra chars as margin)
    width_console <- options()$width
    width_until_col <- x %>%
      select(1:i) %>%
-      apply(1, paste, collapse = strrep(" ", width + 1)) %>%
+      apply(1, paste, collapse = strrep(" ", width + 2)) %>%
      nchar() %>%
      max()
    width_until_col_before <- x %>%
      select(1:(max(i, 2) - 1)) %>%
-      apply(1, paste, collapse = strrep(" ", width + 1)) %>%
+      apply(1, paste, collapse = strrep(" ", width + 2)) %>%
      nchar() %>%
      max()
    extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
@@ -340,9 +340,9 @@ prettyprint_df <- function(x,
  if (ncol(x) < ncol(x.bak)) {
    x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
    if (ncol(x.notshown) == 1) {
-      cat('...and 1 more column: ')
+      cat('... and 1 more column: ')
    } else {
-      cat('...and', ncol(x.notshown), 'more columns: ')
+      cat('... and', ncol(x.notshown), 'more columns: ')
    }
    cat(x.notshown %>%
          colnames() %>%
--- a/data/antibiotics.rda
+++ b/data/antibiotics.rda
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,
 \item{col_patient_id}{column name of the unique IDs of the patients}
-\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)}
+\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
 \item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}
--- a/man/guess_bactid.Rd
+++ b/man/guess_bactid.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/atc.R
+% Please edit documentation in R/guess_bactid.R
 \name{guess_bactid}
 \alias{guess_bactid}
 \title{Find bacteria ID based on genus/species}
@@ -7,13 +7,13 @@
 guess_bactid(x)
 }
 \arguments{
-\item{x}{character vector to determine \code{bactid}}
+\item{x}{character vector or a dataframe with one or two columns}
 }
 \value{
 Character (vector).
 }
 \description{
-Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
+Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
 }
 \examples{
 # These examples all return "STAAUR", the ID of S. aureus:
@@ -25,6 +25,18 @@ guess_bactid("S aureus")
 guess_bactid("Staphylococcus aureus")
 guess_bactid("MRSA") # Methicillin-resistant S. aureus
 guess_bactid("VISA") # Vancomycin Intermediate S. aureus
 \dontrun{
 df$bactid <- guess_bactid(df$microorganism_name)
 # the select function of tidyverse is also supported:
 df$bactid <- df \%>\% select(microorganism_name) \%>\% guess_bactid()
 # and can even contain 2 columns, which is convenient for genus/species combinations:
 df$bactid <- df \%>\% select(genus, species) \%>\% guess_bactid()
 # same result:
 df <- df \%>\% mutate(bactid = paste(genus, species) \%>\% guess_bactid())
 }
 }
 \seealso{
 \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
--- a/man/key_antibiotics.Rd
+++ b/man/key_antibiotics.Rd
@@ -14,9 +14,9 @@ key_antibiotics(tbl, col_bactid = "bactid", info = TRUE, amcl = "amcl",
 \arguments{
 \item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.}
-\item{col_bactid}{column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
+\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
-\item{info}{print warnings}
+\item{info}{print progress}
 \item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive}
 }
--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@@ -1,6 +1,5 @@
 context("atc.R")
 test_that("atc_property works", {
  expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
  expect_equivalent(atc_property("J01CA04", "DDD"), 1)
@@ -15,21 +14,6 @@ test_that("abname works", {
  expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
 })
 test_that("guess_bactid works", {
  expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF"))
  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
  expect_equal(guess_bactid("Negative rods"), "GNR")
  expect_equal(guess_bactid(c("stau",
                              "STAU",
                              "staaur",
                              "S. aureus",
                              "S aureus",
                              "Staphylococcus aureus",
                              "MRSA",
                              "VISA")),
               rep("STAAUR", 8))
 })
 test_that("guess_atc works", {
  expect_equal(guess_atc(c("J01FA01",
                           "Erythromycin",
--- a/tests/testthat/test-guess_bactid.R
+++ b/tests/testthat/test-guess_bactid.R
@@ -0,0 +1,40 @@
 context("guess_bactid.R")
 test_that("guess_bactid works", {
  expect_identical(
    guess_bactid(c("E. coli", "H. influenzae")),
    c("ESCCOL", "HAEINF"))
  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
  expect_equal(guess_bactid("Negative rods"), "GNR")
  expect_identical(
    guess_bactid(c("stau",
                   "STAU",
                   "staaur",
                   "S. aureus",
                   "S aureus",
                   "Staphylococcus aureus",
                   "MRSA",
                   "VISA")),
    rep("STAAUR", 8))
  # select with one column
  expect_identical(
    septic_patients[1:10,] %>%
      left_join_microorganisms() %>%
      select(genus) %>%
      guess_bactid(),
    c("STC", "STC", "NEI", "STA", "STA",
      "NEI", "ENT", "ENT", "ESC", "KLE"))
  # select with two columns
  expect_identical(
    septic_patients[1:10,] %>%
      pull(bactid),
    septic_patients[1:10,] %>%
      left_join_microorganisms() %>%
      select(genus, species) %>%
      guess_bactid())
 })