fix for printing tibbles, improve guess_bactid

2025-07-11 21:41:58 +02:00 · 2018-06-08 12:06:54 +02:00
parent efdf5a3dc5
commit 0a5898b17d
12 changed files with 306 additions and 209 deletions
--- a/4
+++ b/4
@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.2.0.9002
-Date: 2018-05-31
+Version: 0.2.0.9003
+Date: 2018-06-08
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
--- a/NEWS.md
+++ b/NEWS.md
@ -1,13 +1,13 @@
 # 0.2.0.90xx (development version)
-#### New
+
 * Vignettes about frequency tables
 * Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
-
-#### Changed
 * Renamed `toConsole` parameter of `freq` to `as.data.frame`
 * Small translational improvements to the `septic_patients` dataset
 * Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"`
-* Fix for warning `hybrid evaluation forced for row_number` from the `dplyr` package v0.7.5 and above.
+* Fix for warning **hybrid evaluation forced for row_number** ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
+* Support for 1 or 2 columns as input for `guess_bactid`
+* Fix for printing tibbles where characters would be accidentally transformed to factors

 # 0.2.0 (latest stable version)
 #### New
--- a/R/atc.R
+++ b/R/atc.R
@ -238,135 +238,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
  abcode
 }

-#' Find bacteria ID based on genus/species
-#'
-#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
-#' @param x character vector to determine \code{bactid}
-#' @export
-#' @importFrom dplyr %>% filter slice pull
-#' @return Character (vector).
-#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
-#' @examples
-#' # These examples all return "STAAUR", the ID of S. aureus:
-#' guess_bactid("stau")
-#' guess_bactid("STAU")
-#' guess_bactid("staaur")
-#' guess_bactid("S. aureus")
-#' guess_bactid("S aureus")
-#' guess_bactid("Staphylococcus aureus")
-#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
-#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
-guess_bactid <- function(x) {
-  # remove dots and other non-text in case of "E. coli" except spaces
-  x <- gsub("[^a-zA-Z ]+", "", x)
-  # but spaces before and after should be omitted
-  x <- trimws(x, which = "both")
-  x.bak <- x
-  # replace space by regex sign
-  x <- gsub(" ", ".*", x, fixed = TRUE)
-  # add start and stop
-  x_species <- paste(x, 'species')
-  x <- paste0('^', x, '$')

-  for (i in 1:length(x)) {
-    if (tolower(x[i]) == '^e.*coli$') {
-      # avoid detection of Entamoeba coli in case of E. coli
-      x[i] <- 'Escherichia coli'
-    }
-    if (tolower(x[i]) == '^h.*influenzae$') {
-      # avoid detection of Haematobacter influenzae in case of H. influenzae
-      x[i] <- 'Haemophilus influenzae'
-    }
-    if (tolower(x[i]) == '^st.*au$'
-        | tolower(x[i]) == '^stau$'
-        | tolower(x[i]) == '^staaur$') {
-      # avoid detection of Staphylococcus auricularis in case of S. aureus
-      x[i] <- 'Staphylococcus aureus'
-    }
-    if (tolower(x[i]) == '^p.*aer$') {
-      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
-      x[i] <- 'Pseudomonas aeruginosa'
-    }
-
-    # translate known trivial names to genus+species
-    if (toupper(x.bak[i]) == 'MRSA'
-        | toupper(x.bak[i]) == 'VISA'
-        | toupper(x.bak[i]) == 'VRSA') {
-      x[i] <- 'Staphylococcus aureus'
-    }
-    if (toupper(x.bak[i]) == 'MRSE') {
-      x[i] <- 'Staphylococcus epidermidis'
-    }
-    if (toupper(x.bak[i]) == 'VRE') {
-      x[i] <- 'Enterococcus'
-    }
-    if (toupper(x.bak[i]) == 'MRPA') {
-      # multi resistant P. aeruginosa
-      x[i] <- 'Pseudomonas aeruginosa'
-    }
-    if (toupper(x.bak[i]) == 'PISP'
-        | toupper(x.bak[i]) == 'PRSP') {
-      # peni resistant S. pneumoniae
-      x[i] <- 'Streptococcus pneumoniae'
-    }
-    if (toupper(x.bak[i]) == 'VISP'
-        | toupper(x.bak[i]) == 'VRSP') {
-      # vanco resistant S. pneumoniae
-      x[i] <- 'Streptococcus pneumoniae'
-    }
-
-    # let's try the ID's first
-    found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
-
-    if (nrow(found) == 0) {
-      # now try exact match
-      found <- AMR::microorganisms %>% filter(fullname == x[i])
-    }
-    if (nrow(found) == 0) {
-      # try any match
-      found <- AMR::microorganisms %>% filter(fullname %like% x[i])
-    }
-    if (nrow(found) == 0) {
-      # try only genus, with 'species' attached
-      found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
-    }
-    if (nrow(found) == 0) {
-      # search for GLIMS code
-      if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
-        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
-      }
-    }
-    if (nrow(found) == 0) {
-      # try splitting of characters and then find ID
-      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
-      x_split <- x
-      x_length <- nchar(x.bak[i])
-      x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
-                     '.* ',
-                     x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
-      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
-    }
-    if (nrow(found) == 0) {
-      # try any match with text before and after original search string
-      # so "negative rods" will be "GNR"
-      if (x.bak[i] %like% "^Gram") {
-        x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
-        # remove leading and trailing spaces again
-        x.bak[i] <- trimws(x.bak[i], which = "both")
-      }
-      found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
-    }
-
-    if (nrow(found) != 0) {
-      x[i] <- found %>%
-        slice(1) %>%
-        pull(bactid)
-    } else {
-      x[i] <- ""
-    }
-  }
-  x
-}

 #' Find ATC code based on antibiotic property
 #'
--- a/R/first_isolates.R
+++ b/R/first_isolates.R
@ -22,7 +22,7 @@
 #' @param tbl a \code{data.frame} containing isolates.
 #' @param col_date column name of the result date (or date that is was received on the lab)
 #' @param col_patient_id column name of the unique IDs of the patients
-#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)
+#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.
 #' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
 #' @param col_specimen column name of the specimen type or group
 #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
@ -291,15 +291,15 @@ first_isolate <- function(tbl,
    return(tbl %>% pull(real_first_isolate))
  }

-  scope.size <- tbl %>%
-    filter(
-      suppressWarnings(
+  # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
+  suppressWarnings(
+    scope.size <- tbl %>%
+      filter(
        row_number() %>% between(row.start,
-                                 row.end)
-      ),
-      genus != '') %>%
-    nrow()
-
+                                 row.end),
+        genus != '') %>%
+      nrow()
+  )

  # Analysis of first isolate ----
  all_first <- tbl %>%
@ -328,39 +328,44 @@ first_isolate <- function(tbl,
      }
    }
    type_param <- type
-    all_first <- all_first %>%
-      mutate(key_ab_lag = lag(key_ab)) %>%
-      mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
-                                                   y = key_ab_lag,
-                                                   type = type_param,
-                                                   ignore_I = ignore_I,
-                                                   points_threshold = points_threshold,
-                                                   info = info)) %>%
-      mutate(
-        real_first_isolate =
-          if_else(
-            suppressWarnings(between(row_number(), row.start, row.end))
-            & genus != ''
-            & (other_pat_or_mo
-               | days_diff >= episode_days
-               | key_ab_other),
-            TRUE,
-            FALSE))
-
+    # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
+    suppressWarnings(
+      all_first <- all_first %>%
+        mutate(key_ab_lag = lag(key_ab)) %>%
+        mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
+                                                     y = key_ab_lag,
+                                                     type = type_param,
+                                                     ignore_I = ignore_I,
+                                                     points_threshold = points_threshold,
+                                                     info = info)) %>%
+        mutate(
+          real_first_isolate =
+            if_else(
+              between(row_number(), row.start, row.end)
+              & genus != ''
+              & (other_pat_or_mo
+                 | days_diff >= episode_days
+                 | key_ab_other),
+              TRUE,
+              FALSE))
+    )
    if (info == TRUE) {
      cat('\n')
    }
  } else {
-    all_first <- all_first %>%
-      mutate(
-        real_first_isolate =
-          if_else(
-            suppressWarnings(between(row_number(), row.start, row.end))
-            & genus != ''
-            & (other_pat_or_mo
-               | days_diff >= episode_days),
-            TRUE,
-            FALSE))
+    # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
+    suppressWarnings(
+      all_first <- all_first %>%
+        mutate(
+          real_first_isolate =
+            if_else(
+              between(row_number(), row.start, row.end)
+              & genus != ''
+              & (other_pat_or_mo
+                 | days_diff >= episode_days),
+              TRUE,
+              FALSE))
+    )
  }

  # first one as TRUE
@ -402,8 +407,7 @@ first_isolate <- function(tbl,
 #' Key antibiotics based on bacteria ID
 #'
 #' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}.
-#' @param col_bactid column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
-#' @param info print warnings
+#' @inheritParams first_isolate
 #' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive
 #' @export
 #' @importFrom dplyr %>% mutate if_else
--- a/R/guess_bactid.R
+++ b/R/guess_bactid.R
@ -0,0 +1,185 @@
+# ==================================================================== #
+# TITLE                                                                #
+# Antimicrobial Resistance (AMR) Analysis                              #
+#                                                                      #
+# AUTHORS                                                              #
+# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
+#                                                                      #
+# LICENCE                                                              #
+# This program is free software; you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License version 2.0,    #
+# as published by the Free Software Foundation.                        #
+#                                                                      #
+# This program is distributed in the hope that it will be useful,      #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
+# GNU General Public License for more details.                         #
+# ==================================================================== #
+
+#' Find bacteria ID based on genus/species
+#'
+#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
+#' @param x character vector or a dataframe with one or two columns
+#' @export
+#' @importFrom dplyr %>% filter pull
+#' @return Character (vector).
+#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
+#' @examples
+#' # These examples all return "STAAUR", the ID of S. aureus:
+#' guess_bactid("stau")
+#' guess_bactid("STAU")
+#' guess_bactid("staaur")
+#' guess_bactid("S. aureus")
+#' guess_bactid("S aureus")
+#' guess_bactid("Staphylococcus aureus")
+#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
+#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
+#'
+#' \dontrun{
+#' df$bactid <- guess_bactid(df$microorganism_name)
+#'
+#' # the select function of tidyverse is also supported:
+#' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid()
+#'
+#' # and can even contain 2 columns, which is convenient for genus/species combinations:
+#' df$bactid <- df %>% select(genus, species) %>% guess_bactid()
+#' # same result:
+#' df <- df %>% mutate(bactid = paste(genus, species) %>% guess_bactid())
+#' }
+guess_bactid <- function(x) {
+
+  if (NCOL(x) == 2) {
+    # support tidyverse selection like: df %>% select(colA, colB)
+    # paste these columns together
+    x_vector <- vector("character", NROW(x))
+    for (i in 1:NROW(x)) {
+      x_vector[i] <- paste(pull(x[i,], 1), pull(x[i,], 2), sep = " ")
+    }
+    x <- x_vector
+  } else {
+    if (NCOL(x) > 2) {
+      stop('`x` can be 2 columns at most', call. = FALSE)
+    }
+    # support tidyverse selection like: df %>% select(colA)
+    if (!is.vector(x)) {
+      x <- pull(x, 1)
+    }
+  }
+
+  # remove dots and other non-text in case of "E. coli" except spaces
+  x <- gsub("[^a-zA-Z ]+", "", x)
+  # but spaces before and after should be omitted
+  x <- trimws(x, which = "both")
+  x.bak <- x
+  # replace space by regex sign
+  x <- gsub(" ", ".*", x, fixed = TRUE)
+  # add start and stop
+  x_species <- paste(x, 'species')
+  x <- paste0('^', x, '$')
+
+  for (i in 1:length(x)) {
+    if (tolower(x[i]) == '^e.*coli$') {
+      # avoid detection of Entamoeba coli in case of E. coli
+      x[i] <- 'Escherichia coli'
+    }
+    if (tolower(x[i]) == '^h.*influenzae$') {
+      # avoid detection of Haematobacter influenzae in case of H. influenzae
+      x[i] <- 'Haemophilus influenzae'
+    }
+    if (tolower(x[i]) == '^st.*au$'
+        | tolower(x[i]) == '^stau$'
+        | tolower(x[i]) == '^staaur$') {
+      # avoid detection of Staphylococcus auricularis in case of S. aureus
+      x[i] <- 'Staphylococcus aureus'
+    }
+    if (tolower(x[i]) == '^p.*aer$') {
+      # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa
+      x[i] <- 'Pseudomonas aeruginosa'
+    }
+
+    # translate known trivial names to genus+species
+    if (!is.na(x.bak[i])) {
+      if (toupper(x.bak[i]) == 'MRSA'
+          | toupper(x.bak[i]) == 'VISA'
+          | toupper(x.bak[i]) == 'VRSA') {
+        x[i] <- 'Staphylococcus aureus'
+      }
+      if (toupper(x.bak[i]) == 'MRSE') {
+        x[i] <- 'Staphylococcus epidermidis'
+      }
+      if (toupper(x.bak[i]) == 'VRE') {
+        x[i] <- 'Enterococcus'
+      }
+      if (toupper(x.bak[i]) == 'MRPA') {
+        # multi resistant P. aeruginosa
+        x[i] <- 'Pseudomonas aeruginosa'
+      }
+      if (toupper(x.bak[i]) == 'PISP'
+          | toupper(x.bak[i]) == 'PRSP') {
+        # peni resistant S. pneumoniae
+        x[i] <- 'Streptococcus pneumoniae'
+      }
+      if (toupper(x.bak[i]) == 'VISP'
+          | toupper(x.bak[i]) == 'VRSP') {
+        # vanco resistant S. pneumoniae
+        x[i] <- 'Streptococcus pneumoniae'
+      }
+    }
+
+    # let's try the ID's first
+    found <- AMR::microorganisms %>% filter(bactid == x.bak[i])
+
+    if (nrow(found) == 0) {
+      # now try exact match
+      found <- AMR::microorganisms %>% filter(fullname == x[i])
+    }
+    if (nrow(found) == 0) {
+      # try any match
+      found <- AMR::microorganisms %>% filter(fullname %like% x[i])
+    }
+    if (nrow(found) == 0) {
+      # try exact match of only genus, with 'species' attached
+      # (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s")
+      found <- AMR::microorganisms %>% filter(fullname == x_species[i])
+    }
+    if (nrow(found) == 0) {
+      # try any match of only genus, with 'species' attached
+      found <- AMR::microorganisms %>% filter(fullname %like% x_species[i])
+    }
+    if (nrow(found) == 0) {
+      # search for GLIMS code
+      if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) {
+        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i]))
+      }
+    }
+    if (nrow(found) == 0) {
+      # try splitting of characters and then find ID
+      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
+      x_split <- x
+      x_length <- nchar(x.bak[i])
+      x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(),
+                           '.* ',
+                           x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws())
+      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i]))
+    }
+    if (nrow(found) == 0) {
+      # try any match with text before and after original search string
+      # so "negative rods" will be "GNR"
+      if (x.bak[i] %like% "^Gram") {
+        x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE)
+        # remove leading and trailing spaces again
+        x.bak[i] <- trimws(x.bak[i], which = "both")
+      }
+      if (!is.na(x.bak[i])) {
+        found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i])
+      }
+    }
+
+    if (nrow(found) != 0) {
+      x[i] <- as.character(found[1, 'bactid'])
+    } else {
+      x[i] <- ""
+    }
+  }
+  x
+}
--- a/R/print.R
+++ b/R/print.R
@ -192,7 +192,7 @@ prettyprint_df <- function(x,
  if (n + 1 < nrow(x)) {
    # remove in between part, 1 extra for ~~~~ between first and last part
    rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
-    x <- as.data.frame(x.bak[rows_list,])
+    x <- as.data.frame(x.bak[rows_list,], stringsAsFactors = FALSE)
    colnames(x) <- colnames(x.bak)
    rownames(x) <- rownames(x.bak)[rows_list]
    # set inbetweener between parts
@ -204,12 +204,12 @@ prettyprint_df <- function(x,
    # class will be marked up per column
    if (NROW(x.bak) > 0) {
      rownames.x <- rownames(x)
-      x <- x %>%
-        filter(
-          suppressWarnings(
-            row_number() == 1)
-        ) %>%
-        rbind(x, stringsAsFactors = FALSE)
+      # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
+      suppressWarnings(
+        x <- x %>%
+          filter(row_number() == 1) %>%
+          rbind(x, stringsAsFactors = FALSE)
+      )
      rownames(x) <- c('*', rownames.x)
    }

@ -252,12 +252,12 @@ prettyprint_df <- function(x,
  }

  # markup cols
+
  for (i in 1:ncol(x)) {
    if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
      class(x[, i]) <- class(x.bak[, i])
    }
    try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
-
    # replace NAs
    if (nchar(na) < 2) {
      # make as long as the text "NA"
@ -297,16 +297,16 @@ prettyprint_df <- function(x,
      colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
    }

-    # strip columns that do not fit (3 chars as margin)
+    # strip columns that do not fit (width + 2 extra chars as margin)
    width_console <- options()$width
    width_until_col <- x %>%
      select(1:i) %>%
-      apply(1, paste, collapse = strrep(" ", width + 1)) %>%
+      apply(1, paste, collapse = strrep(" ", width + 2)) %>%
      nchar() %>%
      max()
    width_until_col_before <- x %>%
      select(1:(max(i, 2) - 1)) %>%
-      apply(1, paste, collapse = strrep(" ", width + 1)) %>%
+      apply(1, paste, collapse = strrep(" ", width + 2)) %>%
      nchar() %>%
      max()
    extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
@ -340,9 +340,9 @@ prettyprint_df <- function(x,
  if (ncol(x) < ncol(x.bak)) {
    x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
    if (ncol(x.notshown) == 1) {
-      cat('...and 1 more column: ')
+      cat('... and 1 more column: ')
    } else {
-      cat('...and', ncol(x.notshown), 'more columns: ')
+      cat('... and', ncol(x.notshown), 'more columns: ')
    }
    cat(x.notshown %>%
          colnames() %>%
--- a/data/antibiotics.rda
+++ b/data/antibiotics.rda
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,

 \item{col_patient_id}{column name of the unique IDs of the patients}

-\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)}
+\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}

 \item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}

--- a/man/guess_bactid.Rd
+++ b/man/guess_bactid.Rd
@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/atc.R
+% Please edit documentation in R/guess_bactid.R
 \name{guess_bactid}
 \alias{guess_bactid}
 \title{Find bacteria ID based on genus/species}
@ -7,13 +7,13 @@
 guess_bactid(x)
 }
 \arguments{
-\item{x}{character vector to determine \code{bactid}}
+\item{x}{character vector or a dataframe with one or two columns}
 }
 \value{
 Character (vector).
 }
 \description{
-Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
+Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
 }
 \examples{
 # These examples all return "STAAUR", the ID of S. aureus:
@ -25,6 +25,18 @@ guess_bactid("S aureus")
 guess_bactid("Staphylococcus aureus")
 guess_bactid("MRSA") # Methicillin-resistant S. aureus
 guess_bactid("VISA") # Vancomycin Intermediate S. aureus
+
+\dontrun{
+df$bactid <- guess_bactid(df$microorganism_name)
+
+# the select function of tidyverse is also supported:
+df$bactid <- df \%>\% select(microorganism_name) \%>\% guess_bactid()
+
+# and can even contain 2 columns, which is convenient for genus/species combinations:
+df$bactid <- df \%>\% select(genus, species) \%>\% guess_bactid()
+# same result:
+df <- df \%>\% mutate(bactid = paste(genus, species) \%>\% guess_bactid())
+}
 }
 \seealso{
 \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
--- a/man/key_antibiotics.Rd
+++ b/man/key_antibiotics.Rd
@ -14,9 +14,9 @@ key_antibiotics(tbl, col_bactid = "bactid", info = TRUE, amcl = "amcl",
 \arguments{
 \item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.}

-\item{col_bactid}{column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
+\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}

-\item{info}{print warnings}
+\item{info}{print progress}

 \item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive}
 }
--- a/tests/testthat/test-atc.R
+++ b/tests/testthat/test-atc.R
@ -1,6 +1,5 @@
 context("atc.R")

-
 test_that("atc_property works", {
  expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
  expect_equivalent(atc_property("J01CA04", "DDD"), 1)
@ -15,21 +14,6 @@ test_that("abname works", {
  expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
 })

-test_that("guess_bactid works", {
-  expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF"))
-  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
-  expect_equal(guess_bactid("Negative rods"), "GNR")
-  expect_equal(guess_bactid(c("stau",
-                              "STAU",
-                              "staaur",
-                              "S. aureus",
-                              "S aureus",
-                              "Staphylococcus aureus",
-                              "MRSA",
-                              "VISA")),
-               rep("STAAUR", 8))
-})
-
 test_that("guess_atc works", {
  expect_equal(guess_atc(c("J01FA01",
                           "Erythromycin",
--- a/tests/testthat/test-guess_bactid.R
+++ b/tests/testthat/test-guess_bactid.R
@ -0,0 +1,40 @@
+context("guess_bactid.R")
+
+test_that("guess_bactid works", {
+  expect_identical(
+    guess_bactid(c("E. coli", "H. influenzae")),
+    c("ESCCOL", "HAEINF"))
+
+  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
+
+  expect_equal(guess_bactid("Negative rods"), "GNR")
+
+  expect_identical(
+    guess_bactid(c("stau",
+                   "STAU",
+                   "staaur",
+                   "S. aureus",
+                   "S aureus",
+                   "Staphylococcus aureus",
+                   "MRSA",
+                   "VISA")),
+    rep("STAAUR", 8))
+
+  # select with one column
+  expect_identical(
+    septic_patients[1:10,] %>%
+      left_join_microorganisms() %>%
+      select(genus) %>%
+      guess_bactid(),
+    c("STC", "STC", "NEI", "STA", "STA",
+      "NEI", "ENT", "ENT", "ESC", "KLE"))
+
+  # select with two columns
+  expect_identical(
+    septic_patients[1:10,] %>%
+      pull(bactid),
+    septic_patients[1:10,] %>%
+      left_join_microorganisms() %>%
+      select(genus, species) %>%
+      guess_bactid())
+})