1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-25 19:26:13 +01:00

fix for printing tibbles, improve guess_bactid

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-06-08 12:06:54 +02:00
parent efdf5a3dc5
commit 0a5898b17d
12 changed files with 306 additions and 209 deletions

View File

@ -1,6 +1,6 @@
Package: AMR
Version: 0.2.0.9002
Date: 2018-05-31
Version: 0.2.0.9003
Date: 2018-06-08
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(

View File

@ -1,13 +1,13 @@
# 0.2.0.90xx (development version)
#### New
* Vignettes about frequency tables
* Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)`
#### Changed
* Renamed `toConsole` parameter of `freq` to `as.data.frame`
* Small translational improvements to the `septic_patients` dataset
* Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"`
* Fix for warning `hybrid evaluation forced for row_number` from the `dplyr` package v0.7.5 and above.
* Fix for warning **hybrid evaluation forced for row_number** ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above
* Support for 1 or 2 columns as input for `guess_bactid`
* Fix for printing tibbles where characters would be accidentally transformed to factors
# 0.2.0 (latest stable version)
#### New

128
R/atc.R
View File

@ -238,135 +238,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi
abcode
}
#' Find bacteria ID based on genus/species
#'
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
#'
#' Every element is first stripped of all characters other than letters and spaces. It is then looked up in a fixed order, and the first step that yields at least one row determines the result: (1) exact \code{bactid}, (2) exact full name, (3) pattern match on the full name, (4) pattern match on the input followed by \code{"species"}, (5) \code{mocode} of \code{\link{microorganisms.umcg}}, (6) the input split in two halves (like \code{"esco"}), (7) pattern match on the input without a leading \code{"Gram"}.
#' @param x character vector to determine \code{bactid}
#' @export
#' @importFrom dplyr %>% filter slice pull
#' @return Character (vector) with the same length as \code{x}. Elements without a match, \code{NA}s and elements without any letters are returned as \code{""}.
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' guess_bactid("stau")
#' guess_bactid("STAU")
#' guess_bactid("staaur")
#' guess_bactid("S. aureus")
#' guess_bactid("S aureus")
#' guess_bactid("Staphylococcus aureus")
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
guess_bactid <- function(x) {
  # paste0() below would turn a zero-length input into the single string "^$",
  # so an empty input has to be answered before any pattern is built
  if (length(x) == 0) {
    return(character(0))
  }

  # abbreviations that would otherwise hit an unintended organism first
  # (Entamoeba coli, Haematobacter influenzae, Staphylococcus auricularis,
  # Pasteurella aerogenes); keys are the lower case search patterns
  ambiguous_patterns <- c('^e.*coli$' = 'Escherichia coli',
                          '^h.*influenzae$' = 'Haemophilus influenzae',
                          '^st.*au$' = 'Staphylococcus aureus',
                          '^stau$' = 'Staphylococcus aureus',
                          '^staaur$' = 'Staphylococcus aureus',
                          '^p.*aer$' = 'Pseudomonas aeruginosa')
  # known trivial names, translated to genus (+ species); keys are upper case
  trivial_names <- c('MRSA' = 'Staphylococcus aureus',
                     'VISA' = 'Staphylococcus aureus',
                     'VRSA' = 'Staphylococcus aureus',
                     'MRSE' = 'Staphylococcus epidermidis',
                     'VRE' = 'Enterococcus',
                     'MRPA' = 'Pseudomonas aeruginosa',
                     'PISP' = 'Streptococcus pneumoniae',
                     'PRSP' = 'Streptococcus pneumoniae',
                     'VISP' = 'Streptococcus pneumoniae',
                     'VRSP' = 'Streptococcus pneumoniae')

  # remove dots and other non-text in case of "E. coli" except spaces,
  # but spaces before and after should be omitted
  raw <- trimws(gsub("[^a-zA-Z ]+", "", x), which = "both")
  # replace space by regex sign
  spaced <- gsub(" ", ".*", raw, fixed = TRUE)
  species_terms <- paste(spaced, 'species')
  # add start and stop
  search_terms <- paste0('^', spaced, '$')

  bactids <- rep("", length(raw))

  for (i in seq_along(raw)) {
    raw_i <- raw[i]
    # NA used to break the scalar `if` conditions, and an empty string would
    # match any "... species" entry; both are reported as 'not found'
    if (is.na(raw_i) || raw_i == "") {
      next
    }
    search_i <- search_terms[i]
    species_i <- species_terms[i]

    hit <- match(tolower(search_i), names(ambiguous_patterns))
    if (!is.na(hit)) {
      search_i <- ambiguous_patterns[[hit]]
    }
    hit <- match(toupper(raw_i), names(trivial_names))
    if (!is.na(hit)) {
      search_i <- trivial_names[[hit]]
    }

    # let's try the ID's first
    found <- AMR::microorganisms %>% filter(bactid == raw_i)
    if (nrow(found) == 0) {
      # now try exact match
      found <- AMR::microorganisms %>% filter(fullname == search_i)
    }
    if (nrow(found) == 0) {
      # try any match
      found <- AMR::microorganisms %>% filter(fullname %like% search_i)
    }
    if (nrow(found) == 0) {
      # try only genus, with 'species' attached
      found <- AMR::microorganisms %>% filter(fullname %like% species_i)
    }
    if (nrow(found) == 0) {
      # search for GLIMS code
      if (toupper(raw_i) %in% toupper(AMR::microorganisms.umcg$mocode)) {
        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(raw_i))
      }
    }
    if (nrow(found) == 0) {
      # try splitting of characters and then find ID
      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
      raw_length <- nchar(raw_i)
      split_i <- paste0(trimws(substr(raw_i, 1, raw_length / 2)),
                        '.* ',
                        trimws(substr(raw_i, (raw_length / 2) + 1, raw_length)))
      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', split_i))
    }
    if (nrow(found) == 0) {
      # try any match with text before and after original search string
      # so "negative rods" will be "GNR"
      if (raw_i %like% "^Gram") {
        # remove the prefix and the leading and trailing spaces it leaves behind
        raw_i <- trimws(gsub("^Gram", "", raw_i, ignore.case = TRUE), which = "both")
      }
      # an empty pattern would match every organism, so only search with text
      if (raw_i != "") {
        found <- AMR::microorganisms %>% filter(fullname %like% raw_i)
      }
    }

    if (nrow(found) != 0) {
      bactids[i] <- found %>%
        slice(1) %>%
        pull(bactid)
    }
  }
  bactids
}
#' Find ATC code based on antibiotic property
#'

View File

@ -22,7 +22,7 @@
#' @param tbl a \code{data.frame} containing isolates.
#' @param col_date column name of the result date (or date that it was received at the lab)
#' @param col_patient_id column name of the unique IDs of the patients
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)
#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.
#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
#' @param col_specimen column name of the specimen type or group
#' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
@ -291,15 +291,15 @@ first_isolate <- function(tbl,
return(tbl %>% pull(real_first_isolate))
}
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
suppressWarnings(
scope.size <- tbl %>%
filter(
suppressWarnings(
row_number() %>% between(row.start,
row.end)
),
row.end),
genus != '') %>%
nrow()
)
# Analysis of first isolate ----
all_first <- tbl %>%
@ -328,6 +328,8 @@ first_isolate <- function(tbl,
}
}
type_param <- type
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
suppressWarnings(
all_first <- all_first %>%
mutate(key_ab_lag = lag(key_ab)) %>%
mutate(key_ab_other = !key_antibiotics_equal(x = key_ab,
@ -339,28 +341,31 @@ first_isolate <- function(tbl,
mutate(
real_first_isolate =
if_else(
suppressWarnings(between(row_number(), row.start, row.end))
between(row_number(), row.start, row.end)
& genus != ''
& (other_pat_or_mo
| days_diff >= episode_days
| key_ab_other),
TRUE,
FALSE))
)
if (info == TRUE) {
cat('\n')
}
} else {
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
suppressWarnings(
all_first <- all_first %>%
mutate(
real_first_isolate =
if_else(
suppressWarnings(between(row_number(), row.start, row.end))
between(row_number(), row.start, row.end)
& genus != ''
& (other_pat_or_mo
| days_diff >= episode_days),
TRUE,
FALSE))
)
}
# first one as TRUE
@ -402,8 +407,7 @@ first_isolate <- function(tbl,
#' Key antibiotics based on bacteria ID
#'
#' @param tbl table with antibiotic columns, like \code{amox} and \code{amcl}.
#' @param col_bactid column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
#' @param info print warnings
#' @inheritParams first_isolate
#' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive
#' @export
#' @importFrom dplyr %>% mutate if_else

185
R/guess_bactid.R Normal file
View File

@ -0,0 +1,185 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Find bacteria ID based on genus/species
#'
#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
#'
#' Every element is first stripped of all characters other than letters and spaces. It is then looked up in a fixed order, and the first step that yields at least one row determines the result: (1) exact \code{bactid}, (2) exact full name, (3) pattern match on the full name, (4) exact match on the input followed by \code{"species"}, (5) pattern match on the input followed by \code{"species"}, (6) \code{mocode} of \code{\link{microorganisms.umcg}}, (7) the input split in two halves (like \code{"esco"}), (8) pattern match on the input without a leading \code{"Gram"}.
#' @param x character vector or a dataframe with one or two columns. With two columns, the values of each row are pasted together (separated by a space) before they are looked up.
#' @export
#' @importFrom dplyr %>% filter pull
#' @return Character (vector) with one element per input element or row. Elements without a match, \code{NA}s and elements without any letters are returned as \code{""}.
#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's.
#' @examples
#' # These examples all return "STAAUR", the ID of S. aureus:
#' guess_bactid("stau")
#' guess_bactid("STAU")
#' guess_bactid("staaur")
#' guess_bactid("S. aureus")
#' guess_bactid("S aureus")
#' guess_bactid("Staphylococcus aureus")
#' guess_bactid("MRSA") # Methicillin-resistant S. aureus
#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus
#'
#' \dontrun{
#' df$bactid <- guess_bactid(df$microorganism_name)
#'
#' # the select function of tidyverse is also supported:
#' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid()
#'
#' # and can even contain 2 columns, which is convenient for genus/species combinations:
#' df$bactid <- df %>% select(genus, species) %>% guess_bactid()
#' # same result:
#' df <- df %>% mutate(bactid = paste(genus, species) %>% guess_bactid())
#' }
guess_bactid <- function(x) {
  if (NCOL(x) > 2) {
    stop('`x` can be 2 columns at most', call. = FALSE)
  }
  if (NCOL(x) == 2) {
    # support tidyverse selection like: df %>% select(colA, colB)
    # paste these columns together; vectorised, so a selection without rows
    # stays empty instead of becoming the text "NA NA"
    x <- paste(pull(x, 1), pull(x, 2), sep = " ")
  } else if (is.data.frame(x)) {
    # support tidyverse selection like: df %>% select(colA)
    x <- pull(x, 1)
  }
  # factors are not 'vectors' according to is.vector(), but are valid input
  x <- as.character(x)

  # paste0() below would turn a zero-length input into the single string "^$",
  # so an empty input has to be answered before any pattern is built
  if (length(x) == 0) {
    return(character(0))
  }

  # abbreviations that would otherwise hit an unintended organism first
  # (Entamoeba coli, Haematobacter influenzae, Staphylococcus auricularis,
  # Pasteurella aerogenes); keys are the lower case search patterns
  ambiguous_patterns <- c('^e.*coli$' = 'Escherichia coli',
                          '^h.*influenzae$' = 'Haemophilus influenzae',
                          '^st.*au$' = 'Staphylococcus aureus',
                          '^stau$' = 'Staphylococcus aureus',
                          '^staaur$' = 'Staphylococcus aureus',
                          '^p.*aer$' = 'Pseudomonas aeruginosa')
  # known trivial names, translated to genus (+ species); keys are upper case
  trivial_names <- c('MRSA' = 'Staphylococcus aureus',
                     'VISA' = 'Staphylococcus aureus',
                     'VRSA' = 'Staphylococcus aureus',
                     'MRSE' = 'Staphylococcus epidermidis',
                     'VRE' = 'Enterococcus',
                     'MRPA' = 'Pseudomonas aeruginosa',
                     'PISP' = 'Streptococcus pneumoniae',
                     'PRSP' = 'Streptococcus pneumoniae',
                     'VISP' = 'Streptococcus pneumoniae',
                     'VRSP' = 'Streptococcus pneumoniae')

  # remove dots and other non-text in case of "E. coli" except spaces,
  # but spaces before and after should be omitted
  raw <- trimws(gsub("[^a-zA-Z ]+", "", x), which = "both")
  # replace space by regex sign
  spaced <- gsub(" ", ".*", raw, fixed = TRUE)
  species_terms <- paste(spaced, 'species')
  # add start and stop
  search_terms <- paste0('^', spaced, '$')

  bactids <- rep("", length(raw))

  for (i in seq_along(raw)) {
    raw_i <- raw[i]
    # NA would be searched for as the literal text "NA", and an empty string
    # would match any "... species" entry; both are reported as 'not found'
    if (is.na(raw_i) || raw_i == "") {
      next
    }
    search_i <- search_terms[i]
    species_i <- species_terms[i]

    hit <- match(tolower(search_i), names(ambiguous_patterns))
    if (!is.na(hit)) {
      search_i <- ambiguous_patterns[[hit]]
    }
    hit <- match(toupper(raw_i), names(trivial_names))
    if (!is.na(hit)) {
      search_i <- trivial_names[[hit]]
    }

    # let's try the ID's first
    found <- AMR::microorganisms %>% filter(bactid == raw_i)
    if (nrow(found) == 0) {
      # now try exact match
      found <- AMR::microorganisms %>% filter(fullname == search_i)
    }
    if (nrow(found) == 0) {
      # try any match
      found <- AMR::microorganisms %>% filter(fullname %like% search_i)
    }
    if (nrow(found) == 0) {
      # try exact match of only genus, with 'species' attached
      # (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s")
      found <- AMR::microorganisms %>% filter(fullname == species_i)
    }
    if (nrow(found) == 0) {
      # try any match of only genus, with 'species' attached
      found <- AMR::microorganisms %>% filter(fullname %like% species_i)
    }
    if (nrow(found) == 0) {
      # search for GLIMS code
      if (toupper(raw_i) %in% toupper(AMR::microorganisms.umcg$mocode)) {
        found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(raw_i))
      }
    }
    if (nrow(found) == 0) {
      # try splitting of characters and then find ID
      # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus
      raw_length <- nchar(raw_i)
      split_i <- paste0(trimws(substr(raw_i, 1, raw_length / 2)),
                        '.* ',
                        trimws(substr(raw_i, (raw_length / 2) + 1, raw_length)))
      found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', split_i))
    }
    if (nrow(found) == 0) {
      # try any match with text before and after original search string
      # so "negative rods" will be "GNR"
      if (raw_i %like% "^Gram") {
        # remove the prefix and the leading and trailing spaces it leaves behind
        raw_i <- trimws(gsub("^Gram", "", raw_i, ignore.case = TRUE), which = "both")
      }
      # an empty pattern would match every organism, so only search with text
      if (raw_i != "") {
        found <- AMR::microorganisms %>% filter(fullname %like% raw_i)
      }
    }

    if (nrow(found) != 0) {
      # take the ID of the first hit
      bactids[i] <- as.character(found[["bactid"]][1])
    }
  }
  bactids
}

View File

@ -192,7 +192,7 @@ prettyprint_df <- function(x,
if (n + 1 < nrow(x)) {
# remove in between part, 1 extra for ~~~~ between first and last part
rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
x <- as.data.frame(x.bak[rows_list,])
x <- as.data.frame(x.bak[rows_list,], stringsAsFactors = FALSE)
colnames(x) <- colnames(x.bak)
rownames(x) <- rownames(x.bak)[rows_list]
# set inbetweener between parts
@ -204,12 +204,12 @@ prettyprint_df <- function(x,
# class will be marked up per column
if (NROW(x.bak) > 0) {
rownames.x <- rownames(x)
x <- x %>%
filter(
# suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number())
suppressWarnings(
row_number() == 1)
) %>%
x <- x %>%
filter(row_number() == 1) %>%
rbind(x, stringsAsFactors = FALSE)
)
rownames(x) <- c('*', rownames.x)
}
@ -252,12 +252,12 @@ prettyprint_df <- function(x,
}
# markup cols
for (i in 1:ncol(x)) {
if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
class(x[, i]) <- class(x.bak[, i])
}
try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
# replace NAs
if (nchar(na) < 2) {
# make as long as the text "NA"
@ -297,16 +297,16 @@ prettyprint_df <- function(x,
colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
}
# strip columns that do not fit (3 chars as margin)
# strip columns that do not fit (width + 2 extra chars as margin)
width_console <- options()$width
width_until_col <- x %>%
select(1:i) %>%
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
apply(1, paste, collapse = strrep(" ", width + 2)) %>%
nchar() %>%
max()
width_until_col_before <- x %>%
select(1:(max(i, 2) - 1)) %>%
apply(1, paste, collapse = strrep(" ", width + 1)) %>%
apply(1, paste, collapse = strrep(" ", width + 2)) %>%
nchar() %>%
max()
extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])

Binary file not shown.

View File

@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,
\item{col_patient_id}{column name of the unique IDs of the patients}
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)}
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}

View File

@ -1,5 +1,5 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/atc.R
% Please edit documentation in R/guess_bactid.R
\name{guess_bactid}
\alias{guess_bactid}
\title{Find bacteria ID based on genus/species}
@ -7,13 +7,13 @@
guess_bactid(x)
}
\arguments{
\item{x}{character vector to determine \code{bactid}}
\item{x}{character vector or a dataframe with one or two columns}
}
\value{
Character (vector).
}
\description{
Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe.
Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
}
\examples{
# These examples all return "STAAUR", the ID of S. aureus:
@ -25,6 +25,18 @@ guess_bactid("S aureus")
guess_bactid("Staphylococcus aureus")
guess_bactid("MRSA") # Methicillin-resistant S. aureus
guess_bactid("VISA") # Vancomycin Intermediate S. aureus
\dontrun{
df$bactid <- guess_bactid(df$microorganism_name)
# the select function of tidyverse is also supported:
df$bactid <- df \%>\% select(microorganism_name) \%>\% guess_bactid()
# and can even contain 2 columns, which is convenient for genus/species combinations:
df$bactid <- df \%>\% select(genus, species) \%>\% guess_bactid()
# same result:
df <- df \%>\% mutate(bactid = paste(genus, species) \%>\% guess_bactid())
}
}
\seealso{
\code{\link{microorganisms}} for the dataframe that is being used to determine ID's.

View File

@ -14,9 +14,9 @@ key_antibiotics(tbl, col_bactid = "bactid", info = TRUE, amcl = "amcl",
\arguments{
\item{tbl}{table with antibiotic columns, like \code{amox} and \code{amcl}.}
\item{col_bactid}{column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.}
\item{info}{print warnings}
\item{info}{print progress}
\item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive}
}

View File

@ -1,6 +1,5 @@
context("atc.R")
test_that("atc_property works", {
expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin")
expect_equivalent(atc_property("J01CA04", "DDD"), 1)
@ -15,21 +14,6 @@ test_that("abname works", {
expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin")
})
test_that("guess_bactid works", {
  # abbreviated genus names must resolve to the intended species
  abbreviated <- c("E. coli", "H. influenzae")
  expect_identical(guess_bactid(abbreviated), c("ESCCOL", "HAEINF"))

  # a full name, and a description without its "Gram" prefix
  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
  expect_equal(guess_bactid("Negative rods"), "GNR")

  # every one of these spellings denotes S. aureus
  s_aureus_inputs <- c("stau",
                       "STAU",
                       "staaur",
                       "S. aureus",
                       "S aureus",
                       "Staphylococcus aureus",
                       "MRSA",
                       "VISA")
  expect_equal(guess_bactid(s_aureus_inputs), rep("STAAUR", 8))
})
test_that("guess_atc works", {
expect_equal(guess_atc(c("J01FA01",
"Erythromycin",

View File

@ -0,0 +1,40 @@
context("guess_bactid.R")

test_that("guess_bactid works", {
  # abbreviated genus names must resolve to the intended species
  abbreviated <- c("E. coli", "H. influenzae")
  expect_identical(guess_bactid(abbreviated), c("ESCCOL", "HAEINF"))

  # a full name, and a description without its "Gram" prefix
  expect_equal(guess_bactid("Escherichia coli"), "ESCCOL")
  expect_equal(guess_bactid("Negative rods"), "GNR")

  # every one of these spellings denotes S. aureus
  s_aureus_inputs <- c("stau",
                       "STAU",
                       "staaur",
                       "S. aureus",
                       "S aureus",
                       "Staphylococcus aureus",
                       "MRSA",
                       "VISA")
  expect_identical(guess_bactid(s_aureus_inputs), rep("STAAUR", 8))

  # first ten isolates, joined with the microorganisms table so that the
  # genus and species columns selected below are available
  first_ten <- septic_patients[1:10, ]
  first_ten_joined <- first_ten %>% left_join_microorganisms()

  # select with one column
  genus_ids <- first_ten_joined %>%
    select(genus) %>%
    guess_bactid()
  expect_identical(
    genus_ids,
    c("STC", "STC", "NEI", "STA", "STA",
      "NEI", "ENT", "ENT", "ESC", "KLE"))

  # select with two columns: must reproduce the IDs already in the data
  genus_species_ids <- first_ten_joined %>%
    select(genus, species) %>%
    guess_bactid()
  expect_identical(first_ten %>% pull(bactid), genus_species_ids)
})