diff --git a/DESCRIPTION b/DESCRIPTION index a5cb48d5..37bbb549 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.2.0.9002 -Date: 2018-05-31 +Version: 0.2.0.9003 +Date: 2018-06-08 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index 615fbdde..c94f4c8f 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,13 +1,13 @@ # 0.2.0.90xx (development version) -#### New + * Vignettes about frequency tables * Possibility to globally set the default for the amount of items to print in frequency tables (`freq` function), with `options(max.print.freq = n)` - -#### Changed * Renamed `toConsole` parameter of `freq` to `as.data.frame` * Small translational improvements to the `septic_patients` dataset * Coerce RSI values from combined MIC/RSI values: `as.rsi("<=0.002; S")` will now return `"S"` -* Fix for warning `hybrid evaluation forced for row_number` from the `dplyr` package v0.7.5 and above. +* Fix for warning **hybrid evaluation forced for row_number** ([`924b62`](https://github.com/tidyverse/dplyr/commit/924b62)) from the `dplyr` package v0.7.5 and above +* Support for 1 or 2 columns as input for `guess_bactid` +* Fix for printing tibbles where characters would be accidentally transformed to factors # 0.2.0 (latest stable version) #### New diff --git a/R/atc.R b/R/atc.R index 6a14b349..444d4610 100755 --- a/R/atc.R +++ b/R/atc.R @@ -238,135 +238,7 @@ abname <- function(abcode, from = c("guess", "atc", "molis", "umcg"), to = 'offi abcode } -#' Find bacteria ID based on genus/species -#' -#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe. -#' @param x character vector to determine \code{bactid} -#' @export -#' @importFrom dplyr %>% filter slice pull -#' @return Character (vector). -#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's. -#' @examples -#' # These examples all return "STAAUR", the ID of S. aureus: -#' guess_bactid("stau") -#' guess_bactid("STAU") -#' guess_bactid("staaur") -#' guess_bactid("S. aureus") -#' guess_bactid("S aureus") -#' guess_bactid("Staphylococcus aureus") -#' guess_bactid("MRSA") # Methicillin-resistant S. aureus -#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus -guess_bactid <- function(x) { - # remove dots and other non-text in case of "E. coli" except spaces - x <- gsub("[^a-zA-Z ]+", "", x) - # but spaces before and after should be omitted - x <- trimws(x, which = "both") - x.bak <- x - # replace space by regex sign - x <- gsub(" ", ".*", x, fixed = TRUE) - # add start and stop - x_species <- paste(x, 'species') - x <- paste0('^', x, '$') - for (i in 1:length(x)) { - if (tolower(x[i]) == '^e.*coli$') { - # avoid detection of Entamoeba coli in case of E. coli - x[i] <- 'Escherichia coli' - } - if (tolower(x[i]) == '^h.*influenzae$') { - # avoid detection of Haematobacter influenzae in case of H. influenzae - x[i] <- 'Haemophilus influenzae' - } - if (tolower(x[i]) == '^st.*au$' - | tolower(x[i]) == '^stau$' - | tolower(x[i]) == '^staaur$') { - # avoid detection of Staphylococcus auricularis in case of S. aureus - x[i] <- 'Staphylococcus aureus' - } - if (tolower(x[i]) == '^p.*aer$') { - # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa - x[i] <- 'Pseudomonas aeruginosa' - } - - # translate known trivial names to genus+species - if (toupper(x.bak[i]) == 'MRSA' - | toupper(x.bak[i]) == 'VISA' - | toupper(x.bak[i]) == 'VRSA') { - x[i] <- 'Staphylococcus aureus' - } - if (toupper(x.bak[i]) == 'MRSE') { - x[i] <- 'Staphylococcus epidermidis' - } - if (toupper(x.bak[i]) == 'VRE') { - x[i] <- 'Enterococcus' - } - if (toupper(x.bak[i]) == 'MRPA') { - # multi resistant P. aeruginosa - x[i] <- 'Pseudomonas aeruginosa' - } - if (toupper(x.bak[i]) == 'PISP' - | toupper(x.bak[i]) == 'PRSP') { - # peni resistant S. pneumoniae - x[i] <- 'Streptococcus pneumoniae' - } - if (toupper(x.bak[i]) == 'VISP' - | toupper(x.bak[i]) == 'VRSP') { - # vanco resistant S. pneumoniae - x[i] <- 'Streptococcus pneumoniae' - } - - # let's try the ID's first - found <- AMR::microorganisms %>% filter(bactid == x.bak[i]) - - if (nrow(found) == 0) { - # now try exact match - found <- AMR::microorganisms %>% filter(fullname == x[i]) - } - if (nrow(found) == 0) { - # try any match - found <- AMR::microorganisms %>% filter(fullname %like% x[i]) - } - if (nrow(found) == 0) { - # try only genus, with 'species' attached - found <- AMR::microorganisms %>% filter(fullname %like% x_species[i]) - } - if (nrow(found) == 0) { - # search for GLIMS code - if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) { - found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i])) - } - } - if (nrow(found) == 0) { - # try splitting of characters and then find ID - # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus - x_split <- x - x_length <- nchar(x.bak[i]) - x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(), - '.* ', - x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i])) - } - if (nrow(found) == 0) { - # try any match with text before and after original search string - # so "negative rods" will be "GNR" - if (x.bak[i] %like% "^Gram") { - x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE) - # remove leading and trailing spaces again - x.bak[i] <- trimws(x.bak[i], which = "both") - } - found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i]) - } - - if (nrow(found) != 0) { - x[i] <- found %>% - slice(1) %>% - pull(bactid) - } else { - x[i] <- "" - } - } - x -} #' Find ATC code based on antibiotic property #' diff --git a/R/first_isolates.R b/R/first_isolates.R index 1dba01c3..f64847d1 100755 --- a/R/first_isolates.R +++ b/R/first_isolates.R @@ -22,7 +22,7 @@ #' @param tbl a \code{data.frame} containing isolates. #' @param col_date column name of the result date (or date that is was received on the lab) #' @param col_patient_id column name of the unique IDs of the patients -#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset) +#' @param col_bactid column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input. #' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation. #' @param col_specimen column name of the specimen type or group #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU) @@ -291,15 +291,15 @@ first_isolate <- function(tbl, return(tbl %>% pull(real_first_isolate)) } - scope.size <- tbl %>% - filter( - suppressWarnings( + # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) + suppressWarnings( + scope.size <- tbl %>% + filter( row_number() %>% between(row.start, - row.end) - ), - genus != '') %>% - nrow() - + row.end), + genus != '') %>% + nrow() + ) # Analysis of first isolate ---- all_first <- tbl %>% @@ -328,39 +328,44 @@ first_isolate <- function(tbl, } } type_param <- type - all_first <- all_first %>% - mutate(key_ab_lag = lag(key_ab)) %>% - mutate(key_ab_other = !key_antibiotics_equal(x = key_ab, - y = key_ab_lag, - type = type_param, - ignore_I = ignore_I, - points_threshold = points_threshold, - info = info)) %>% - mutate( - real_first_isolate = - if_else( - suppressWarnings(between(row_number(), row.start, row.end)) - & genus != '' - & (other_pat_or_mo - | days_diff >= episode_days - | key_ab_other), - TRUE, - FALSE)) - + # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) + suppressWarnings( + all_first <- all_first %>% + mutate(key_ab_lag = lag(key_ab)) %>% + mutate(key_ab_other = !key_antibiotics_equal(x = key_ab, + y = key_ab_lag, + type = type_param, + ignore_I = ignore_I, + points_threshold = points_threshold, + info = info)) %>% + mutate( + real_first_isolate = + if_else( + between(row_number(), row.start, row.end) + & genus != '' + & (other_pat_or_mo + | days_diff >= episode_days + | key_ab_other), + TRUE, + FALSE)) + ) if (info == TRUE) { cat('\n') } } else { - all_first <- all_first %>% - mutate( - real_first_isolate = - if_else( - suppressWarnings(between(row_number(), row.start, row.end)) - & genus != '' - & (other_pat_or_mo - | days_diff >= episode_days), - TRUE, - FALSE)) + # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) + suppressWarnings( + all_first <- all_first %>% + mutate( + real_first_isolate = + if_else( + between(row_number(), row.start, row.end) + & genus != '' + & (other_pat_or_mo + | days_diff >= episode_days), + TRUE, + FALSE)) + ) } # first one as TRUE @@ -402,8 +407,7 @@ first_isolate <- function(tbl, #' Key antibiotics based on bacteria ID #' #' @param tbl table with antibiotics coloms, like \code{amox} and \code{amcl}. -#' @param col_bactid column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}} -#' @param info print warnings +#' @inheritParams first_isolate #' @param amcl,amox,cfot,cfta,cftr,cfur,cipr,clar,clin,clox,doxy,gent,line,mero,peni,pita,rifa,teic,trsu,vanc column names of antibiotics, case-insensitive #' @export #' @importFrom dplyr %>% mutate if_else diff --git a/R/guess_bactid.R b/R/guess_bactid.R new file mode 100644 index 00000000..9194c4c8 --- /dev/null +++ b/R/guess_bactid.R @@ -0,0 +1,185 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# ==================================================================== # + +#' Find bacteria ID based on genus/species +#' +#' Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. +#' @param x character vector or a dataframe with one or two columns +#' @export +#' @importFrom dplyr %>% filter pull +#' @return Character (vector). +#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's. +#' @examples +#' # These examples all return "STAAUR", the ID of S. aureus: +#' guess_bactid("stau") +#' guess_bactid("STAU") +#' guess_bactid("staaur") +#' guess_bactid("S. aureus") +#' guess_bactid("S aureus") +#' guess_bactid("Staphylococcus aureus") +#' guess_bactid("MRSA") # Methicillin-resistant S. aureus +#' guess_bactid("VISA") # Vancomycin Intermediate S. aureus +#' +#' \dontrun{ +#' df$bactid <- guess_bactid(df$microorganism_name) +#' +#' # the select function of tidyverse is also supported: +#' df$bactid <- df %>% select(microorganism_name) %>% guess_bactid() +#' +#' # and can even contain 2 columns, which is convenient for genus/species combinations: +#' df$bactid <- df %>% select(genus, species) %>% guess_bactid() +#' # same result: +#' df <- df %>% mutate(bactid = paste(genus, species) %>% guess_bactid()) +#' } +guess_bactid <- function(x) { + + if (NCOL(x) == 2) { + # support tidyverse selection like: df %>% select(colA, colB) + # paste these columns together + x_vector <- vector("character", NROW(x)) + for (i in 1:NROW(x)) { + x_vector[i] <- paste(pull(x[i,], 1), pull(x[i,], 2), sep = " ") + } + x <- x_vector + } else { + if (NCOL(x) > 2) { + stop('`x` can be 2 columns at most', call. = FALSE) + } + # support tidyverse selection like: df %>% select(colA) + if (!is.vector(x)) { + x <- pull(x, 1) + } + } + + # remove dots and other non-text in case of "E. coli" except spaces + x <- gsub("[^a-zA-Z ]+", "", x) + # but spaces before and after should be omitted + x <- trimws(x, which = "both") + x.bak <- x + # replace space by regex sign + x <- gsub(" ", ".*", x, fixed = TRUE) + # add start and stop + x_species <- paste(x, 'species') + x <- paste0('^', x, '$') + + for (i in 1:length(x)) { + if (tolower(x[i]) == '^e.*coli$') { + # avoid detection of Entamoeba coli in case of E. coli + x[i] <- 'Escherichia coli' + } + if (tolower(x[i]) == '^h.*influenzae$') { + # avoid detection of Haematobacter influenzae in case of H. influenzae + x[i] <- 'Haemophilus influenzae' + } + if (tolower(x[i]) == '^st.*au$' + | tolower(x[i]) == '^stau$' + | tolower(x[i]) == '^staaur$') { + # avoid detection of Staphylococcus auricularis in case of S. aureus + x[i] <- 'Staphylococcus aureus' + } + if (tolower(x[i]) == '^p.*aer$') { + # avoid detection of Pasteurella aerogenes in case of Pseudomonas aeruginosa + x[i] <- 'Pseudomonas aeruginosa' + } + + # translate known trivial names to genus+species + if (!is.na(x.bak[i])) { + if (toupper(x.bak[i]) == 'MRSA' + | toupper(x.bak[i]) == 'VISA' + | toupper(x.bak[i]) == 'VRSA') { + x[i] <- 'Staphylococcus aureus' + } + if (toupper(x.bak[i]) == 'MRSE') { + x[i] <- 'Staphylococcus epidermidis' + } + if (toupper(x.bak[i]) == 'VRE') { + x[i] <- 'Enterococcus' + } + if (toupper(x.bak[i]) == 'MRPA') { + # multi resistant P. aeruginosa + x[i] <- 'Pseudomonas aeruginosa' + } + if (toupper(x.bak[i]) == 'PISP' + | toupper(x.bak[i]) == 'PRSP') { + # peni resistant S. pneumoniae + x[i] <- 'Streptococcus pneumoniae' + } + if (toupper(x.bak[i]) == 'VISP' + | toupper(x.bak[i]) == 'VRSP') { + # vanco resistant S. pneumoniae + x[i] <- 'Streptococcus pneumoniae' + } + } + + # let's try the ID's first + found <- AMR::microorganisms %>% filter(bactid == x.bak[i]) + + if (nrow(found) == 0) { + # now try exact match + found <- AMR::microorganisms %>% filter(fullname == x[i]) + } + if (nrow(found) == 0) { + # try any match + found <- AMR::microorganisms %>% filter(fullname %like% x[i]) + } + if (nrow(found) == 0) { + # try exact match of only genus, with 'species' attached + # (e.g. this prevents Streptococcus for becoming Peptostreptococcus, since "p" < "s") + found <- AMR::microorganisms %>% filter(fullname == x_species[i]) + } + if (nrow(found) == 0) { + # try any match of only genus, with 'species' attached + found <- AMR::microorganisms %>% filter(fullname %like% x_species[i]) + } + if (nrow(found) == 0) { + # search for GLIMS code + if (toupper(x.bak[i]) %in% toupper(AMR::microorganisms.umcg$mocode)) { + found <- AMR::microorganisms.umcg %>% filter(toupper(mocode) == toupper(x.bak[i])) + } + } + if (nrow(found) == 0) { + # try splitting of characters and then find ID + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus + x_split <- x + x_length <- nchar(x.bak[i]) + x_split[i] <- paste0(x.bak[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x.bak[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- AMR::microorganisms %>% filter(fullname %like% paste0('^', x_split[i])) + } + if (nrow(found) == 0) { + # try any match with text before and after original search string + # so "negative rods" will be "GNR" + if (x.bak[i] %like% "^Gram") { + x.bak[i] <- gsub("^Gram", "", x.bak[i], ignore.case = TRUE) + # remove leading and trailing spaces again + x.bak[i] <- trimws(x.bak[i], which = "both") + } + if (!is.na(x.bak[i])) { + found <- AMR::microorganisms %>% filter(fullname %like% x.bak[i]) + } + } + + if (nrow(found) != 0) { + x[i] <- as.character(found[1, 'bactid']) + } else { + x[i] <- "" + } + } + x +} diff --git a/R/print.R b/R/print.R index a631ac22..7cb3b4de 100755 --- a/R/print.R +++ b/R/print.R @@ -192,7 +192,7 @@ prettyprint_df <- function(x, if (n + 1 < nrow(x)) { # remove in between part, 1 extra for ~~~~ between first and last part rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x)) - x <- as.data.frame(x.bak[rows_list,]) + x <- as.data.frame(x.bak[rows_list,], stringsAsFactors = FALSE) colnames(x) <- colnames(x.bak) rownames(x) <- rownames(x.bak)[rows_list] # set inbetweener between parts @@ -204,12 +204,12 @@ prettyprint_df <- function(x, # class will be marked up per column if (NROW(x.bak) > 0) { rownames.x <- rownames(x) - x <- x %>% - filter( - suppressWarnings( - row_number() == 1) - ) %>% - rbind(x, stringsAsFactors = FALSE) + # suppress warnings because dplyr want us to use library(dplyr) when using filter(row_number()) + suppressWarnings( + x <- x %>% + filter(row_number() == 1) %>% + rbind(x, stringsAsFactors = FALSE) + ) rownames(x) <- c('*', rownames.x) } @@ -252,12 +252,12 @@ prettyprint_df <- function(x, } # markup cols + for (i in 1:ncol(x)) { if (all(!class(x[, i]) %in% class(x.bak[, i]))) { class(x[, i]) <- class(x.bak[, i]) } try(x[, i] <- format(x %>% pull(i)), silent = TRUE) - # replace NAs if (nchar(na) < 2) { # make as long as the text "NA" @@ -297,16 +297,16 @@ prettyprint_df <- function(x, colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i]) } - # strip columns that do not fit (3 chars as margin) + # strip columns that do not fit (width + 2 extra chars as margin) width_console <- options()$width width_until_col <- x %>% select(1:i) %>% - apply(1, paste, collapse = strrep(" ", width + 1)) %>% + apply(1, paste, collapse = strrep(" ", width + 2)) %>% nchar() %>% max() width_until_col_before <- x %>% select(1:(max(i, 2) - 1)) %>% - apply(1, paste, collapse = strrep(" ", width + 1)) %>% + apply(1, paste, collapse = strrep(" ", width + 2)) %>% nchar() %>% max() extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))]) @@ -340,9 +340,9 @@ prettyprint_df <- function(x, if (ncol(x) < ncol(x.bak)) { x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak)) if (ncol(x.notshown) == 1) { - cat('...and 1 more column: ') + cat('... and 1 more column: ') } else { - cat('...and', ncol(x.notshown), 'more columns: ') + cat('... and', ncol(x.notshown), 'more columns: ') } cat(x.notshown %>% colnames() %>% diff --git a/data/antibiotics.rda b/data/antibiotics.rda index 6d5d5d8b..27221fda 100755 Binary files a/data/antibiotics.rda and b/data/antibiotics.rda differ diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd index c0ee9a95..a6595f37 100755 --- a/man/first_isolate.Rd +++ b/man/first_isolate.Rd @@ -21,7 +21,7 @@ first_isolate(tbl, col_date, col_patient_id, col_bactid = NA, \item{col_patient_id}{column name of the unique IDs of the patients} -\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset)} +\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.} \item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.} diff --git a/man/guess_bactid.Rd b/man/guess_bactid.Rd index 9dc99719..729a478a 100755 --- a/man/guess_bactid.Rd +++ b/man/guess_bactid.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/atc.R +% Please edit documentation in R/guess_bactid.R \name{guess_bactid} \alias{guess_bactid} \title{Find bacteria ID based on genus/species} @@ -7,13 +7,13 @@ guess_bactid(x) } \arguments{ -\item{x}{character vector to determine \code{bactid}} +\item{x}{character vector or a dataframe with one or two columns} } \value{ Character (vector). } \description{ -Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also use a \code{\link{paste}} of a genus and species column to use the full name as input: \code{x = paste(df$genus, df$species)}, where \code{df} is your dataframe. +Use this function to determine a valid ID based on a genus (and species). This input could be a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. } \examples{ # These examples all return "STAAUR", the ID of S. aureus: @@ -25,6 +25,18 @@ guess_bactid("S aureus") guess_bactid("Staphylococcus aureus") guess_bactid("MRSA") # Methicillin-resistant S. aureus guess_bactid("VISA") # Vancomycin Intermediate S. aureus + +\dontrun{ +df$bactid <- guess_bactid(df$microorganism_name) + +# the select function of tidyverse is also supported: +df$bactid <- df \%>\% select(microorganism_name) \%>\% guess_bactid() + +# and can even contain 2 columns, which is convenient for genus/species combinations: +df$bactid <- df \%>\% select(genus, species) \%>\% guess_bactid() +# same result: +df <- df \%>\% mutate(bactid = paste(genus, species) \%>\% guess_bactid()) +} } \seealso{ \code{\link{microorganisms}} for the dataframe that is being used to determine ID's. diff --git a/man/key_antibiotics.Rd b/man/key_antibiotics.Rd index 12932662..6ff3af20 100755 --- a/man/key_antibiotics.Rd +++ b/man/key_antibiotics.Rd @@ -14,9 +14,9 @@ key_antibiotics(tbl, col_bactid = "bactid", info = TRUE, amcl = "amcl", \arguments{ \item{tbl}{table with antibiotics coloms, like \code{amox} and \code{amcl}.} -\item{col_bactid}{column of bacteria IDs in \code{tbl}; these should occur in \code{microorganisms$bactid}, see \code{\link{microorganisms}}} +\item{col_bactid}{column name of the unique IDs of the microorganisms (should occur in the \code{\link{microorganisms}} dataset). Get your bactid's with the function \code{\link{guess_bactid}}, that takes microorganism names as input.} -\item{info}{print warnings} +\item{info}{print progress} \item{amcl, amox, cfot, cfta, cftr, cfur, cipr, clar, clin, clox, doxy, gent, line, mero, peni, pita, rifa, teic, trsu, vanc}{column names of antibiotics, case-insensitive} } diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R index d309bbb9..8fc674ef 100755 --- a/tests/testthat/test-atc.R +++ b/tests/testthat/test-atc.R @@ -1,6 +1,5 @@ context("atc.R") - test_that("atc_property works", { expect_equal(tolower(atc_property("J01CA04", property = "Name")), "amoxicillin") expect_equivalent(atc_property("J01CA04", "DDD"), 1) @@ -15,21 +14,6 @@ test_that("abname works", { expect_equal(abname("J01CA04", from = 'atc'), "Amoxicillin") }) -test_that("guess_bactid works", { - expect_identical(guess_bactid(c("E. coli", "H. influenzae")), c("ESCCOL", "HAEINF")) - expect_equal(guess_bactid("Escherichia coli"), "ESCCOL") - expect_equal(guess_bactid("Negative rods"), "GNR") - expect_equal(guess_bactid(c("stau", - "STAU", - "staaur", - "S. aureus", - "S aureus", - "Staphylococcus aureus", - "MRSA", - "VISA")), - rep("STAAUR", 8)) -}) - test_that("guess_atc works", { expect_equal(guess_atc(c("J01FA01", "Erythromycin", diff --git a/tests/testthat/test-guess_bactid.R b/tests/testthat/test-guess_bactid.R new file mode 100644 index 00000000..8bb7b010 --- /dev/null +++ b/tests/testthat/test-guess_bactid.R @@ -0,0 +1,40 @@ +context("guess_bactid.R") + +test_that("guess_bactid works", { + expect_identical( + guess_bactid(c("E. coli", "H. influenzae")), + c("ESCCOL", "HAEINF")) + + expect_equal(guess_bactid("Escherichia coli"), "ESCCOL") + + expect_equal(guess_bactid("Negative rods"), "GNR") + + expect_identical( + guess_bactid(c("stau", + "STAU", + "staaur", + "S. aureus", + "S aureus", + "Staphylococcus aureus", + "MRSA", + "VISA")), + rep("STAAUR", 8)) + + # select with one column + expect_identical( + septic_patients[1:10,] %>% + left_join_microorganisms() %>% + select(genus) %>% + guess_bactid(), + c("STC", "STC", "NEI", "STA", "STA", + "NEI", "ENT", "ENT", "ESC", "KLE")) + + # select with two columns + expect_identical( + septic_patients[1:10,] %>% + pull(bactid), + septic_patients[1:10,] %>% + left_join_microorganisms() %>% + select(genus, species) %>% + guess_bactid()) +})