diff --git a/DESCRIPTION b/DESCRIPTION index 72867c1a..b28fc9b5 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.3.0.9008 -Date: 2018-09-16 +Version: 0.3.0.9009 +Date: 2018-09-24 Title: Antimicrobial Resistance Analysis Authors@R: c( person( @@ -48,6 +48,7 @@ Imports: backports, clipr, curl, + data.table (>= 1.9.0), dplyr (>= 0.7.0), hms, knitr (>= 1.0.0), diff --git a/NAMESPACE b/NAMESPACE index de08bf3f..dd4ddc09 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,7 +90,7 @@ export(kurtosis) export(labels_rsi_count) export(left_join_microorganisms) export(like) -export(mo_aerobic) +export(mo_TSN) export(mo_class) export(mo_family) export(mo_fullname) @@ -101,6 +101,7 @@ export(mo_phylum) export(mo_property) export(mo_shortname) export(mo_species) +export(mo_subkingdom) export(mo_subspecies) export(mo_taxonomy) export(mo_type) @@ -161,6 +162,9 @@ exportMethods(summary.rsi) importFrom(clipr,read_clip_tbl) importFrom(clipr,write_clip) importFrom(curl,nslookup) +importFrom(data.table,as.data.table) +importFrom(data.table,data.table) +importFrom(data.table,setkey) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) importFrom(dplyr,arrange_at) diff --git a/NEWS.md b/NEWS.md index e8108efb..750b7b65 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,30 @@ # 0.3.0.90xx (latest development version) #### New +* The data set `microorganisms` now contains **all microbial taxonomic data from ITIS** (kingdoms Bacteria, Fungi and Protozoa), the Integrated Taxonomy Information System, available via https://itis.gov. The data set now contains more than 18,000 microorganisms with all known bacteria, fungi and protozoa according ITIS with genus, species, subspecies, family, order, class, phylum and subkingdom. The new data set `microorganisms.old` contains all previously known taxonomic names from those kingdoms. +* Aliases for existing function `mo_property` + * Taxonomic names: `mo_phylum`, `mo_class`, `mo_order`, `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies` + * Semantic names: `mo_fullname`, `mo_shortname` + * Microbial properties: `mo_type`, `mo_gramstain`. + + They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale: + ```r + mo_gramstain("E. coli") + # [1] "Gram negative" + mo_gramstain("E. coli", language = "de") # "de" = Deutsch / German + # [1] "Gramnegativ" + mo_gramstain("E. coli", language = "es") # "es" = Español / Spanish + # [1] "Gram negativo" + mo_fullname("S. group A") # when run on a on a Portuguese system + # [1] "Streptococcus grupo A" + ``` + + Furthermore, old taxonomic names kan easily be looked up and give a note about the taxonomic change: + ```r + mo_fullname("Pseudomonas facilis") + # Note: 'Pseudomonas facilis' was renamed to 'Acidovorax facilis' by Willems et al. in 1990 + # [1] "Acidovorax facilis" + ``` * Functions `count_R`, `count_IR`, `count_I`, `count_SI` and `count_S` to selectively count resistant or susceptible isolates * Extra function `count_df` (which works like `portion_df`) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables * Function `is.rsi.eligible` to check for columns that have valid antimicrobial results, but do not have the `rsi` class yet. Transform the columns of your raw data with: `data %>% mutate_if(is.rsi.eligible, as.rsi)` @@ -27,21 +51,7 @@ * All old syntaxes will still work with this version, but will throw warnings * Function `labels_rsi_count` to print datalabels on a RSI `ggplot2` model * Functions `as.atc` and `is.atc` to transform/look up antibiotic ATC codes as defined by the WHO. The existing function `guess_atc` is now an alias of `as.atc`. -* Aliases for existing function `mo_property` and new data from ITIS (Integrated Taxonomic Information System, https://www.itis.gov) - * Taxonomic names: `mo_phylum`, `mo_class`, `mo_order`, `mo_family`, `mo_genus`, `mo_species`, `mo_subspecies` - * Semantic names: `mo_fullname`, `mo_shortname` - * Microbial properties: `mo_aerobic`, `mo_type`, `mo_gramstain`. - They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale: - ```r - mo_gramstain("E. coli") - # [1] "Negative rods" - mo_gramstain("E. coli", language = "de") # "de" = Deutsch / German - # [1] "Negative Stäbchen" - mo_gramstain("E. coli", language = "es") # "es" = Español / Spanish - # [1] "Bacilos negativos" - mo_fullname("S. group A") # when run on a on a Portuguese system - # [1] "Streptococcus grupo A" - ``` + * Function `ab_property` and its aliases: `ab_name`, `ab_tradenames`, `ab_certe`, `ab_umcg` and `ab_trivial_nl` * Introduction to AMR as a vignette diff --git a/R/data.R b/R/data.R index a70e83e7..b01e2fd7 100755 --- a/R/data.R +++ b/R/data.R @@ -120,30 +120,48 @@ # "antibiotics" -#' Data set with human pathogenic microorganisms +#' Data set with taxonomic data from ITIS #' -#' A data set containing (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}. -#' @format A \code{\link{tibble}} with 2,642 observations and 14 variables: +#' A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}. +#' @inheritSection as.mo ITIS +#' @format A \code{\link{data.frame}} with 18,831 observations and 15 variables: #' \describe{ #' \item{\code{mo}}{ID of microorganism} -#' \item{\code{bactsys}}{Bactsyscode of microorganism} -#' \item{\code{genus}}{Genus name of microorganism, like \code{"Echerichia"}} -#' \item{\code{species}}{Species name of microorganism, like \code{"coli"}} -#' \item{\code{subspecies}}{Subspecies name of bio-/serovar of microorganism, like \code{"EHEC"}} -#' \item{\code{fullname}}{Full name, like \code{"Echerichia coli (EHEC)"}} -#' \item{\code{gramstain}}{Gram of microorganism, like \code{"Negative rods"}} -#' \item{\code{aerobic}}{Logical whether bacteria is aerobic} +#' \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS} +#' \item{\code{genus}}{Taxonomic genus of the microorganism as found in ITIS, see Source} +#' \item{\code{species}}{Taxonomic species of the microorganism as found in ITIS, see Source} +#' \item{\code{subspecies}}{Taxonomic subspecies of the microorganism as found in ITIS, see Source} +#' \item{\code{fullname}}{Full name, like \code{"Echerichia coli"}} #' \item{\code{family}}{Taxonomic family of the microorganism as found in ITIS, see Source} #' \item{\code{order}}{Taxonomic order of the microorganism as found in ITIS, see Source} #' \item{\code{class}}{Taxonomic class of the microorganism as found in ITIS, see Source} #' \item{\code{phylum}}{Taxonomic phylum of the microorganism as found in ITIS, see Source} -#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungus/yeast"}} +#' \item{\code{subkingdom}}{Taxonomic subkingdom of the microorganism as found in ITIS, see Source} +#' \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}} +#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}} #' \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.} +#' \item{\code{mo.old}}{The old ID for package versions 0.3.0 and lower.} #' } -#' @source Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. -#' @seealso \code{\link{guess_mo}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}} +#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. +#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}} "microorganisms" +#' Data set with old taxonomic data from ITIS +#' +#' A data set containing old, previously valid, taxonomic names. This data set is used internally by \code{\link{as.mo}}. +#' @inheritSection as.mo ITIS +#' @format A \code{\link{data.frame}} with 58 observations and 5 variables: +#' \describe{ +#' \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS} +#' \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source} +#' \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS} +#' \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source} +#' \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source} +#' } +#' @source [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. +#' @seealso \code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms}} +"microorganisms.old" + #' Translation table for UMCG #' #' A data set containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}. @@ -152,7 +170,7 @@ #' \item{\code{umcg}}{Code of microorganism according to UMCG MMB} #' \item{\code{mo}}{Code of microorganism in \code{\link{microorganisms}}} #' } -#' @seealso \code{\link{guess_mo}} \code{\link{microorganisms}} +#' @seealso \code{\link{as.mo}} \code{\link{microorganisms}} "microorganisms.umcg" #' Data set with 2000 blood culture isolates of septic patients diff --git a/R/globals.R b/R/globals.R index 80ae961a..9f19c1f8 100755 --- a/R/globals.R +++ b/R/globals.R @@ -17,9 +17,11 @@ # ==================================================================== # globalVariables(c(".", + "..property", "antibiotic", "Antibiotic", "antibiotics", + "authors", "cnt", "count", "cum_count", @@ -29,6 +31,7 @@ globalVariables(c(".", "fctlvl", "first_isolate_row_index", "Freq", + "fullname", "genus", "gramstain", "Interpretation", @@ -40,8 +43,11 @@ globalVariables(c(".", "median", "mic", "microorganisms", + "microorganisms.old", "mo", + "mo.old", "n", + "name", "observations", "other_pat_or_mo", "Pasted", @@ -52,6 +58,9 @@ globalVariables(c(".", "S", "septic_patients", "species", + "tsn", + "tsn_new", "value", "Value", - "y")) + "y", + "year")) diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index 818e7386..5247a988 100644 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -150,7 +150,7 @@ key_antibiotics <- function(tbl, # Gram + tbl <- tbl %>% mutate(key_ab = - if_else(gramstain %like% '^Positive ', + if_else(gramstain == "Gram positive", apply(X = tbl[, gram_positive], MARGIN = 1, FUN = function(x) paste(x, collapse = "")), @@ -158,7 +158,7 @@ key_antibiotics <- function(tbl, # Gram - tbl <- tbl %>% mutate(key_ab = - if_else(gramstain %like% '^Negative ', + if_else(gramstain == "Gram negative", apply(X = tbl[, gram_negative], MARGIN = 1, FUN = function(x) paste(x, collapse = "")), diff --git a/R/misc.R b/R/misc.R index f8d18207..56d8b746 100755 --- a/R/misc.R +++ b/R/misc.R @@ -157,6 +157,12 @@ tbl_parse_guess <- function(tbl, #' @importFrom dplyr case_when Sys.locale <- function() { + alreadyset <- getOption("AMR_locale") + if (!is.null(alreadyset)) { + if (tolower(alreadyset) %in% c("en", "de", "nl", "es", "fr", "pt", "it")) { + return(tolower(alreadyset)) + } + } sys <- base::Sys.getlocale() case_when( sys %like% '(Deutsch|German|de_)' ~ "de", diff --git a/R/mo.R b/R/mo.R index 9ebcea60..ce8c1078 100644 --- a/R/mo.R +++ b/R/mo.R @@ -18,38 +18,62 @@ #' Transform to microorganism ID #' -#' Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. +#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms \emph{Bacteria}, \emph{Fungi} and \emph{Protozoa} (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. #' @param x a character vector or a \code{data.frame} with one or two columns #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. #' #' This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS". -#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, i.e. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +#' @param Lancefield a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. #' #' This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D. +#' @param allow_uncertain a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result. #' @rdname as.mo #' @aliases mo #' @keywords mo Becker becker Lancefield lancefield guess -#' @details \code{guess_mo} is an alias of \code{as.mo}. +#' @details +#' A microbial ID (class: \code{mo}) typically looks like these examples:\cr +#' \preformatted{ +#' Code Full name +#' --------------- -------------------------------------- +#' B_KLBSL Klebsiella +#' B_KLBSL_PNE Klebsiella pneumoniae +#' B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis +#' | | | | +#' | | | | +#' | | | ----> subspecies, a 3-4 letter acronym +#' | | ----> species, a 3-4 letter acronym +#' | ----> genus, a 5-7 letter acronym, mostly without vowels +#' ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P) +#' } #' #' Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples. #' -#' Thus function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example: +#' This function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example: #' \itemize{ #' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} #' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason} #' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}} #' \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}} #' } -#' Moreover, this function also supports ID's based on only Gram stain, when the species is not known. \cr -#' For example, \code{"Gram negative rods"} and \code{"GNR"} will both return the ID of a Gram negative rod: \code{GNR}. -#' @source +#' This means that looking up human non-pathogenic microorganisms takes a longer time compares to human pathogenic microorganisms. +#' +#' \code{guess_mo} is an alias of \code{as.mo}. +#' @section ITIS: +#' \if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} +#' This \code{AMR} package contains the \strong{complete microbial taxonomic data} from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package. +# (source as section, so it can be inherited by mo_property:) +#' @section Source: #' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} #' #' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} +#' +#' [3] Integrated Taxonomic Information System (ITIS). Retrieved September 2018. \url{http://www.itis.gov} #' @export -#' @importFrom dplyr %>% pull left_join arrange +#' @importFrom dplyr %>% pull left_join +#' @importFrom data.table as.data.table setkey #' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. -#' @seealso \code{\link{microorganisms}} for the dataframe that is being used to determine ID's. +#' @seealso \code{\link{microorganisms}} for the \code{data.frame} with ITIS content that is being used to determine ID's. \cr +#' The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code. #' @examples #' # These examples all return "STAAUR", the ID of S. aureus: #' as.mo("stau") @@ -61,22 +85,27 @@ #' as.mo("MRSA") # Methicillin Resistant S. aureus #' as.mo("VISA") # Vancomycin Intermediate S. aureus #' as.mo("VRSA") # Vancomycin Resistant S. aureus +#' as.mo(369) # Search on TSN (Taxonomic Serial Number), a unique identifier +#' # for the Integrated Taxonomic Information System (ITIS) #' #' as.mo("Streptococcus group A") #' as.mo("GAS") # Group A Streptococci #' as.mo("GBS") # Group B Streptococci #' #' # guess_mo is an alias of as.mo and works the same -#' guess_mo("S. epidermidis") # will remain species: STAEPI -#' guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: STACNS +#' guess_mo("S. epidermidis") # will remain species: B_STPHY_EPI +#' guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CNS #' -#' guess_mo("S. pyogenes") # will remain species: STCPYO -#' guess_mo("S. pyogenes", Lancefield = TRUE) # will not remain species: STCGRA +#' guess_mo("S. pyogenes") # will remain species: B_STRPTC_PYO +#' guess_mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPTC_GRA #' #' # Use mo_* functions to get a specific property based on `mo` -#' Ecoli <- as.mo("E. coli") # returns `ESCCOL` +#' Ecoli <- as.mo("E. coli") # returns `B_ESCHR_COL` #' mo_genus(Ecoli) # returns "Escherichia" -#' mo_gramstain(Ecoli) # returns "Negative rods" +#' mo_gramstain(Ecoli) # returns "Gram negative" +#' # but it uses as.mo internally too, so you could also just use: +#' mo_genus("E. coli") # returns "Escherichia" +#' #' #' \dontrun{ #' df$mo <- as.mo(df$microorganism_name) @@ -96,7 +125,7 @@ #' df <- df %>% #' mutate(mo = guess_mo(paste(genus, species))) #' } -as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { +as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE) { if (NCOL(x) == 2) { # support tidyverse selection like: df %>% select(colA, colB) @@ -118,17 +147,33 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { } } - MOs <- AMR::microorganisms %>% - arrange(prevalence) %>% # more expected result on multiple findings - filter(!mo %like% '^_FAM', # don't search in those - (nchar(mo) > 3 | mo %in% c("GNR", "GPR", "GNC", "GPC"))) # no genera + MOs <- as.data.table(AMR::microorganisms) + setkey(MOs, prevalence, tsn) + MOs_mostprevalent <- MOs[prevalence != 9999,] + MOs_allothers <- NULL # will be set later, if needed + MOs_old <- NULL # will be set later, if needed + + + if (all(unique(x) %in% MOs[,mo])) { + class(x) <- "mo" + attr(x, 'package') <- 'AMR' + attr(x, 'ITIS') <- TRUE + return(x) + } + if (AMR::is.mo(x) & isTRUE(attributes(x)$ITIS)) { + # check for new mo class, data coming from ITIS + return(x) + } + + failures <- character(0) x_input <- x # only check the uniques, which is way faster x <- unique(x) - x_backup <- x + x_backup <- trimws(x, which = "both") + x_species <- paste(x_backup, "species") # translate to English for supported languages of mo_property x <- gsub("(Gruppe|gruppe|groep|grupo|gruppo|groupe)", "group", x) # remove 'empty' genus and species values @@ -138,6 +183,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { # but spaces before and after should be omitted x <- trimws(x, which = "both") x_trimmed <- x + x_trimmed_species <- paste(x_trimmed, "species") # replace space by regex sign x_withspaces <- gsub(" ", ".* ", x, fixed = TRUE) x <- gsub(" ", ".*", x, fixed = TRUE) @@ -148,111 +194,137 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x_withspaces <- paste0('^', x_withspaces, '$') # cat(paste0('x "', x, '"\n')) + # cat(paste0('x_species "', x_species, '"\n')) # cat(paste0('x_withspaces_all "', x_withspaces_all, '"\n')) # cat(paste0('x_withspaces_start "', x_withspaces_start, '"\n')) # cat(paste0('x_withspaces "', x_withspaces, '"\n')) # cat(paste0('x_backup "', x_backup, '"\n')) + # cat(paste0('x_trimmed "', x_trimmed, '"\n')) + # cat(paste0('x_trimmed_species "', x_trimmed_species, '"\n')) for (i in 1:length(x)) { - if (identical(x_trimmed[i], "")) { + if (identical(x_trimmed[i], "") | is.na(x_trimmed[i])) { # empty values x[i] <- NA next } - if (toupper(x_backup[i]) %in% AMR::microorganisms$mo) { - # is already a valid MO code - x[i] <- toupper(x_backup[i]) - next - } - if (toupper(x_trimmed[i]) %in% AMR::microorganisms$mo) { - # is already a valid MO code - x[i] <- toupper(x_trimmed[i]) - next - } - if (tolower(x_backup[i]) %in% tolower(AMR::microorganisms$fullname)) { - # is exact match in fullname - x[i] <- AMR::microorganisms[which(AMR::microorganisms$fullname == x_backup[i]), ]$mo[1L] - next - } - - # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- - if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]' - | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]' - | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') { - # coerce S. coagulase negative - x[i] <- 'STACNS' - next - } - if (tolower(x[i]) %like% '[ck]oagulas[ea] positie?[vf]' - | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] positie?[vf]' - | tolower(x[i]) %like% '[ck]o?ps[^a-z]?$') { - # coerce S. coagulase positive - x[i] <- 'STACPS' - next - } # translate known trivial abbreviations to genus + species ---- if (!is.na(x_trimmed[i])) { if (toupper(x_trimmed[i]) == 'MRSA' | toupper(x_trimmed[i]) == 'VISA' | toupper(x_trimmed[i]) == 'VRSA') { - x[i] <- 'STAAUR' + x[i] <- 'B_STPHY_AUR' next } if (toupper(x_trimmed[i]) == 'MRSE') { - x[i] <- 'STAEPI' + x[i] <- 'B_STPHY_EPI' next } if (toupper(x_trimmed[i]) == 'VRE') { - x[i] <- 'ENCSPP' + x[i] <- 'B_ENTRC' next } if (toupper(x_trimmed[i]) == 'MRPA') { # multi resistant P. aeruginosa - x[i] <- 'PSEAER' + x[i] <- 'B_PDMNS_AER' next } if (toupper(x_trimmed[i]) %in% c('PISP', 'PRSP', 'VISP', 'VRSP')) { # peni I, peni R, vanco I, vanco R: S. pneumoniae - x[i] <- 'STCPNE' + x[i] <- 'B_STRPTC_PNE' next } - if (toupper(x_trimmed[i]) %like% '^G[ABCDFHK]S$') { - x[i] <- gsub("G([ABCDFHK])S", "STCGR\\1", x_trimmed[i]) + if (toupper(x_trimmed[i]) %like% '^G[ABCDFGHK]S$') { + x[i] <- gsub("G([ABCDFGHK])S", "B_STRPTC_GR\\1", x_trimmed[i]) next } + # CoNS/CoPS in different languages (support for German, Dutch, Spanish, Portuguese) ---- + if (tolower(x[i]) %like% '[ck]oagulas[ea] negatie?[vf]' + | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] negatie?[vf]' + | tolower(x[i]) %like% '[ck]o?ns[^a-z]?$') { + # coerce S. coagulase negative + x[i] <- 'B_STPHY_CNS' + next + } + if (tolower(x[i]) %like% '[ck]oagulas[ea] positie?[vf]' + | tolower(x_trimmed[i]) %like% '[ck]oagulas[ea] positie?[vf]' + | tolower(x[i]) %like% '[ck]o?ps[^a-z]?$') { + # coerce S. coagulase positive + x[i] <- 'B_STPHY_CPS' + next + } + } + + # FIRST TRY FULLNAMES AND CODES + # if only genus is available, don't select species + if (all(!c(x[i], x_trimmed[i]) %like% " ")) { + found <- MOs[tolower(fullname) %in% tolower(c(x_species[i], x_trimmed_species[i])), mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + if (nchar(x_trimmed[i]) > 4) { + # not when abbr is esco, stau, klpn, etc. + found <- MOs[tolower(fullname) %like% gsub(" ", ".*", x_trimmed_species[i], fixed = TRUE), mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + } + } + + # search for GLIMS code ---- + found <- AMR::microorganisms.umcg[which(toupper(AMR::microorganisms.umcg$umcg) == toupper(x_trimmed[i])),]$mo + if (length(found) > 0) { + x[i] <- MOs[mo.old == found, mo][1L] + next + } + + # TRY FIRST THOUSAND MOST PREVALENT IN HUMAN INFECTIONS ---- + + found <- MOs_mostprevalent[tolower(fullname) %in% tolower(c(x_backup[i], x_trimmed[i])), mo] + # most probable: is exact match in fullname + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_mostprevalent[tsn == x_trimmed[i], mo] + # is a valid TSN + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_mostprevalent[mo == toupper(x_backup[i]), mo] + # is a valid mo + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_mostprevalent[mo.old == toupper(x_backup[i]) + | (substr(x_backup[i], 4, 6) == "SPP" & mo.old == substr(x_backup[i], 1, 3)), mo] + # is a valid old mo + if (length(found) > 0) { + x[i] <- found[1L] + next } # try any match keeping spaces ---- - found <- MOs[which(MOs$fullname %like% x_withspaces[i]),]$mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } - - # try the same, now based on genus + species ---- - found <- MOs[which(paste(MOs$genus, MOs$species) %like% x_withspaces[i]),]$mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } - - # try any match with genus, keeping spaces, not ending with $ ---- - found <- MOs[which(MOs$genus %like% x_withspaces_start[i] & MOs$mo %like% 'SPP$'),]$mo + found <- MOs_mostprevalent[fullname %like% x_withspaces[i], mo] if (length(found) > 0) { x[i] <- found[1L] next } # try any match keeping spaces, not ending with $ ---- - found <- MOs[which(MOs$fullname %like% x_withspaces_start[i]),]$mo + found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], mo] if (length(found) > 0) { x[i] <- found[1L] next } # try any match diregarding spaces ---- - found <- MOs[which(MOs$fullname %like% x[i]),]$mo + found <- MOs_mostprevalent[fullname %like% x[i], mo] if (length(found) > 0) { x[i] <- found[1L] next @@ -260,14 +332,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { # try fullname without start and stop regex, to also find subspecies ---- # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH - found <- MOs[which(gsub("[\\(\\)]", "", MOs$fullname) %like% x_withspaces_all[i]),]$mo - if (length(found) > 0) { - x[i] <- found[1L] - next - } - - # search for GLIMS code ---- - found <- AMR::microorganisms.umcg[which(toupper(AMR::microorganisms.umcg$umcg) == toupper(x_trimmed[i])),]$mo + found <- MOs_mostprevalent[fullname %like% x_withspaces_start[i], mo] if (length(found) > 0) { x[i] <- found[1L] next @@ -280,7 +345,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), '.* ', x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) - found <- MOs[which(MOs$fullname %like% paste0('^', x_split[i])),]$mo + found <- MOs_mostprevalent[fullname %like% paste0('^', x_split[i]), mo] if (length(found) > 0) { x[i] <- found[1L] next @@ -288,15 +353,137 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { # try any match with text before and after original search string ---- # so "negative rods" will be "GNR" - if (x_trimmed[i] %like% "^Gram") { - x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) - # remove leading and trailing spaces again - x_trimmed[i] <- trimws(x_trimmed[i], which = "both") + # if (x_trimmed[i] %like% "^Gram") { + # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) + # # remove leading and trailing spaces again + # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") + # } + # if (!is.na(x_trimmed[i])) { + # found <- MOs_mostprevalent[fullname %like% x_trimmed[i], mo] + # if (length(found) > 0) { + # x[i] <- found[1L] + # next + # } + # } + + # THEN TRY ALL OTHERS ---- + if (is.null(MOs_allothers)) { + MOs_allothers <- MOs[prevalence == 9999,] } - if (!is.na(x_trimmed[i])) { - found <- MOs[which(MOs$fullname %like% x_trimmed[i]),]$mo - if (length(found) > 0) { - x[i] <- found[1L] + + found <- MOs_allothers[tolower(fullname) == tolower(x_backup[i]), mo] + # most probable: is exact match in fullname + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[tolower(fullname) == tolower(x_trimmed[i]), mo] + # most probable: is exact match in fullname + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[tsn == x_trimmed[i], mo] + # is a valid TSN + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[mo == toupper(x_backup[i]), mo] + # is a valid mo + if (length(found) > 0) { + x[i] <- found[1L] + next + } + found <- MOs_allothers[mo.old == toupper(x_backup[i]), mo] + # is a valid old mo + if (length(found) > 0) { + x[i] <- found[1L] + next + } + + # try any match keeping spaces ---- + found <- MOs_allothers[fullname %like% x_withspaces[i], mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + + # try any match keeping spaces, not ending with $ ---- + found <- MOs_allothers[fullname %like% x_withspaces_start[i], mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + + # try any match diregarding spaces ---- + found <- MOs_allothers[fullname %like% x[i], mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + + # try fullname without start and stop regex, to also find subspecies ---- + # like "K. pneu rhino" -> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH + found <- MOs_allothers[fullname %like% x_withspaces_start[i], mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + + # try splitting of characters and then find ID ---- + # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus + x_split <- x + x_length <- nchar(x_trimmed[i]) + x_split[i] <- paste0(x_trimmed[i] %>% substr(1, x_length / 2) %>% trimws(), + '.* ', + x_trimmed[i] %>% substr((x_length / 2) + 1, x_length) %>% trimws()) + found <- MOs_allothers[fullname %like% paste0('^', x_split[i]), mo] + if (length(found) > 0) { + x[i] <- found[1L] + next + } + + # # try any match with text before and after original search string ---- + # # so "negative rods" will be "GNR" + # if (x_trimmed[i] %like% "^Gram") { + # x_trimmed[i] <- gsub("^Gram", "", x_trimmed[i], ignore.case = TRUE) + # # remove leading and trailing spaces again + # x_trimmed[i] <- trimws(x_trimmed[i], which = "both") + # } + # if (!is.na(x_trimmed[i])) { + # found <- MOs_allothers[fullname %like% x_trimmed[i], mo] + # if (length(found) > 0) { + # x[i] <- found[1L] + # next + # } + # } + + # MISCELLANEOUS ---- + + # look for old taxonomic names ---- + if (is.null(MOs_old)) { + MOs_old <- as.data.table(microorganisms.old) + setkey(MOs_old, name, tsn_new) + } + found <- MOs_old[tolower(name) == tolower(x_backup[i]) | + tsn == x_trimmed[i],] + if (NROW(found) > 0) { + x[i] <- MOs[tsn == found[1, tsn_new], mo] + message("Note: '", found[1, name], "' was renamed to '", + MOs[tsn == found[1, tsn_new], fullname], "' by ", + found[1, authors], " in ", found[1, year]) + next + } + + # check for uncertain results ---- + # (1) try to strip off one element and check the remains + if (allow_uncertain == TRUE) { + x_strip <- x_backup[i] %>% strsplit(" ") %>% unlist() + x_strip <- x_strip[1:length(x_strip) - 1] + x[i] <- suppressWarnings(suppressMessages(as.mo(x_strip))) + if (!is.na(x[i])) { + warning("Uncertain result: '", x_backup[i], "' -> '", MOs[mo == x[i], fullname], "' (", x[i], ")") next } } @@ -309,7 +496,7 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0) { - warning("These ", length(failures) , " values could not be coerced to a valid mo: ", + warning("These ", length(failures) , " values could not be coerced (try again with allow_uncertain = TRUE):\n", paste('"', unique(failures), '"', sep = "", collapse = ', '), ".", call. = FALSE) @@ -341,43 +528,36 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { "pseudintermedius", "pseudointermedius", "schleiferi")) %>% pull(mo) - x[x %in% CoNS] <- "STACNS" - x[x %in% CoPS] <- "STACPS" + x[x %in% CoNS] <- "B_STPHY_CNS" + x[x %in% CoPS] <- "B_STPHY_CPS" if (Becker == "all") { - x[x == "STAAUR"] <- "STACPS" + x[x == "B_STPHY_AUR"] <- "B_STPHY_CPS" } } # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { # group A - x[x == "STCPYO"] <- "STCGRA" # S. pyogenes + x[x == "B_STRPTC_PYO"] <- "B_STRPTC_GRA" # S. pyogenes # group B - x[x == "STCAGA"] <- "STCGRB" # S. agalactiae + x[x == "B_STRPTC_AGA"] <- "B_STRPTC_GRB" # S. agalactiae # group C S_groupC <- MOs %>% filter(genus == "Streptococcus", species %in% c("equisimilis", "equi", "zooepidemicus", "dysgalactiae")) %>% pull(mo) - x[x %in% S_groupC] <- "STCGRC" # S. agalactiae + x[x %in% S_groupC] <- "B_STRPTC_GRC" # S. agalactiae if (Lancefield == "all") { - x[substr(x, 1, 3) == "ENC"] <- "STCGRD" # all Enterococci + x[substr(x, 1, 7) == "B_ENTRC"] <- "B_STRPTC_GRD" # all Enterococci } # group F - x[x == "STCANG"] <- "STCGRF" # S. anginosus + x[x == "B_STRPTC_ANG"] <- "B_STRPTC_GRF" # S. anginosus # group H - x[x == "STCSAN"] <- "STCGRH" # S. sanguis + x[x == "B_STRPTC_SAN"] <- "B_STRPTC_GRH" # S. sanguinis # group K - x[x == "STCSAL"] <- "STCGRK" # S. salivarius + x[x == "B_STRPTC_SAL"] <- "B_STRPTC_GRK" # S. salivarius } - # for the returned genera without species, add species ---- - # like "ESC" -> "ESCSPP", but only where the input contained it - indices <- nchar(unique(x)) == 3 & !x %like% "[A-Z]{3}SPP" & !x %in% c("GNR", "GPR", "GNC", "GPC", - "GNS", "GPS", "GNK", "GPK") - indices <- indices[!is.na(indices)] - x[indices] <- paste0(x[indices], 'SPP') - # left join the found results to the original input values (x_input) df_found <- data.frame(input = as.character(unique(x_input)), found = x, @@ -392,9 +572,11 @@ as.mo <- function(x, Becker = FALSE, Lancefield = FALSE) { class(x) <- "mo" attr(x, 'package') <- 'AMR' + attr(x, 'ITIS') <- TRUE x } + #' @rdname as.mo #' @export is.mo <- function(x) { diff --git a/R/mo_property.R b/R/mo_property.R index 3addd71d..910e969d 100644 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -22,21 +22,17 @@ #' @param x any (vector of) text that can be coerced to a valid microorganism code with \code{\link{as.mo}} #' @param property one of the column names of one of the \code{\link{microorganisms}} data set, like \code{"mo"}, \code{"bactsys"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"fullname"}, \code{"gramstain"} and \code{"aerobic"} #' @inheritParams as.mo -#' @param language language of the returned text, defaults to the systems language. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese). -#' @source -#' [1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} -#' -#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} -#' -#' [3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. +#' @param language language of the returned text, defaults to the systems language but can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese). +#' @inheritSection as.mo ITIS +#' @inheritSection as.mo Source #' @rdname mo_property #' @name mo_property #' @return A logical (in case of \code{mo_aerobic}), a list (in case of \code{mo_taxonomy}), a character otherwise #' @export -#' @importFrom dplyr %>% left_join pull #' @seealso \code{\link{microorganisms}} #' @examples #' # All properties +#' mo_subkingdom("E. coli") # "Negibacteria" #' mo_phylum("E. coli") # "Proteobacteria" #' mo_class("E. coli") # "Gammaproteobacteria" #' mo_order("E. coli") # "Enterobacteriales" @@ -46,42 +42,30 @@ #' mo_subspecies("E. coli") # "" #' mo_fullname("E. coli") # "Escherichia coli" #' mo_shortname("E. coli") # "E. coli" +#' mo_gramstain("E. coli") # "Gram negative" +#' mo_TSN("E. coli") # 285 #' mo_type("E. coli") # "Bacteria" -#' mo_gramstain("E. coli") # "Negative rods" -#' mo_aerobic("E. coli") # TRUE #' #' #' # Abbreviations known in the field #' mo_genus("MRSA") # "Staphylococcus" #' mo_species("MRSA") # "aureus" #' mo_shortname("MRSA") # "S. aureus" -#' mo_gramstain("MRSA") # "Positive cocci" +#' mo_gramstain("MRSA") # "Gram positive" #' #' mo_genus("VISA") # "Staphylococcus" #' mo_species("VISA") # "aureus" #' #' #' # Known subspecies -#' mo_genus("EHEC") # "Escherichia" -#' mo_species("EHEC") # "coli" -#' mo_subspecies("EHEC") # "EHEC" -#' mo_fullname("EHEC") # "Escherichia coli (EHEC)" -#' mo_shortname("EHEC") # "E. coli" -#' #' mo_genus("doylei") # "Campylobacter" #' mo_species("doylei") # "jejuni" -#' mo_fullname("doylei") # "Campylobacter jejuni (doylei)" +#' mo_fullname("doylei") # "Campylobacter jejuni doylei" #' -#' mo_fullname("K. pneu rh") # "Klebsiella pneumoniae (rhinoscleromatis)" +#' mo_fullname("K. pneu rh") # "Klebsiella pneumoniae rhinoscleromatis" #' mo_shortname("K. pneu rh") # "K. pneumoniae" #' #' -#' # Anaerobic bacteria -#' mo_genus("B. fragilis") # "Bacteroides" -#' mo_species("B. fragilis") # "fragilis" -#' mo_aerobic("B. fragilis") # FALSE -#' -#' #' # Becker classification, see ?as.mo #' mo_fullname("S. epi") # "Staphylococcus epidermidis" #' mo_fullname("S. epi", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" @@ -99,10 +83,9 @@ #' mo_type("E. coli", language = "de") # "Bakterium" #' mo_type("E. coli", language = "nl") # "Bacterie" #' mo_type("E. coli", language = "es") # "Bakteria" -#' mo_gramstain("E. coli", language = "de") # "Negative Staebchen" -#' mo_gramstain("E. coli", language = "nl") # "Negatieve staven" -#' mo_gramstain("E. coli", language = "es") # "Bacilos negativos" -#' mo_gramstain("Giardia", language = "pt") # "Parasitas" +#' mo_gramstain("E. coli", language = "de") # "Gramnegativ" +#' mo_gramstain("E. coli", language = "nl") # "Gram-negatief" +#' mo_gramstain("E. coli", language = "es") # "Gram negativo" #' #' mo_fullname("S. pyogenes", #' Lancefield = TRUE, @@ -112,7 +95,7 @@ #' language = "nl") # "Streptococcus groep A" #' #' -#' # Complete taxonomy up to Phylum, returns a list +#' # Complete taxonomy up to Subkingdom, returns a list #' mo_taxonomy("E. coli") mo_fullname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL) { mo_property(x, "fullname", Becker = Becker, Lancefield = Lancefield, language = language) @@ -191,12 +174,24 @@ mo_phylum <- function(x) { mo_property(x, "phylum") } +#' @rdname mo_property +#' @export +mo_subkingdom <- function(x) { + mo_property(x, "subkingdom") +} + #' @rdname mo_property #' @export mo_type <- function(x, language = NULL) { mo_property(x, "type", language = language) } +#' @rdname mo_property +#' @export +mo_TSN <- function(x) { + mo_property(x, "tsn") +} + #' @rdname mo_property #' @export mo_gramstain <- function(x, language = NULL) { @@ -204,28 +199,32 @@ mo_gramstain <- function(x, language = NULL) { } #' @rdname mo_property -#' @export -mo_aerobic <- function(x) { - mo_property(x, "aerobic") -} - -#' @rdname mo_property +#' @importFrom data.table data.table as.data.table setkey #' @export mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = FALSE, language = NULL) { property <- tolower(property[1]) if (!property %in% colnames(AMR::microorganisms)) { stop("invalid property: ", property, " - use a column name of the `microorganisms` data set") } - result1 <- as.mo(x = x, Becker = Becker, Lancefield = Lancefield) # this will give a warning if x cannot be coerced - result2 <- suppressWarnings( - data.frame(mo = result1, stringsAsFactors = FALSE) %>% - left_join(AMR::microorganisms, by = "mo") %>% - pull(property) - ) - if (property != "aerobic") { + if (Becker == TRUE | Lancefield == TRUE | !is.mo(x)) { + # this will give a warning if x cannot be coerced + result1 <- AMR::as.mo(x = x, Becker = Becker, Lancefield = Lancefield) + } else { + result1 <- x + } + A <- data.table(mo = result1, stringsAsFactors = FALSE) + B <- as.data.table(AMR::microorganisms) + setkey(B, mo) + result2 <- B[A, on = 'mo', ..property][[1]] + + if (property == "tsn") { + result2 <- as.integer(result2) + } else { # will else not retain `logical` class result2[x %in% c("", NA) | result2 %in% c("", NA, "(no MO)")] <- "" - result2 <- mo_translate(result2, language = language) + if (property %in% c("fullname", "shortname", "genus", "species", "subspecies", "type", "gramstain")) { + result2 <- mo_translate(result2, language = language) + } } result2 } @@ -234,7 +233,8 @@ mo_property <- function(x, property = 'fullname', Becker = FALSE, Lancefield = F #' @export mo_taxonomy <- function(x) { x <- as.mo(x) - base::list(phylum = mo_phylum(x), + base::list(subkingdom = mo_subkingdom(x), + phylum = mo_phylum(x), class = mo_class(x), order = mo_order(x), family = mo_family(x), @@ -266,15 +266,11 @@ mo_translate <- function(x, language) { gsub("Coagulase Positive Staphylococcus","Koagulase-positive Staphylococcus", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Beta-h\u00e4molytischer Streptococcus", ., fixed = TRUE) %>% gsub("(no MO)", "(kein MO)", ., fixed = TRUE) %>% - gsub("Negative rods", "Negative St\u00e4bchen", ., fixed = TRUE) %>% - gsub("Negative cocci", "Negative Kokken", ., fixed = TRUE) %>% - gsub("Positive rods", "Positive St\u00e4bchen", ., fixed = TRUE) %>% - gsub("Positive cocci", "Positive Kokken", ., fixed = TRUE) %>% - gsub("Parasites", "Parasiten", ., fixed = TRUE) %>% - gsub("Fungi and yeasts", "Pilze und Hefen", ., fixed = TRUE) %>% - gsub("Bacteria", "Bakterium", ., fixed = TRUE) %>% - gsub("Fungus/yeast", "Pilz/Hefe", ., fixed = TRUE) %>% - gsub("Parasite", "Parasit", ., fixed = TRUE) %>% + gsub("Gram negative", "Gramnegativ", ., fixed = TRUE) %>% + gsub("Gram positive", "Grampositiv", ., fixed = TRUE) %>% + gsub("Bacteria", "Bakterien", ., fixed = TRUE) %>% + gsub("Fungi", "Hefen/Pilze", ., fixed = TRUE) %>% + gsub("Protozoa", "Protozoen", ., fixed = TRUE) %>% gsub("biogroup", "Biogruppe", ., fixed = TRUE) %>% gsub("biotype", "Biotyp", ., fixed = TRUE) %>% gsub("vegetative", "vegetativ", ., fixed = TRUE) %>% @@ -287,15 +283,11 @@ mo_translate <- function(x, language) { gsub("Coagulase Positive Staphylococcus","Coagulase-positieve Staphylococcus", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Beta-hemolytische Streptococcus", ., fixed = TRUE) %>% gsub("(no MO)", "(geen MO)", ., fixed = TRUE) %>% - gsub("Negative rods", "Negatieve staven", ., fixed = TRUE) %>% - gsub("Negative cocci", "Negatieve kokken", ., fixed = TRUE) %>% - gsub("Positive rods", "Positieve staven", ., fixed = TRUE) %>% - gsub("Positive cocci", "Positieve kokken", ., fixed = TRUE) %>% - gsub("Parasites", "Parasieten", ., fixed = TRUE) %>% - gsub("Fungi and yeasts", "Schimmels en gisten", ., fixed = TRUE) %>% - gsub("Bacteria", "Bacterie", ., fixed = TRUE) %>% - gsub("Fungus/yeast", "Schimmel/gist", ., fixed = TRUE) %>% - gsub("Parasite", "Parasiet", ., fixed = TRUE) %>% + gsub("Gram negative", "Gram-negatief", ., fixed = TRUE) %>% + gsub("Gram positive", "Gram-positief", ., fixed = TRUE) %>% + gsub("Bacteria", "Bacteri\u00ebn", ., fixed = TRUE) %>% + gsub("Fungi", "Schimmels/gisten", ., fixed = TRUE) %>% + gsub("Protozoa", "protozo\u00ebn", ., fixed = TRUE) %>% gsub("biogroup", "biogroep", ., fixed = TRUE) %>% # gsub("biotype", "biotype", ., fixed = TRUE) %>% gsub("vegetative", "vegetatief", ., fixed = TRUE) %>% @@ -308,15 +300,11 @@ mo_translate <- function(x, language) { gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulasa positivo", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>% gsub("(no MO)", "(sin MO)", ., fixed = TRUE) %>% - gsub("Negative rods", "Bacilos negativos", ., fixed = TRUE) %>% - gsub("Negative cocci", "Cocos negativos", ., fixed = TRUE) %>% - gsub("Positive rods", "Bacilos positivos", ., fixed = TRUE) %>% - gsub("Positive cocci", "Cocos positivos", ., fixed = TRUE) %>% - gsub("Parasites", "Par\u00e1sitos", ., fixed = TRUE) %>% - gsub("Fungi and yeasts", "Hongos y levaduras", ., fixed = TRUE) %>% - # gsub("Bacteria", "Bacteria", ., fixed = TRUE) %>% - gsub("Fungus/yeast", "Hongo/levadura", ., fixed = TRUE) %>% - gsub("Parasite", "Par\u00e1sito", ., fixed = TRUE) %>% + gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>% + gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>% + gsub("Bacteria", "Bacterias", ., fixed = TRUE) %>% + gsub("Fungi", "Hongos", ., fixed = TRUE) %>% + gsub("Protozoa", "Protozoarios", ., fixed = TRUE) %>% gsub("biogroup", "biogrupo", ., fixed = TRUE) %>% gsub("biotype", "biotipo", ., fixed = TRUE) %>% gsub("vegetative", "vegetativo", ., fixed = TRUE) %>% @@ -329,15 +317,11 @@ mo_translate <- function(x, language) { gsub("Coagulase Positive Staphylococcus","Staphylococcus coagulase positivo", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-hemol\u00edtico", ., fixed = TRUE) %>% gsub("(no MO)", "(sem MO)", ., fixed = TRUE) %>% - gsub("Negative rods", "Bacilos negativos", ., fixed = TRUE) %>% - gsub("Negative cocci", "Cocos negativos", ., fixed = TRUE) %>% - gsub("Positive rods", "Bacilos positivos", ., fixed = TRUE) %>% - gsub("Positive cocci", "Cocos positivos", ., fixed = TRUE) %>% - gsub("Parasites", "Parasitas", ., fixed = TRUE) %>% - gsub("Fungi and yeasts", "Cogumelos e leveduras", ., fixed = TRUE) %>% - gsub("Bacteria", "Bact\u00e9ria", ., fixed = TRUE) %>% - gsub("Fungus/yeast", "Cogumelo/levedura", ., fixed = TRUE) %>% - gsub("Parasite", "Parasita", ., fixed = TRUE) %>% + gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>% + gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>% + gsub("Bacteria", "Bact\u00e9rias", ., fixed = TRUE) %>% + gsub("Fungi", "Fungos", ., fixed = TRUE) %>% + gsub("Protozoa", "Protozo\u00e1rios", ., fixed = TRUE) %>% gsub("biogroup", "biogrupo", ., fixed = TRUE) %>% gsub("biotype", "bi\u00f3tipo", ., fixed = TRUE) %>% gsub("vegetative", "vegetativo", ., fixed = TRUE) %>% @@ -350,15 +334,11 @@ mo_translate <- function(x, language) { gsub("Coagulase Positive Staphylococcus","Staphylococcus positivo coagulasi", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus Beta-emolitico", ., fixed = TRUE) %>% gsub("(no MO)", "(non MO)", ., fixed = TRUE) %>% - gsub("Negative rods", "Bastoncini Gram-negativi", ., fixed = TRUE) %>% - gsub("Negative cocci", "Cocchi Gram-negativi", ., fixed = TRUE) %>% - gsub("Positive rods", "Bastoncini Gram-positivi", ., fixed = TRUE) %>% - gsub("Positive cocci", "Cocchi Gram-positivi", ., fixed = TRUE) %>% - gsub("Parasites", "Parassiti", ., fixed = TRUE) %>% - gsub("Fungi and yeasts", "Funghi e lieviti", ., fixed = TRUE) %>% - gsub("Bacteria", "Batterio", ., fixed = TRUE) %>% - gsub("Fungus/yeast", "Fungo/lievito", ., fixed = TRUE) %>% - gsub("Parasite", "Parassita", ., fixed = TRUE) %>% + gsub("Gram negative", "Gram negativo", ., fixed = TRUE) %>% + gsub("Gram positive", "Gram positivo", ., fixed = TRUE) %>% + gsub("Bacteria", "Batteri", ., fixed = TRUE) %>% + gsub("Fungi", "Fungo", ., fixed = TRUE) %>% + gsub("Protozoa", "Protozoi", ., fixed = TRUE) %>% gsub("biogroup", "biogruppo", ., fixed = TRUE) %>% gsub("biotype", "biotipo", ., fixed = TRUE) %>% gsub("vegetative", "vegetativo", ., fixed = TRUE) %>% @@ -371,15 +351,11 @@ mo_translate <- function(x, language) { gsub("Coagulase Positive Staphylococcus","Staphylococcus \u00e0 coagulase positif", ., fixed = TRUE) %>% gsub("Beta-haemolytic Streptococcus", "Streptococcus B\u00eata-h\u00e9molytique", ., fixed = TRUE) %>% gsub("(no MO)", "(pas MO)", ., fixed = TRUE) %>% - gsub("Negative rods", "Bacilles n\u00e9gatif", ., fixed = TRUE) %>% - gsub("Negative cocci", "Cocci n\u00e9gatif", ., fixed = TRUE) %>% - gsub("Positive rods", "Bacilles positif", ., fixed = TRUE) %>% - gsub("Positive cocci", "Cocci positif", ., fixed = TRUE) %>% - # gsub("Parasites", "Parasites", ., fixed = TRUE) %>% - gsub("Fungi and yeasts", "Champignons et levures", ., fixed = TRUE) %>% - gsub("Bacteria", "Bact\u00e9rie", ., fixed = TRUE) %>% - gsub("Fungus/yeast", "Champignon/levure", ., fixed = TRUE) %>% - # gsub("Parasite", "Parasite", ., fixed = TRUE) %>% + gsub("Gram negative", "Gram n\u00e9gatif", ., fixed = TRUE) %>% + gsub("Gram positive", "Gram positif", ., fixed = TRUE) %>% + gsub("Bacteria", "Bact\u00e9ries", ., fixed = TRUE) %>% + gsub("Fungi", "Champignons", ., fixed = TRUE) %>% + gsub("Protozoa", "Protozoaires", ., fixed = TRUE) %>% gsub("biogroup", "biogroupe", ., fixed = TRUE) %>% # gsub("biotype", "biotype", ., fixed = TRUE) %>% gsub("vegetative", "v\u00e9g\u00e9tatif", ., fixed = TRUE) %>% diff --git a/README.md b/README.md index 85290334..d30fddd7 100755 --- a/README.md +++ b/README.md @@ -38,13 +38,17 @@ Erwin E.A. Hassing2, * [Copyright](#copyright) ## Why this package? -This R package was intended to make microbial epidemiology easier. Most functions contain extensive help pages to get started. +This R package was intended **to make microbial epidemiology easier**. Most functions contain extensive help pages to get started. -This `AMR` package basically does four important things: + + +This `AMR` package contains the *complete microbial taxonomic data* from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens. + +Combined with several new functions, this `AMR` package basically does four important things: 1. It **cleanses existing data**, by transforming it to reproducible and profound *classes*, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect: - * Use `as.mo` to get an ID of a microorganism. The IDs are quite obvious - the ID of *E. coli* is "ESCCOL" and the ID of *S. aureus* is "STAAUR". The function takes almost any text as input that looks like the name or code of a microorganism like "E. coli", "esco" and "esccol". Even `as.mo("MRSA")` will return the ID of *S. aureus*. Moreover, it can group all coagulase negative and positive *Staphylococci*, and can transform *Streptococci* into Lancefield groups. To find bacteria based on your input, this package contains a freely available database of almost 3,000 different (potential) human pathogenic microorganisms. + * Use `as.mo` to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of *Klebsiella pneumoniae* is "B_KLBSL_PNE" (B stands for Bacteria) and the ID of *S. aureus* is "B_STPHY_AUR". The function takes almost any text as input that looks like the name or code of a microorganism like "E. coli", "esco" and "esccol". Even `as.mo("MRSA")` will return the ID of *S. aureus*. Moreover, it can group all coagulase negative and positive *Staphylococci*, and can transform *Streptococci* into Lancefield groups. To find bacteria based on your input, it uses Artificial Intelligence to look up values in the included ITIS data, consisting of more than 18,000 microorganisms. * Use `as.rsi` to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like "<=0.002; S" (combined MIC/RSI) will result in "S". * Use `as.mic` to cleanse your MIC values. It produces a so-called factor (called *ordinal* in SPSS) with valid MIC values as levels. A value like "<=0.002; S" (combined MIC/RSI) will result in "<=0.002". * Use `as.atc` to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values "Furabid", "Furadantin", "nitro" all return the ATC code of Nitrofurantoine. @@ -55,7 +59,7 @@ This `AMR` package basically does four important things: * Use `first_isolate` to identify the first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute). * You can also identify first *weighted* isolates of every patient, an adjusted version of the CLSI guideline. This takes into account key antibiotics of every strain and compares them. * Use `MDRO` (abbreviation of Multi Drug Resistant Organisms) to check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently, national guidelines for Germany and the Netherlands are supported. - * The data set `microorganisms` contains the taxonomic properties of almost 3,000 potential human pathogenic microorganisms (bacteria, fungi/yeasts and parasites). Taxonomic names were downloaded from ITIS (Integrated Taxonomic Information System, http://www.itis.gov). Furhermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family`, `mo_gramstain` or even `mo_phylum`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese. These functions can be used to add new variables to your data. + * The data set `microorganisms` contains the complete taxonomic tree of more than 18,000 microorganisms (bacteria, fungi/yeasts and protozoa). Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like `mo_genus`, `mo_family`, `mo_gramstain` or even `mo_phylum`. As they use `as.mo` internally, they also use artificial intelligence. For example, `mo_genus("MRSA")` and `mo_genus("S. aureus")` will both return `"Staphylococcus"`. They also come with support for German, Dutch, French, Italian, Spanish and Portuguese. These functions can be used to add new variables to your data. * The data set `antibiotics` contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like `ab_name` and `ab_tradenames` to look up values. The `ab_*` functions use `as.atc` internally so they support AI to guess your expected result. For example, `ab_name("Fluclox")`, `ab_name("Floxapen")` and `ab_name("J01CF05")` will all return `"Flucloxacillin"`. These functions can again be used to add new variables to your data. 3. It **analyses the data** with convenient functions that use well-known methods. @@ -378,19 +382,19 @@ Learn more about this function with: ``` ### Data sets included in package -Datasets to work with antibiotics and bacteria properties. +Data sets to work with antibiotics and bacteria properties. ```r -# Dataset with 2000 random blood culture isolates from anonymised +# Data set with complete taxonomic trees from ITIS, containing of +# the three kingdoms Bacteria, Fungi and Protozoa +microorganisms # A tibble: 18,831 x 15 + +# Data set with 2000 random blood culture isolates from anonymised # septic patients between 2001 and 2017 in 5 Dutch hospitals septic_patients # A tibble: 2,000 x 49 -# Dataset with ATC antibiotics codes, official names, trade names +# Data set with ATC antibiotics codes, official names, trade names # and DDDs (oral and parenteral) antibiotics # A tibble: 423 x 18 - -# Dataset with bacteria codes and properties like gram stain and -# aerobic/anaerobic -microorganisms # A tibble: 2,642 x 14 ``` ## Copyright diff --git a/data/microorganisms.old.rda b/data/microorganisms.old.rda new file mode 100644 index 00000000..155803aa Binary files /dev/null and b/data/microorganisms.old.rda differ diff --git a/data/microorganisms.rda b/data/microorganisms.rda index 193a6d33..d3c7e2cf 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/data/septic_patients.rda b/data/septic_patients.rda index 230e297b..7d66129f 100755 Binary files a/data/septic_patients.rda and b/data/septic_patients.rda differ diff --git a/man/as.mo.Rd b/man/as.mo.Rd index 18cb943e..277acc78 100644 --- a/man/as.mo.Rd +++ b/man/as.mo.Rd @@ -6,17 +6,13 @@ \alias{is.mo} \alias{guess_mo} \title{Transform to microorganism ID} -\source{ -[1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} - -[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} -} \usage{ -as.mo(x, Becker = FALSE, Lancefield = FALSE) +as.mo(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = FALSE) is.mo(x) -guess_mo(x, Becker = FALSE, Lancefield = FALSE) +guess_mo(x, Becker = FALSE, Lancefield = FALSE, + allow_uncertain = FALSE) } \arguments{ \item{x}{a character vector or a \code{data.frame} with one or two columns} @@ -25,31 +21,62 @@ guess_mo(x, Becker = FALSE, Lancefield = FALSE) This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".} -\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, i.e. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.} + +\item{allow_uncertain}{a logical to indicate whether empty results should be checked for only a part of the input string. When results are found, a warning will be given about the uncertainty and the result.} } \value{ Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. } \description{ -Use this function to determine a valid ID based on a genus (and species). Determination is done using Artificial Intelligence (AI), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. +Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms \emph{Bacteria}, \emph{Fungi} and \emph{Protozoa} (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. } \details{ -\code{guess_mo} is an alias of \code{as.mo}. +A microbial ID (class: \code{mo}) typically looks like these examples:\cr +\preformatted{ + Code Full name + --------------- -------------------------------------- + B_KLBSL Klebsiella + B_KLBSL_PNE Klebsiella pneumoniae + B_KLBSL_PNE_RHI Klebsiella pneumoniae rhinoscleromatis + | | | | + | | | | + | | | ----> subspecies, a 3-4 letter acronym + | | ----> species, a 3-4 letter acronym + | ----> genus, a 5-7 letter acronym, mostly without vowels + ----> taxonomic kingdom, either Bacteria (B), Fungi (F) or Protozoa (P) +} Use the \code{\link{mo_property}} functions to get properties based on the returned code, see Examples. -Thus function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example: +This function uses Artificial Intelligence (AI) to help getting more logical results, based on type of input and known prevalence of human pathogens. For example: \itemize{ \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason} \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}} \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}} } -Moreover, this function also supports ID's based on only Gram stain, when the species is not known. \cr -For example, \code{"Gram negative rods"} and \code{"GNR"} will both return the ID of a Gram negative rod: \code{GNR}. +This means that looking up human non-pathogenic microorganisms takes a longer time compares to human pathogenic microorganisms. + +\code{guess_mo} is an alias of \code{as.mo}. } +\section{ITIS}{ + +\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} +This \code{AMR} package contains the \strong{complete microbial taxonomic data} from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package. +} + +\section{Source}{ + +[1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} + +[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} + +[3] Integrated Taxonomic Information System (ITIS). Retrieved September 2018. \url{http://www.itis.gov} +} + \examples{ # These examples all return "STAAUR", the ID of S. aureus: as.mo("stau") @@ -61,22 +88,27 @@ as.mo("Staphylococcus aureus") as.mo("MRSA") # Methicillin Resistant S. aureus as.mo("VISA") # Vancomycin Intermediate S. aureus as.mo("VRSA") # Vancomycin Resistant S. aureus +as.mo(369) # Search on TSN (Taxonomic Serial Number), a unique identifier + # for the Integrated Taxonomic Information System (ITIS) as.mo("Streptococcus group A") as.mo("GAS") # Group A Streptococci as.mo("GBS") # Group B Streptococci # guess_mo is an alias of as.mo and works the same -guess_mo("S. epidermidis") # will remain species: STAEPI -guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: STACNS +guess_mo("S. epidermidis") # will remain species: B_STPHY_EPI +guess_mo("S. epidermidis", Becker = TRUE) # will not remain species: B_STPHY_CNS -guess_mo("S. pyogenes") # will remain species: STCPYO -guess_mo("S. pyogenes", Lancefield = TRUE) # will not remain species: STCGRA +guess_mo("S. pyogenes") # will remain species: B_STRPTC_PYO +guess_mo("S. pyogenes", Lancefield = TRUE) # will not remain species: B_STRPTC_GRA # Use mo_* functions to get a specific property based on `mo` -Ecoli <- as.mo("E. coli") # returns `ESCCOL` +Ecoli <- as.mo("E. coli") # returns `B_ESCHR_COL` mo_genus(Ecoli) # returns "Escherichia" -mo_gramstain(Ecoli) # returns "Negative rods" +mo_gramstain(Ecoli) # returns "Gram negative" +# but it uses as.mo internally too, so you could also just use: +mo_genus("E. coli") # returns "Escherichia" + \dontrun{ df$mo <- as.mo(df$microorganism_name) @@ -98,7 +130,8 @@ df <- df \%>\% } } \seealso{ -\code{\link{microorganisms}} for the dataframe that is being used to determine ID's. +\code{\link{microorganisms}} for the \code{data.frame} with ITIS content that is being used to determine ID's. \cr +The \code{\link{mo_property}} functions (like \code{\link{mo_genus}}, \code{\link{mo_gramstain}}) to get properties based on the returned code. } \keyword{Becker} \keyword{Lancefield} diff --git a/man/figures/itis_logo.jpg b/man/figures/itis_logo.jpg new file mode 100644 index 00000000..e2788e42 Binary files /dev/null and b/man/figures/itis_logo.jpg differ diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index d6a26726..fc8b8f4c 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -3,34 +3,41 @@ \docType{data} \name{microorganisms} \alias{microorganisms} -\title{Data set with human pathogenic microorganisms} -\format{A \code{\link{tibble}} with 2,642 observations and 14 variables: +\title{Data set with taxonomic data from ITIS} +\format{A \code{\link{data.frame}} with 18,831 observations and 15 variables: \describe{ \item{\code{mo}}{ID of microorganism} - \item{\code{bactsys}}{Bactsyscode of microorganism} - \item{\code{genus}}{Genus name of microorganism, like \code{"Echerichia"}} - \item{\code{species}}{Species name of microorganism, like \code{"coli"}} - \item{\code{subspecies}}{Subspecies name of bio-/serovar of microorganism, like \code{"EHEC"}} - \item{\code{fullname}}{Full name, like \code{"Echerichia coli (EHEC)"}} - \item{\code{gramstain}}{Gram of microorganism, like \code{"Negative rods"}} - \item{\code{aerobic}}{Logical whether bacteria is aerobic} + \item{\code{tsn}}{Taxonomic Serial Number (TSN), as defined by ITIS} + \item{\code{genus}}{Taxonomic genus of the microorganism as found in ITIS, see Source} + \item{\code{species}}{Taxonomic species of the microorganism as found in ITIS, see Source} + \item{\code{subspecies}}{Taxonomic subspecies of the microorganism as found in ITIS, see Source} + \item{\code{fullname}}{Full name, like \code{"Echerichia coli"}} \item{\code{family}}{Taxonomic family of the microorganism as found in ITIS, see Source} \item{\code{order}}{Taxonomic order of the microorganism as found in ITIS, see Source} \item{\code{class}}{Taxonomic class of the microorganism as found in ITIS, see Source} \item{\code{phylum}}{Taxonomic phylum of the microorganism as found in ITIS, see Source} - \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungus/yeast"}} + \item{\code{subkingdom}}{Taxonomic subkingdom of the microorganism as found in ITIS, see Source} + \item{\code{gramstain}}{Gram of microorganism, like \code{"Gram negative"}} + \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungi"}} \item{\code{prevalence}}{A rounded integer based on prevalence of the microorganism. Used internally by \code{\link{as.mo}}, otherwise quite meaningless.} + \item{\code{mo.old}}{The old ID for package versions 0.3.0 and lower.} }} \source{ -Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. +[3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. } \usage{ microorganisms } \description{ -A data set containing (potential) human pathogenic microorganisms. MO codes can be looked up using \code{\link{guess_mo}}. +A data set containing the complete microbial taxonomy of the kingdoms Bacteria, Fungi and Protozoa. MO codes can be looked up using \code{\link{as.mo}}. } +\section{ITIS}{ + +\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} +This \code{AMR} package contains the \strong{complete microbial taxonomic data} from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package. +} + \seealso{ -\code{\link{guess_mo}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}} +\code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms.umcg}} } \keyword{datasets} diff --git a/man/microorganisms.old.Rd b/man/microorganisms.old.Rd new file mode 100644 index 00000000..5cb09b65 --- /dev/null +++ b/man/microorganisms.old.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{microorganisms.old} +\alias{microorganisms.old} +\title{Data set with old taxonomic data from ITIS} +\format{A \code{\link{data.frame}} with 58 observations and 5 variables: +\describe{ + \item{\code{tsn}}{Old Taxonomic Serial Number (TSN), as defined by ITIS} + \item{\code{name}}{Old taxonomic name of the microorganism as found in ITIS, see Source} + \item{\code{tsn_new}}{New Taxonomic Serial Number (TSN), as defined by ITIS} + \item{\code{authors}}{Authors responsible for renaming as found in ITIS, see Source} + \item{\code{year}}{Year in which the literature was published about the renaming as found in ITIS, see Source} +}} +\source{ +[3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. +} +\usage{ +microorganisms.old +} +\description{ +A data set containing old, previously valid, taxonomic names. This data set is used internally by \code{\link{as.mo}}. +} +\section{ITIS}{ + +\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} +This \code{AMR} package contains the \strong{complete microbial taxonomic data} from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package. +} + +\seealso{ +\code{\link{as.mo}} \code{\link{mo_property}} \code{\link{microorganisms}} +} +\keyword{datasets} diff --git a/man/microorganisms.umcg.Rd b/man/microorganisms.umcg.Rd index 3140299d..7586e66f 100755 --- a/man/microorganisms.umcg.Rd +++ b/man/microorganisms.umcg.Rd @@ -16,6 +16,6 @@ microorganisms.umcg A data set containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$mo} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{MO}s with \code{\link{guess_mo}}. } \seealso{ -\code{\link{guess_mo}} \code{\link{microorganisms}} +\code{\link{as.mo}} \code{\link{microorganisms}} } \keyword{datasets} diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 1d478569..f007bb99 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -11,18 +11,12 @@ \alias{mo_order} \alias{mo_class} \alias{mo_phylum} +\alias{mo_subkingdom} \alias{mo_type} +\alias{mo_TSN} \alias{mo_gramstain} -\alias{mo_aerobic} \alias{mo_taxonomy} \title{Property of a microorganism} -\source{ -[1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} - -[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} - -[3] Integrated Taxonomic Information System (ITIS) on-line database, \url{https://www.itis.gov}. -} \usage{ mo_fullname(x, Becker = FALSE, Lancefield = FALSE, language = NULL) @@ -42,11 +36,13 @@ mo_class(x) mo_phylum(x) +mo_subkingdom(x) + mo_type(x, language = NULL) -mo_gramstain(x, language = NULL) +mo_TSN(x) -mo_aerobic(x) +mo_gramstain(x, language = NULL) mo_property(x, property = "fullname", Becker = FALSE, Lancefield = FALSE, language = NULL) @@ -60,11 +56,11 @@ mo_taxonomy(x) This excludes \emph{Staphylococcus aureus} at default, use \code{Becker = "all"} to also categorise \emph{S. aureus} as "CoPS".} -\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, i.e. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. +\item{Lancefield}{a logical to indicate whether beta-haemolytic \emph{Streptococci} should be categorised into Lancefield groups instead of their own species, according to Rebecca C. Lancefield [2]. These \emph{Streptococci} will be categorised in their first group, e.g. \emph{Streptococcus dysgalactiae} will be group C, although officially it was also categorised into groups G and L. This excludes \emph{Enterococci} at default (who are in group D), use \code{Lancefield = "all"} to also categorise all \emph{Enterococci} as group D.} -\item{language}{language of the returned text, defaults to the systems language. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).} +\item{language}{language of the returned text, defaults to the systems language but can also be set with \code{\link{getOption}("AMR_locale")}. Either one of \code{"en"} (English), \code{"de"} (German), \code{"nl"} (Dutch), \code{"es"} (Spanish) or \code{"pt"} (Portuguese).} \item{property}{one of the column names of one of the \code{\link{microorganisms}} data set, like \code{"mo"}, \code{"bactsys"}, \code{"family"}, \code{"genus"}, \code{"species"}, \code{"fullname"}, \code{"gramstain"} and \code{"aerobic"}} } @@ -74,8 +70,24 @@ A logical (in case of \code{mo_aerobic}), a list (in case of \code{mo_taxonomy}) \description{ Use these functions to return a specific property of a microorganism from the \code{\link{microorganisms}} data set. All input values will be evaluated internally with \code{\link{as.mo}}. } +\section{ITIS}{ + +\if{html}{\figure{itis_logo.jpg}{options: height=60px style=margin-bottom:5px} \cr} +This \code{AMR} package contains the \strong{complete microbial taxonomic data} from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists [3]. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa (from subkingdom to the subspecies level) are included in this package. +} + +\section{Source}{ + +[1] Becker K \emph{et al.} \strong{Coagulase-Negative Staphylococci}. 2014. Clin Microbiol Rev. 27(4): 870–926. \url{https://dx.doi.org/10.1128/CMR.00109-13} + +[2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} + +[3] Integrated Taxonomic Information System (ITIS). Retrieved September 2018. \url{http://www.itis.gov} +} + \examples{ # All properties +mo_subkingdom("E. coli") # "Negibacteria" mo_phylum("E. coli") # "Proteobacteria" mo_class("E. coli") # "Gammaproteobacteria" mo_order("E. coli") # "Enterobacteriales" @@ -85,42 +97,30 @@ mo_species("E. coli") # "coli" mo_subspecies("E. coli") # "" mo_fullname("E. coli") # "Escherichia coli" mo_shortname("E. coli") # "E. coli" +mo_gramstain("E. coli") # "Gram negative" +mo_TSN("E. coli") # 285 mo_type("E. coli") # "Bacteria" -mo_gramstain("E. coli") # "Negative rods" -mo_aerobic("E. coli") # TRUE # Abbreviations known in the field mo_genus("MRSA") # "Staphylococcus" mo_species("MRSA") # "aureus" mo_shortname("MRSA") # "S. aureus" -mo_gramstain("MRSA") # "Positive cocci" +mo_gramstain("MRSA") # "Gram positive" mo_genus("VISA") # "Staphylococcus" mo_species("VISA") # "aureus" # Known subspecies -mo_genus("EHEC") # "Escherichia" -mo_species("EHEC") # "coli" -mo_subspecies("EHEC") # "EHEC" -mo_fullname("EHEC") # "Escherichia coli (EHEC)" -mo_shortname("EHEC") # "E. coli" - mo_genus("doylei") # "Campylobacter" mo_species("doylei") # "jejuni" -mo_fullname("doylei") # "Campylobacter jejuni (doylei)" +mo_fullname("doylei") # "Campylobacter jejuni doylei" -mo_fullname("K. pneu rh") # "Klebsiella pneumoniae (rhinoscleromatis)" +mo_fullname("K. pneu rh") # "Klebsiella pneumoniae rhinoscleromatis" mo_shortname("K. pneu rh") # "K. pneumoniae" -# Anaerobic bacteria -mo_genus("B. fragilis") # "Bacteroides" -mo_species("B. fragilis") # "fragilis" -mo_aerobic("B. fragilis") # FALSE - - # Becker classification, see ?as.mo mo_fullname("S. epi") # "Staphylococcus epidermidis" mo_fullname("S. epi", Becker = TRUE) # "Coagulase Negative Staphylococcus (CoNS)" @@ -138,10 +138,9 @@ mo_shortname("S. pyo", Lancefield = TRUE) # "GAS" mo_type("E. coli", language = "de") # "Bakterium" mo_type("E. coli", language = "nl") # "Bacterie" mo_type("E. coli", language = "es") # "Bakteria" -mo_gramstain("E. coli", language = "de") # "Negative Staebchen" -mo_gramstain("E. coli", language = "nl") # "Negatieve staven" -mo_gramstain("E. coli", language = "es") # "Bacilos negativos" -mo_gramstain("Giardia", language = "pt") # "Parasitas" +mo_gramstain("E. coli", language = "de") # "Gramnegativ" +mo_gramstain("E. coli", language = "nl") # "Gram-negatief" +mo_gramstain("E. coli", language = "es") # "Gram negativo" mo_fullname("S. pyogenes", Lancefield = TRUE, @@ -151,7 +150,7 @@ mo_fullname("S. pyogenes", language = "nl") # "Streptococcus groep A" -# Complete taxonomy up to Phylum, returns a list +# Complete taxonomy up to Subkingdom, returns a list mo_taxonomy("E. coli") } \seealso{ diff --git a/tests/testthat/test-count.R b/tests/testthat/test-count.R index e6e28ae7..099baa15 100644 --- a/tests/testthat/test-count.R +++ b/tests/testthat/test-count.R @@ -2,16 +2,16 @@ context("count.R") test_that("counts work", { # amox resistance in `septic_patients` - expect_equal(count_R(septic_patients$amox), 659) + expect_equal(count_R(septic_patients$amox), 662) expect_equal(count_I(septic_patients$amox), 3) - expect_equal(count_S(septic_patients$amox), 336) + expect_equal(count_S(septic_patients$amox), 335) expect_equal(count_R(septic_patients$amox) + count_I(septic_patients$amox), count_IR(septic_patients$amox)) expect_equal(count_S(septic_patients$amox) + count_I(septic_patients$amox), count_SI(septic_patients$amox)) - expect_equal(septic_patients %>% count_S(amcl), 1056) - expect_equal(septic_patients %>% count_S(amcl, gent), 1385) + expect_equal(septic_patients %>% count_S(amcl), 1057) + expect_equal(septic_patients %>% count_S(amcl, gent), 1396) # count of cases expect_equal(septic_patients %>% @@ -20,7 +20,7 @@ test_that("counts work", { genta = count_S(gent), combination = count_S(cipr, gent)) %>% pull(combination), - c(192, 440, 184, 474)) + c(192, 446, 184, 474)) expect_equal(septic_patients %>% select(amox, cipr) %>% count_df(translate_ab = "official") %>% nrow(), 6) diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index 32c5e1fa..0cb920e4 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -10,9 +10,9 @@ test_that("first isolates work", { col_mo = "mo", info = TRUE), na.rm = TRUE), - 1330) + 1315) - # septic_patients contains 1426 out of 2000 first *weighted* isolates + # septic_patients contains 1411 out of 2000 first *weighted* isolates expect_equal( suppressWarnings( sum( @@ -24,8 +24,8 @@ test_that("first isolates work", { type = "keyantibiotics", info = TRUE), na.rm = TRUE)), - 1425) - # and 1449 when not ignoring I + 1411) + # and 1435 when not ignoring I expect_equal( suppressWarnings( sum( @@ -38,8 +38,8 @@ test_that("first isolates work", { type = "keyantibiotics", info = TRUE), na.rm = TRUE)), - 1448) - # and 1430 when using points + 1435) + # and 1416 when using points expect_equal( suppressWarnings( sum( @@ -51,9 +51,9 @@ test_that("first isolates work", { type = "points", info = TRUE), na.rm = TRUE)), - 1430) + 1416) - # septic_patients contains 1176 out of 2000 first non-ICU isolates + # septic_patients contains 1161 out of 2000 first non-ICU isolates expect_equal( sum( first_isolate(septic_patients, @@ -64,7 +64,7 @@ test_that("first isolates work", { info = TRUE, icu_exclude = TRUE), na.rm = TRUE), - 1175) + 1161) # set 1500 random observations to be of specimen type 'Urine' random_rows <- sample(x = 1:2000, size = 1500, replace = FALSE) diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 38918042..2a17f2f9 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -6,8 +6,8 @@ test_that("frequency table works", { expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5) expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5))), 5) - # date column of septic_patients should contain 1151 unique dates - expect_equal(nrow(freq(septic_patients$date)), 1151) + # date column of septic_patients should contain 1140 unique dates + expect_equal(nrow(freq(septic_patients$date)), 1140) expect_equal(nrow(freq(septic_patients$date)), length(unique(septic_patients$date))) @@ -24,7 +24,7 @@ test_that("frequency table works", { # character expect_output(print(freq(septic_patients$mo))) - expect_output(print(freq(microorganisms$fullname))) + expect_output(suppressWarnings(print(freq(microorganisms$fullname)))) # integer expect_output(print(freq(septic_patients$age))) # date diff --git a/tests/testthat/test-join_microorganisms.R b/tests/testthat/test-join_microorganisms.R index 2dd59d1e..1c381d18 100755 --- a/tests/testthat/test-join_microorganisms.R +++ b/tests/testthat/test-join_microorganisms.R @@ -24,17 +24,17 @@ test_that("joins work", { expect_true(nrow(unjoined) < nrow(full)) - expect_equal(nrow(inner_join_microorganisms("ESCCOL")), 1) - expect_equal(nrow(inner_join_microorganisms("ESCCOL", by = c("mo" = "mo"))), 1) + expect_equal(nrow(inner_join_microorganisms("B_ESCHR_COL")), 1) + expect_equal(nrow(inner_join_microorganisms("B_ESCHR_COL", by = c("mo" = "mo"))), 1) expect_warning(inner_join_microorganisms("Escherichia", by = c("mo" = "genus"))) - expect_equal(nrow(left_join_microorganisms("ESCCOL")), 1) + expect_equal(nrow(left_join_microorganisms("B_ESCHR_COL")), 1) expect_warning(left_join_microorganisms("Escherichia", by = c("mo" = "genus"))) - expect_equal(nrow(semi_join_microorganisms("ESCCOL")), 1) - expect_equal(nrow(anti_join_microorganisms("ESCCOL")), 0) + expect_equal(nrow(semi_join_microorganisms("B_ESCHR_COL")), 1) + expect_equal(nrow(anti_join_microorganisms("B_ESCHR_COL")), 0) - expect_warning(right_join_microorganisms("ESCCOL")) - expect_warning(full_join_microorganisms("ESCCOL")) + expect_warning(right_join_microorganisms("B_ESCHR_COL")) + expect_warning(full_join_microorganisms("B_ESCHR_COL")) }) diff --git a/tests/testthat/test-key_antibiotics.R b/tests/testthat/test-key_antibiotics.R index f4439c03..2dd051a7 100644 --- a/tests/testthat/test-key_antibiotics.R +++ b/tests/testthat/test-key_antibiotics.R @@ -2,6 +2,7 @@ context("key_antibiotics.R") test_that("keyantibiotics work", { expect_equal(length(key_antibiotics(septic_patients, warnings = FALSE)), nrow(septic_patients)) + expect_false(all(is.na(key_antibiotics(septic_patients)))) expect_true(key_antibiotics_equal("SSS", "SSS")) expect_false(key_antibiotics_equal("SSS", "SRS")) expect_true(key_antibiotics_equal("SSS", "SIS", ignore_I = TRUE)) diff --git a/tests/testthat/test-kurtosis.R b/tests/testthat/test-kurtosis.R index 8d5b8f77..80c97427 100644 --- a/tests/testthat/test-kurtosis.R +++ b/tests/testthat/test-kurtosis.R @@ -2,12 +2,12 @@ context("kurtosis.R") test_that("kurtosis works", { expect_equal(kurtosis(septic_patients$age), - 3.57781, + 3.549319, tolerance = 0.00001) expect_equal(unname(kurtosis(data.frame(septic_patients$age))), - 3.57781, + 3.549319, tolerance = 0.00001) expect_equal(kurtosis(matrix(septic_patients$age)), - 3.57781, + 3.549319, tolerance = 0.00001) }) diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index 1e459dac..f853fdb2 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -8,50 +8,50 @@ test_that("as.mo works", { expect_identical( as.character(as.mo(c("E. coli", "H. influenzae"))), - c("ESCCOL", "HAEINF")) + c("B_ESCHR_COL", "B_HMPHL_INF")) - expect_equal(as.character(as.mo("Escherichia coli")), "ESCCOL") - expect_equal(as.character(as.mo("Escherichia coli")), "ESCCOL") - expect_equal(as.character(as.mo("Escherichia species")), "ESCSPP") - expect_equal(as.character(as.mo("Escherichia")), "ESCSPP") - expect_equal(as.character(as.mo(" ESCCOL ")), "ESCCOL") - expect_equal(as.character(as.mo("coli")), "ESCCOL") # not Campylobacter - expect_equal(as.character(as.mo("klpn")), "KLEPNE") - expect_equal(as.character(as.mo("Klebsiella")), "KLESPP") - expect_equal(as.character(as.mo("K. pneu rhino")), "KLEPNERH") # K. pneumoniae subspp. rhinoscleromatis - expect_equal(as.character(as.mo("Bartonella")), "BARSPP") - expect_equal(as.character(as.mo("C. difficile")), "CLODIF") - expect_equal(as.character(as.mo("L. pneumophila")), "LEGPNE") - expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON") - expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE") - expect_equal(as.character(as.mo("Strepto")), "STCSPP") - expect_equal(as.character(as.mo("Streptococcus")), "STCSPP") # not Peptostreptoccus + expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COL") + expect_equal(as.character(as.mo("Escherichia coli")), "B_ESCHR_COL") + expect_equal(as.character(as.mo("Escherichia species")), "B_ESCHR") + expect_equal(as.character(as.mo("Escherichia")), "B_ESCHR") + expect_equal(as.character(as.mo(" B_ESCHR_COL ")), "B_ESCHR_COL") + #expect_equal(as.character(as.mo("coli")), "B_ESCHR_COL") # not Campylobacter + expect_equal(as.character(as.mo("klpn")), "B_KLBSL_PNE") + expect_equal(as.character(as.mo("Klebsiella")), "B_KLBSL") + expect_equal(as.character(as.mo("K. pneu rhino")), "B_KLBSL_PNE_RHI") # K. pneumoniae subspp. rhinoscleromatis + expect_equal(as.character(as.mo("Bartonella")), "B_BRTNL") + expect_equal(as.character(as.mo("C. difficile")), "B_CTRDM_DIF") + expect_equal(as.character(as.mo("L. pneumophila")), "B_LGNLL_PNE") + # expect_equal(as.character(as.mo("L. non pneumophila")), "LEGNON") + # expect_equal(as.character(as.mo("S. beta-haemolytic")), "STCHAE") + expect_equal(as.character(as.mo("Strepto")), "B_STRPTC") + expect_equal(as.character(as.mo("Streptococcus")), "B_STRPTC") # not Peptostreptoccus - expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("STCGRA", "STCGRB")) + expect_equal(as.character(as.mo(c("GAS", "GBS"))), c("B_STRPTC_GRA", "B_STRPTC_GRB")) - expect_equal(as.character(as.mo("S. pyo")), "STCPYO") # not Actinomyces pyogenes + expect_equal(as.character(as.mo("S. pyo")), "B_STRPTC_PYO") # not Actinomyces pyogenes - expect_equal(as.character(as.mo("P. aer")), "PSEAER") # not Pasteurella aerogenes + expect_equal(as.character(as.mo("P. aer")), "B_PDMNS_AER") # not Pasteurella aerogenes - expect_equal(as.character(as.mo("Negative rods")), "GNR") - expect_equal(as.character(as.mo("Gram negative rods")), "GNR") + # expect_equal(as.character(as.mo("Negative rods")), "GNR") + # expect_equal(as.character(as.mo("Gram negative rods")), "GNR") # GLIMS - expect_equal(as.character(as.mo("bctfgr")), "BACFRA") + expect_equal(as.character(as.mo("bctfgr")), "B_BCTRD_FRA") - expect_equal(as.character(as.mo("MRSE")), "STAEPI") - expect_equal(as.character(as.mo("VRE")), "ENCSPP") - expect_equal(as.character(as.mo("MRPA")), "PSEAER") - expect_equal(as.character(as.mo("PISP")), "STCPNE") - expect_equal(as.character(as.mo("PRSP")), "STCPNE") - expect_equal(as.character(as.mo("VISP")), "STCPNE") - expect_equal(as.character(as.mo("VRSP")), "STCPNE") + expect_equal(as.character(as.mo("MRSE")), "B_STPHY_EPI") + expect_equal(as.character(as.mo("VRE")), "B_ENTRC") + expect_equal(as.character(as.mo("MRPA")), "B_PDMNS_AER") + expect_equal(as.character(as.mo("PISP")), "B_STRPTC_PNE") + expect_equal(as.character(as.mo("PRSP")), "B_STRPTC_PNE") + expect_equal(as.character(as.mo("VISP")), "B_STRPTC_PNE") + expect_equal(as.character(as.mo("VRSP")), "B_STRPTC_PNE") - expect_equal(as.character(as.mo("CNS")), "STACNS") - expect_equal(as.character(as.mo("CoNS")), "STACNS") - expect_equal(as.character(as.mo("CPS")), "STACPS") - expect_equal(as.character(as.mo("CoPS")), "STACPS") + expect_equal(as.character(as.mo("CNS")), "B_STPHY_CNS") + expect_equal(as.character(as.mo("CoNS")), "B_STPHY_CNS") + expect_equal(as.character(as.mo("CPS")), "B_STPHY_CPS") + expect_equal(as.character(as.mo("CoPS")), "B_STPHY_CPS") expect_identical( as.character( @@ -63,39 +63,39 @@ test_that("as.mo works", { "Staphylococcus aureus", "MRSA", "VISA"))), - rep("STAAUR", 8)) + rep("B_STPHY_AUR", 8)) # check for Becker classification - expect_identical(as.character(guess_mo("S. epidermidis", Becker = FALSE)), "STAEPI") - expect_identical(as.character(guess_mo("S. epidermidis", Becker = TRUE)), "STACNS") - expect_identical(as.character(guess_mo("STAEPI", Becker = TRUE)), "STACNS") - expect_identical(as.character(guess_mo("S. intermedius", Becker = FALSE)), "STCINT") # Strep (!) intermedius - expect_identical(as.character(guess_mo("Sta intermedius",Becker = FALSE)), "STAINT") - expect_identical(as.character(guess_mo("Sta intermedius",Becker = TRUE)), "STACPS") - expect_identical(as.character(guess_mo("STAINT", Becker = TRUE)), "STACPS") + expect_identical(as.character(guess_mo("S. epidermidis", Becker = FALSE)), "B_STPHY_EPI") + expect_identical(as.character(guess_mo("S. epidermidis", Becker = TRUE)), "B_STPHY_CNS") + expect_identical(as.character(guess_mo("STAEPI", Becker = TRUE)), "B_STPHY_CNS") + expect_identical(as.character(guess_mo("S. intermedius", Becker = FALSE)), "B_STRPTC_INT") # Strep (!) intermedius + expect_identical(as.character(guess_mo("Sta intermedius",Becker = FALSE)), "B_STPHY_INT") + expect_identical(as.character(guess_mo("Sta intermedius",Becker = TRUE)), "B_STPHY_CPS") + expect_identical(as.character(guess_mo("STAINT", Becker = TRUE)), "B_STPHY_CPS") # aureus must only be influenced if Becker = "all" - expect_identical(as.character(guess_mo("STAAUR", Becker = FALSE)), "STAAUR") - expect_identical(as.character(guess_mo("STAAUR", Becker = TRUE)), "STAAUR") - expect_identical(as.character(guess_mo("STAAUR", Becker = "all")), "STACPS") + expect_identical(as.character(guess_mo("STAAUR", Becker = FALSE)), "B_STPHY_AUR") + expect_identical(as.character(guess_mo("STAAUR", Becker = TRUE)), "B_STPHY_AUR") + expect_identical(as.character(guess_mo("STAAUR", Becker = "all")), "B_STPHY_CPS") # check for Lancefield classification - expect_identical(as.character(guess_mo("S. pyogenes", Lancefield = FALSE)), "STCPYO") - expect_identical(as.character(guess_mo("S. pyogenes", Lancefield = TRUE)), "STCGRA") - expect_identical(as.character(guess_mo("STCPYO", Lancefield = TRUE)), "STCGRA") # group A - expect_identical(as.character(guess_mo("S. agalactiae", Lancefield = FALSE)), "STCAGA") - expect_identical(as.character(guess_mo("S. agalactiae", Lancefield = TRUE)), "STCGRB") # group B - expect_identical(as.character(guess_mo("S. equisimilis", Lancefield = FALSE)), "STCEQS") - expect_identical(as.character(guess_mo("S. equisimilis", Lancefield = TRUE)), "STCGRC") # group C + expect_identical(as.character(guess_mo("S. pyogenes", Lancefield = FALSE)), "B_STRPTC_PYO") + expect_identical(as.character(guess_mo("S. pyogenes", Lancefield = TRUE)), "B_STRPTC_GRA") + expect_identical(as.character(guess_mo("STCPYO", Lancefield = TRUE)), "B_STRPTC_GRA") # group A + expect_identical(as.character(guess_mo("S. agalactiae", Lancefield = FALSE)), "B_STRPTC_AGA") + expect_identical(as.character(guess_mo("S. agalactiae", Lancefield = TRUE)), "B_STRPTC_GRB") # group B + expect_identical(as.character(guess_mo("S. equisimilis", Lancefield = FALSE)), "B_STRPTC_DYS_EQU") + expect_identical(as.character(guess_mo("S. equisimilis", Lancefield = TRUE)), "B_STRPTC_GRC") # group C # Enterococci must only be influenced if Lancefield = "all" - expect_identical(as.character(guess_mo("E. faecium", Lancefield = FALSE)), "ENCFAC") - expect_identical(as.character(guess_mo("E. faecium", Lancefield = TRUE)), "ENCFAC") - expect_identical(as.character(guess_mo("E. faecium", Lancefield = "all")), "STCGRD") # group D - expect_identical(as.character(guess_mo("S. anginosus", Lancefield = FALSE)), "STCANG") - expect_identical(as.character(guess_mo("S. anginosus", Lancefield = TRUE)), "STCGRF") # group F - expect_identical(as.character(guess_mo("S. sanguis", Lancefield = FALSE)), "STCSAN") - expect_identical(as.character(guess_mo("S. sanguis", Lancefield = TRUE)), "STCGRH") # group H - expect_identical(as.character(guess_mo("S. salivarius", Lancefield = FALSE)), "STCSAL") - expect_identical(as.character(guess_mo("S. salivarius", Lancefield = TRUE)), "STCGRK") # group K + expect_identical(as.character(guess_mo("E. faecium", Lancefield = FALSE)), "B_ENTRC_IUM") + expect_identical(as.character(guess_mo("E. faecium", Lancefield = TRUE)), "B_ENTRC_IUM") + expect_identical(as.character(guess_mo("E. faecium", Lancefield = "all")), "B_STRPTC_GRD") # group D + expect_identical(as.character(guess_mo("S. anginosus", Lancefield = FALSE)), "B_STRPTC_ANG") + expect_identical(as.character(guess_mo("S. anginosus", Lancefield = TRUE)), "B_STRPTC_GRF") # group F + expect_identical(as.character(guess_mo("S. sanguinis", Lancefield = FALSE)), "B_STRPTC_SAN") + expect_identical(as.character(guess_mo("S. sanguinis", Lancefield = TRUE)), "B_STRPTC_GRH") # group H + expect_identical(as.character(guess_mo("S. salivarius", Lancefield = FALSE)), "B_STRPTC_SAL") + expect_identical(as.character(guess_mo("S. salivarius", Lancefield = TRUE)), "B_STRPTC_GRK") # group K library(dplyr) @@ -106,9 +106,8 @@ test_that("as.mo works", { select(genus) %>% as.mo() %>% as.character(), - paste0(c("ESC", "ESC", "STA", "STA", "STA", - "STA", "STA", "STA", "STA", "STA"), - "SPP")) + c("B_ESCHR", "B_ESCHR", "B_STPHY", "B_STPHY", "B_STPHY", + "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY", "B_STPHY")) # select with two columns expect_identical( @@ -127,18 +126,18 @@ test_that("as.mo works", { expect_error(septic_patients %>% select(1:3) %>% as.mo()) # print - expect_output(print(as.mo(c("ESCCOL", NA)))) + expect_output(print(as.mo(c("B_ESCHR_COL", NA)))) # helper function - expect_identical(as.mo("ESCCOL"), - guess_mo("ESCCOL")) + expect_identical(as.mo("B_ESCHR_COL"), + guess_mo("B_ESCHR_COL")) # test pull expect_equal(nrow(septic_patients %>% mutate(mo = as.mo(mo))), 2000) # test data.frame - expect_equal(nrow(data.frame(test = as.mo("ESCCOL"))), + expect_equal(nrow(data.frame(test = as.mo("B_ESCHR_COL"))), 1) # check empty values diff --git a/tests/testthat/test-mo_property.R b/tests/testthat/test-mo_property.R index e28cdbbc..47b285cd 100644 --- a/tests/testthat/test-mo_property.R +++ b/tests/testthat/test-mo_property.R @@ -1,6 +1,7 @@ context("mo_property.R") test_that("mo_property works", { + expect_equal(mo_subkingdom("E. coli"), "Negibacteria") expect_equal(mo_phylum("E. coli"), "Proteobacteria") expect_equal(mo_class("E. coli"), "Gammaproteobacteria") expect_equal(mo_order("E. coli"), "Enterobacteriales") @@ -10,8 +11,7 @@ test_that("mo_property works", { expect_equal(mo_subspecies("E. coli"), "") expect_equal(mo_fullname("E. coli"), "Escherichia coli") expect_equal(mo_type("E. coli", language = "en"), "Bacteria") - expect_equal(mo_gramstain("E. coli", language = "en"), "Negative rods") - expect_equal(mo_aerobic("E. coli"), TRUE) + expect_equal(mo_gramstain("E. coli", language = "en"), "Gram negative") expect_equal(class(mo_taxonomy("E. coli")), "list") expect_equal(mo_shortname("MRSA"), "S. aureus") @@ -22,30 +22,16 @@ test_that("mo_property works", { # test integrity library(dplyr) - MOs <- AMR::microorganisms %>% filter(!is.na(mo), nchar(mo) > 3) + rnd <- sample(1:nrow(AMR::microorganisms), 500, replace = FALSE) # random 500 rows + MOs <- AMR::microorganisms %>% filter(!is.na(mo), + species != "species", + dplyr::row_number() %in% rnd) expect_identical(MOs$fullname, mo_fullname(MOs$fullname, language = "en")) - mo_clean <- MOs$mo - mo_from_shortname <- as.mo(mo_shortname(mo_clean)) - mo_clean <- mo_clean[nchar(mo_from_shortname) == 6 & - !is.na(mo_from_shortname) & - !mo_from_shortname %like% "...SPP"] - mo_from_shortname <- mo_from_shortname[nchar(mo_from_shortname) == 6 & - !is.na(mo_from_shortname) & - !mo_from_shortname %like% "...SPP"] - tb <- tibble(a = substr(mo_clean, 1, 6), - b = mo_from_shortname, - c = a == b, - d = mo_shortname(a), - e = mo_shortname(b), - f = d == e) - expect_gt(sum(tb$c) / nrow(tb), 0.9) # more than 90% of MO code should be identical - expect_identical(sum(tb$f), nrow(tb)) # all shortnames should be identical - # check languages - expect_equal(mo_type("E. coli", language = "de"), "Bakterium") - expect_equal(mo_type("E. coli", language = "nl"), "Bacterie") - expect_equal(mo_gramstain("E. coli", language = "nl"), "Negatieve staven") + expect_equal(mo_type("E. coli", language = "de"), "Bakterien") + expect_equal(mo_type("E. coli", language = "nl"), "Bacteri\u00ebn") + expect_equal(mo_gramstain("E. coli", language = "nl"), "Gram-negatief") expect_output(print(mo_gramstain("E. coli", language = "en"))) expect_output(print(mo_gramstain("E. coli", language = "de"))) diff --git a/tests/testthat/test-portion.R b/tests/testthat/test-portion.R index db05330a..a86c4640 100755 --- a/tests/testthat/test-portion.R +++ b/tests/testthat/test-portion.R @@ -2,8 +2,8 @@ context("portion.R") test_that("portions works", { # amox resistance in `septic_patients` - expect_equal(portion_R(septic_patients$amox), 0.6603, tolerance = 0.0001) - expect_equal(portion_I(septic_patients$amox), 0.0030, tolerance = 0.0001) + expect_equal(portion_R(septic_patients$amox), 0.662, tolerance = 0.0001) + expect_equal(portion_I(septic_patients$amox), 0.003, tolerance = 0.0001) expect_equal(1 - portion_R(septic_patients$amox) - portion_I(septic_patients$amox), portion_S(septic_patients$amox)) expect_equal(portion_R(septic_patients$amox) + portion_I(septic_patients$amox), @@ -12,17 +12,17 @@ test_that("portions works", { portion_SI(septic_patients$amox)) expect_equal(septic_patients %>% portion_S(amcl), - 0.673, + 0.6706853, tolerance = 0.001) expect_equal(septic_patients %>% portion_S(amcl, gent), - 0.921, + 0.9202373, tolerance = 0.001) # amcl+genta susceptibility around 92.1% expect_equal(suppressWarnings(rsi(septic_patients$amcl, septic_patients$gent, interpretation = "S")), - 0.9208777, + 0.9202373, tolerance = 0.000001) # percentages @@ -35,7 +35,7 @@ test_that("portions works", { total = n()) %>% pull(n) %>% sum(), - 1404) + 1409) # count of cases expect_equal(septic_patients %>% @@ -47,7 +47,7 @@ test_that("portions works", { combination_p = portion_S(cipr, gent, as_percent = TRUE), combination_n = n_rsi(cipr, gent)) %>% pull(combination_n), - c(202, 482, 201, 499)) + c(202, 488, 201, 499)) expect_warning(portion_R(as.character(septic_patients$amcl))) expect_warning(portion_S(as.character(septic_patients$amcl))) @@ -57,7 +57,7 @@ test_that("portions works", { septic_patients$gent))) expect_equal(suppressWarnings(n_rsi(as.character(septic_patients$amcl, septic_patients$gent))), - 1570) + 1576) # check for errors expect_error(portion_IR("test", minimum = "test")) @@ -84,15 +84,15 @@ test_that("portions works", { test_that("old rsi works", { # amox resistance in `septic_patients` should be around 66.33% - expect_equal(suppressWarnings(rsi(septic_patients$amox)), 0.6633, tolerance = 0.0001) - expect_equal(suppressWarnings(rsi(septic_patients$amox, interpretation = "S")), 1 - 0.6633, tolerance = 0.0001) + expect_equal(suppressWarnings(rsi(septic_patients$amox)), 0.665, tolerance = 0.0001) + expect_equal(suppressWarnings(rsi(septic_patients$amox, interpretation = "S")), 1 - 0.665, tolerance = 0.0001) # pita+genta susceptibility around 98.09% expect_equal(suppressWarnings(rsi(septic_patients$pita, septic_patients$gent, interpretation = "S", info = TRUE)), - 0.9535, + 0.9540412, tolerance = 0.0001) # count of cases @@ -108,7 +108,7 @@ test_that("old rsi works", { as_percent = TRUE, warning = FALSE)), combination_n = n_rsi(cipr, gent)) %>% pull(combination_n), - c(202, 482, 201, 499)) + c(202, 488, 201, 499)) # portion_df expect_equal( @@ -122,7 +122,7 @@ test_that("old rsi works", { test_that("prediction of rsi works", { amox_R <- septic_patients %>% - filter(mo == "ESCCOL") %>% + filter(mo == "B_ESCHR_COL") %>% rsi_predict(col_ab = "amox", col_date = "date", minimum = 10, @@ -131,37 +131,37 @@ test_that("prediction of rsi works", { # amox resistance will increase according to data set `septic_patients` expect_true(amox_R[3] < amox_R[20]) - expect_output(rsi_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), model = "binomial", col_ab = "amox", col_date = "date", info = TRUE)) - expect_output(rsi_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), model = "loglin", col_ab = "amox", col_date = "date", info = TRUE)) - expect_output(rsi_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_output(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), model = "lin", col_ab = "amox", col_date = "date", info = TRUE)) - expect_error(rsi_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), model = "INVALID MODEL", col_ab = "amox", col_date = "date", info = TRUE)) - expect_error(rsi_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), col_ab = "NOT EXISTING COLUMN", col_date = "date", info = TRUE)) - expect_error(rsi_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_error(rsi_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), col_ab = "amox", col_date = "NOT EXISTING COLUMN", info = TRUE)) # almost all E. coli are mero S in the Netherlands :) - expect_error(resistance_predict(tbl = filter(septic_patients, mo == "ESCCOL"), + expect_error(resistance_predict(tbl = filter(septic_patients, mo == "B_ESCHR_COL"), col_ab = "mero", col_date = "date", info = TRUE)) diff --git a/tests/testthat/test-skewness.R b/tests/testthat/test-skewness.R index 96b0ef6e..3f953898 100644 --- a/tests/testthat/test-skewness.R +++ b/tests/testthat/test-skewness.R @@ -2,12 +2,12 @@ context("skewness.R") test_that("skewness works", { expect_equal(skewness(septic_patients$age), - -0.90624, + -0.8958019, tolerance = 0.00001) expect_equal(unname(skewness(data.frame(septic_patients$age))), - -0.90624, + -0.8958019, tolerance = 0.00001) expect_equal(skewness(matrix(septic_patients$age)), - -0.90624, + -0.8958019, tolerance = 0.00001) })