diff --git a/DESCRIPTION b/DESCRIPTION index 7bb150cf..3b9c49e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.5.0.9019 -Date: 2019-02-26 +Date: 2019-02-27 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/R/age.R b/R/age.R index 421f9626..0df17e96 100755 --- a/R/age.R +++ b/R/age.R @@ -73,8 +73,8 @@ age <- function(x, reference = Sys.Date()) { #' \itemize{ #' \item{\code{"children"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.} #' \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85, 95)}. This will split on 0-64, 65-74, 75-84, 85-94 and 95+.} -#' \item{\code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth.} -#' \item{\code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29 and so forth.} +#' \item{\code{"fives"}, equivalent of: \code{1:24 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth, until 120.} +#' \item{\code{"tens"}, equivalent of: \code{1:12 * 10}. This will split on 0-9, 10-19, 20-29 and so forth, until 120.} #' } #' } #' @keywords age_group age @@ -92,11 +92,11 @@ age <- function(x, reference = Sys.Date()) { #' age_groups(ages, c(20, 50)) #' #' # split into groups of ten years -#' age_groups(ages, 1:10 * 10) +#' age_groups(ages, 1:12 * 10) #' age_groups(ages, split_at = "tens") #' #' # split into groups of five years -#' age_groups(ages, 1:20 * 5) +#' age_groups(ages, 1:24 * 5) #' age_groups(ages, split_at = "fives") #' #' # split specifically for children @@ -122,9 +122,9 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75)) { } else if (split_at %like% "^(elder|senior)") { split_at <- c(65, 75, 85, 95) } else if (split_at %like% "^five") { - split_at <- 1:20 * 5 + split_at <- 1:24 * 5 } else if (split_at %like% "^ten") { - split_at <- 1:10 * 10 + split_at <- 1:12 * 10 } } split_at <- as.integer(split_at) diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 09763da6..4bca4357 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -24,12 +24,12 @@ #' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life. #' @section Catalogue of Life: #' \if{html}{\figure{logo_col.png}{options: height=60px style=margin-bottom:5px} \cr} -#' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (\url{http://www.catalogueoflife.org}). This data is updated annually - check the included version with \code{\link{catalogue_of_life_version}}. +#' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (\url{http://www.catalogueoflife.org}). This data is updated annually - check the included version with \code{\link{catalogue_of_life_version}()}. #' #' Included are: #' \itemize{ #' \item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses} -#' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).} +#' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. This covers the most relevant microbial fungi (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).} #' \item{All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed} #' \item{The complete taxonomic tree of all included (sub)species: from kingdom to subspecies} #' \item{The responsible author(s) and year of scientific publication} @@ -76,6 +76,7 @@ NULL #' #' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year. #' @seealso \code{\link{microorganisms}} +#' @details The list item \code{is_latest_annual_release} is based on the system date. #' @inheritSection catalogue_of_life Catalogue of Life #' @inheritSection AMR Read more on our website! #' @export diff --git a/R/mo.R b/R/mo.R index 4faa3b9f..df08ec54 100755 --- a/R/mo.R +++ b/R/mo.R @@ -21,7 +21,7 @@ #' Transform to microorganism ID #' -#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms \emph{Bacteria}, \emph{Fungi} and \emph{Protozoa} (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. +#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms Archaea, Bacteria, Protozoa, Viruses and most microbial species from the kingdom Fungi (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples. #' @param x a character vector or a \code{data.frame} with one or two columns #' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1]. #' @@ -65,7 +65,6 @@ #' \itemize{ #' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first} #' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason} -#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}} #' \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}} #' } #' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms. @@ -77,7 +76,7 @@ #' \item{It strips off values between brackets and the brackets itself, and re-evaluates the input with all previous rules} #' \item{It strips off words from the end one by one and re-evaluates the input with all previous rules} #' \item{It strips off words from the start one by one and re-evaluates the input with all previous rules} -#' \item{It tries to look for some manual changes which are not yet published to the Catalogue of Life (like \emph{Propionibacterium} not yet being \emph{Cutibacterium})} +#' \item{It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like \emph{Propionibacterium} being \emph{Cutibacterium})} #' } #' #' Examples: @@ -89,7 +88,7 @@ #' #' Use \code{mo_failures()} to get a vector with all values that could not be coerced to a valid value. #' -#' Use \code{mo_uncertainties()} to get a vector with all values that were coerced to a valid value, but with uncertainty. +#' Use \code{mo_uncertainties()} to get info about all values that were coerced to a valid value, but with uncertainty. #' #' Use \code{mo_renamed()} to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name. #' @@ -111,7 +110,7 @@ #' #' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571} #' -#' [3] Catalogue of Life: Annual Checklist (public online database), \url{www.catalogueoflife.org}. +#' [3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}). #' @export #' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}. #' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine ID's. \cr @@ -238,7 +237,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } notes <- character(0) - uncertainties <- character(0) + uncertainties <- data.frame(input = character(0), + fullname = character(0), + mo = character(0)) failures <- character(0) x_input <- x # already strip leading and trailing spaces @@ -695,8 +696,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, found <- microorganismsDT[tolower(fullname) %like% paste(b.x_trimmed, "species"), ..property][[1]] if (length(found) > 0) { x[i] <- found[1L] - uncertainties <<- c(uncertainties, - paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found[1L], fullname][[1]], " (", found[1L], ")")) + uncertainties <<- rbind(uncertainties, + data.frame(input = a.x_backup, + fullname = microorganismsDT[mo == found[1L], fullname][[1]], + mo = found[1L])) return(x) } } @@ -719,8 +722,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, ref_old = found[1, ref], ref_new = microorganismsDT[col_id == found[1, col_id_new], ref], mo = microorganismsDT[col_id == found[1, col_id_new], mo]) - uncertainties <<- c(uncertainties, - paste0("'", a.x_backup, "' >> ", found[1, fullname], " (Catalogue of Life ID ", found[1, col_id], ")")) + uncertainties <<- rbind(uncertainties, + data.frame(input = a.x_backup, + fullname = found[1, fullname], + mo = paste("CoL", found[1, col_id]))) return(x) } @@ -731,8 +736,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, if (!is.na(found) & nchar(b.x_trimmed) >= 6) { found_result <- found found <- microorganismsDT[mo == found, ..property][[1]] - uncertainties <<- c(uncertainties, - paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")")) + uncertainties <<- rbind(uncertainties, + data.frame(input = a.x_backup, + fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], + mo = found_result[1L])) return(found[1L]) } @@ -745,8 +752,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, if (!is.na(found)) { found_result <- found found <- microorganismsDT[mo == found, ..property][[1]] - uncertainties <<- c(uncertainties, - paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")")) + uncertainties <<- rbind(uncertainties, + data.frame(input = a.x_backup, + fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], + mo = found_result[1L])) return(found[1L]) } } @@ -761,8 +770,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, if (!is.na(found)) { found_result <- found found <- microorganismsDT[mo == found, ..property][[1]] - uncertainties <<- c(uncertainties, - paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")")) + uncertainties <<- rbind(uncertainties, + data.frame(input = a.x_backup, + fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], + mo = found_result[1L])) return(found[1L]) } } @@ -773,11 +784,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, if (!is.na(found)) { found_result <- found found <- microorganismsDT[mo == found, ..property][[1]] - warning(silver(paste0('Guessed with uncertainty: "', - a.x_backup, '" >> ', italic(microorganismsDT[mo == found_result[1L], fullname][[1]]), " (", found_result[1L], ")")), - call. = FALSE, immediate. = FALSE) - uncertainties <<- c(uncertainties, - paste0('"', a.x_backup, '" >> ', microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")")) + uncertainties <<- rbind(uncertainties, + data.frame(input = a.x_backup, + fullname = microorganismsDT[mo == found_result[1L], fullname][[1]], + mo = found_result[1L])) return(found[1L]) } @@ -799,7 +809,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, # failures failures <- failures[!failures %in% c(NA, NULL, NaN)] - if (length(failures) > 0) { + if (length(failures) > 0 & clear_options == TRUE) { options(mo_failures = sort(unique(failures))) plural <- c("value", "it") if (n_distinct(failures) > 1) { @@ -807,7 +817,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, } total_failures <- length(x_input[x_input %in% failures & !x_input %in% c(NA, NULL, NaN)]) total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)]) - msg <- paste0("\n", n_distinct(failures), " unique ", plural[1], + msg <- paste0("\n", nr2char(n_distinct(failures)), " unique input ", plural[1], " (^= ", percent(total_failures / total_n, round = 1, force_zero = TRUE), ") could not be coerced to a valid MO code") if (n_distinct(failures) <= 10) { @@ -819,14 +829,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, immediate. = TRUE) # thus will always be shown, even if >= warnings } # uncertainties - if (length(uncertainties) > 0) { - options(mo_uncertainties = sort(unique(uncertainties))) + if (NROW(uncertainties) > 0 & clear_options == TRUE) { + options(mo_uncertainties = as.list(distinct(uncertainties, input, .keep_all = TRUE))) + plural <- c("value", "it") - if (n_distinct(failures) > 1) { + if (NROW(uncertainties) > 1) { plural <- c("values", "them") } - msg <- paste0("\nResults of ", n_distinct(uncertainties), " input ", plural[1], - " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".") + msg <- paste0("\nResults of ", nr2char(NROW(uncertainties)), " input ", plural[1], + " was guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".") warning(red(msg), call. = FALSE, immediate. = TRUE) # thus will always be shown, even if >= warnings @@ -961,6 +972,7 @@ print.mo <- function(x, ...) { } #' @exportMethod summary.mo +#' @importFrom dplyr n_distinct #' @export #' @noRd summary.mo <- function(object, ...) { @@ -969,7 +981,7 @@ summary.mo <- function(object, ...) { top_3 <- unname(top_freq(freq(x), 3)) c("Class" = "mo", "" = length(x[is.na(x)]), - "Unique" = dplyr::n_distinct(x[!is.na(x)]), + "Unique" = n_distinct(x[!is.na(x)]), "#1" = top_3[1], "#2" = top_3[2], "#3" = top_3[3]) @@ -978,7 +990,7 @@ summary.mo <- function(object, ...) { #' @exportMethod as.data.frame.mo #' @export #' @noRd -as.data.frame.mo <- function (x, ...) { +as.data.frame.mo <- function(x, ...) { # same as as.data.frame.character but with removed stringsAsFactors, since it will be class "mo" nm <- paste(deparse(substitute(x), width.cutoff = 500L), collapse = " ") @@ -1004,13 +1016,31 @@ mo_failures <- function() { } #' @rdname as.mo +#' @importFrom crayon italic #' @export mo_uncertainties <- function() { - getOption("mo_uncertainties") + df <- as.data.frame(getOption("mo_uncertainties")) + msg <- "" + for (i in 1:nrow(df)) { + msg <- paste(msg, + paste0('"', df[i, "input"], '" -> ', italic(df[i, "fullname"]), " (", df[i, "mo"], ")"), + sep = "\n") + } + cat(paste0(bold("Results guessed with uncertainty:"), msg)) } #' @rdname as.mo #' @export mo_renamed <- function() { - strip_style(gsub("was renamed", ">>", getOption("mo_renamed"), fixed = TRUE)) + strip_style(gsub("was renamed", "->", getOption("mo_renamed"), fixed = TRUE)) +} + +nr2char <- function(x) { + if (x %in% c(1:10)) { + v <- c("one" = 1, "two" = 2, "three" = 3, "four" = 4, "five" = 5, + "six" = 6, "seven" = 7, "eight" = 8, "nine" = 9, "ten" = 10) + names(v[x]) + } else { + x + } } diff --git a/R/mo_source.R b/R/mo_source.R index 9748bd29..f4cc7fe1 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -26,13 +26,13 @@ #' @rdname mo_source #' @name mo_source #' @aliases set_mo_source get_mo_source -#' @details The reference file can be a text file seperated with commas (CSV) or pipes, an Excel file (old 'xls' format or new 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the \code{readxl} package installed. +#' @details The reference file can be a text file seperated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you need to have the \code{readxl} package installed. #' #' \code{set_mo_source} will check the file for validity: it must be a \code{data.frame}, must have a column named \code{"mo"} which contains values from \code{microorganisms$mo} and must have a reference column with your own defined values. If all tests pass, \code{set_mo_source} will read the file into R and export it to \code{"~/.mo_source.rds"}. This compressed data file will then be used at default for MO determination (function \code{\link{as.mo}} and consequently all \code{mo_*} functions like \code{\link{mo_genus}} and \code{\link{mo_gramstain}}). The location of the original file will be saved as option with \code{\link{options}(mo_source = path)}. Its timestamp will be saved with \code{\link{options}(mo_source_datetime = ...)}. #' #' \code{get_mo_source} will return the data set by reading \code{"~/.mo_source.rds"} with \code{\link{readRDS}}. If the original file has changed (the file defined with \code{path}), it will call \code{set_mo_source} to update the data file automatically. #' -#' Reading an Excel file (\code{.xlsx}) with only one row has a size of 8-9 kB. The compressed file will have a size of 0.1 kB and can be read by \code{get_mo_source} in only a couple of microseconds (a millionth of a second). +#' Reading an Excel file (\code{.xlsx}) with only one row has a size of 8-9 kB. The compressed file used by this package will have a size of 0.1 kB and can be read by \code{get_mo_source} in only a couple of microseconds (a millionth of a second). #' @importFrom dplyr select everything #' @export #' @inheritSection AMR Read more on our website! @@ -48,7 +48,7 @@ #' # 1. We save it as 'home/me/ourcodes.xlsx' #' #' # 2. We use it for input: -#' set_mo_source("C:\path\ourcodes.xlsx") +#' set_mo_source("home/me/ourcodes.xlsx") #' #> Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'. #' #' # 3. And use it in our functions: @@ -109,11 +109,20 @@ set_mo_source <- function(path) { } df <- readxl::read_excel(path) + } else if (path %like% '[.]tsv$') { + df <- utils::read.table(header = TRUE, sep = "\t", stringsAsFactors = FALSE) + } else { # try comma first try( df <- utils::read.table(header = TRUE, sep = ",", stringsAsFactors = FALSE), silent = TRUE) + if (!is_valid(df)) { + # try tab + try( + df <- utils::read.table(header = TRUE, sep = "\t", stringsAsFactors = FALSE), + silent = TRUE) + } if (!is_valid(df)) { # try pipe try( diff --git a/docs/articles/AMR.html b/docs/articles/AMR.html index 0c3fafb8..915b8c7d 100644 --- a/docs/articles/AMR.html +++ b/docs/articles/AMR.html @@ -192,7 +192,7 @@

How to conduct AMR analysis

Matthijs S. Berends

-

26 February 2019

+

27 February 2019

@@ -201,7 +201,7 @@ -

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 26 February 2019.

+

Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 27 February 2019.

Introduction

@@ -217,21 +217,21 @@ -2019-02-26 +2019-02-27 abcd Escherichia coli S S -2019-02-26 +2019-02-27 abcd Escherichia coli S R -2019-02-26 +2019-02-27 efgh Escherichia coli R @@ -327,19 +327,41 @@ -2015-04-06 -E7 -Hospital C +2015-01-18 +F9 +Hospital B +Escherichia coli +R +S +R +S +M + + +2017-12-07 +H7 +Hospital A +Klebsiella pneumoniae +R +S +S +S +M + + +2016-02-14 +J4 +Hospital A Escherichia coli R I S -R +S M -2016-10-23 -S6 +2010-12-25 +P2 Hospital B Streptococcus pneumoniae S @@ -349,44 +371,22 @@ F -2010-02-02 -O1 -Hospital D -Escherichia coli -S -S -R -S -F - - -2014-03-12 -H4 +2016-12-26 +S8 Hospital A -Escherichia coli +Streptococcus pneumoniae +S +I S S -S -S -M - - -2011-11-01 -X1 -Hospital B -Escherichia coli -R -S -S -R F -2016-12-10 -W4 -Hospital B -Escherichia coli -R +2010-03-27 +R7 +Hospital D +Klebsiella pneumoniae +S S S S @@ -411,8 +411,8 @@ #> #> Item Count Percent Cum. Count Cum. Percent #> --- ----- ------- -------- ----------- ------------- -#> 1 M 10,479 52.4% 10,479 52.4% -#> 2 F 9,521 47.6% 20,000 100.0% +#> 1 M 10,386 51.9% 10,386 51.9% +#> 2 F 9,614 48.1% 20,000 100.0%

So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values M and F. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.

The data is already quite clean, but we still need to transform some variables. The bacteria column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate() function of the dplyr package makes this really easy:

+#> -> changed 4,078 test results (0 to S; 0 to I; 4,078 to R)

@@ -489,8 +489,8 @@ #> NOTE: Using column `bacteria` as input for `col_mo`. #> NOTE: Using column `date` as input for `col_date`. #> NOTE: Using column `patient_id` as input for `col_patient_id`. -#> => Found 5,678 first isolates (28.4% of total)

-

So only 28.4% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

+#> => Found 5,707 first isolates (28.5% of total) +

So only 28.5% is suitable for resistance analysis! We can now filter on it with the filter() function, also from the dplyr package:

data_1st <- data %>% 
   filter(first == TRUE)

For future use, the above two syntaxes can be shortened with the filter_first_isolate() function:

@@ -516,65 +516,65 @@ 1 -2010-01-25 -B9 +2010-02-12 +I9 B_ESCHR_COL -R -R S S +S +R TRUE 2 -2010-03-01 -B9 +2010-02-12 +I9 B_ESCHR_COL -I -S R S +S +S FALSE 3 -2010-06-15 -B9 +2010-02-22 +I9 B_ESCHR_COL R S -S +R S FALSE 4 -2010-07-08 -B9 +2010-03-05 +I9 B_ESCHR_COL +S +S R S -S -S FALSE 5 -2010-07-20 -B9 +2010-03-08 +I9 B_ESCHR_COL S S -S -S +R +R FALSE 6 -2010-09-18 -B9 +2010-03-17 +I9 B_ESCHR_COL -R +S S S S @@ -582,8 +582,8 @@ 7 -2010-09-21 -B9 +2010-05-03 +I9 B_ESCHR_COL S S @@ -593,33 +593,33 @@ 8 -2010-11-24 -B9 +2010-07-03 +I9 B_ESCHR_COL S -R +S S S FALSE 9 -2010-12-08 -B9 +2010-09-11 +I9 B_ESCHR_COL R -R +S S S FALSE 10 -2011-01-19 -B9 +2010-09-24 +I9 B_ESCHR_COL S -I +R S S FALSE @@ -637,7 +637,7 @@ #> NOTE: Using column `patient_id` as input for `col_patient_id`. #> NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this. #> [Criterion] Inclusion based on key antibiotics, ignoring I. -#> => Found 15,822 first weighted isolates (79.1% of total) +#> => Found 15,861 first weighted isolates (79.3% of total) @@ -654,70 +654,70 @@ - - + + - - + + - - + + - - + + - - + + - + - - + + + + - - - + - - + + - - + + - - + + - + @@ -726,35 +726,35 @@ - - + + - + - - + + - + - + - - + + - + @@ -762,11 +762,11 @@ - - + + - + @@ -774,11 +774,11 @@
isolate
12010-01-25B92010-02-12I9 B_ESCHR_COLRR S SSR TRUE TRUE
22010-03-01B92010-02-12I9 B_ESCHR_COLIS R SSS FALSE TRUE
32010-06-15B92010-02-22I9 B_ESCHR_COL R SSR S FALSE TRUE
42010-07-08B92010-03-05I9 B_ESCHR_COLSS R SSSFALSE FALSETRUE
52010-07-20B92010-03-08I9 B_ESCHR_COL S SSSRR FALSE TRUE
62010-09-18B92010-03-17I9 B_ESCHR_COLRS S S S
72010-09-21B92010-05-03I9 B_ESCHR_COL S S S S FALSETRUEFALSE
82010-11-24B92010-07-03I9 B_ESCHR_COL SRS S S FALSETRUEFALSE
92010-12-08B92010-09-11I9 B_ESCHR_COL RRS S S FALSE
102011-01-19B92010-09-24I9 B_ESCHR_COL SIR S S FALSE
-

Instead of 1, now 9 isolates are flagged. In total, 79.1% of all isolates are marked ‘first weighted’ - 50.7% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

+

Instead of 1, now 8 isolates are flagged. In total, 79.3% of all isolates are marked ‘first weighted’ - 50.8% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline.

As with filter_first_isolate(), there’s a shortcut for this new algorithm too:

data_1st <- data %>% 
   filter_first_weighted_isolate()
-

So we end up with 15,822 isolates for analysis.

+

So we end up with 15,861 isolates for analysis.

We can remove unneeded columns:

data_1st <- data_1st %>% 
   select(-c(first, keyab))
@@ -804,14 +804,14 @@ 1 -2015-04-06 -E7 -Hospital C +2015-01-18 +F9 +Hospital B B_ESCHR_COL R -I S R +S M Gram negative Escherichia @@ -819,9 +819,25 @@ TRUE -2 -2016-10-23 -S6 +3 +2016-02-14 +J4 +Hospital A +B_ESCHR_COL +R +I +S +S +M +Gram negative +Escherichia +coli +TRUE + + +4 +2010-12-25 +P2 Hospital B B_STRPT_PNE S @@ -834,68 +850,52 @@ pneumoniae TRUE - -3 -2010-02-02 -O1 -Hospital D -B_ESCHR_COL -S -S -R -S -F -Gram negative -Escherichia -coli -TRUE - 5 -2011-11-01 -X1 -Hospital B -B_ESCHR_COL -R +2016-12-26 +S8 +Hospital A +B_STRPT_PNE S +I S R F -Gram negative -Escherichia -coli +Gram positive +Streptococcus +pneumoniae TRUE 6 -2016-12-10 -W4 -Hospital B -B_ESCHR_COL +2010-03-27 +R7 +Hospital D +B_KLBSL_PNE R S S S F Gram negative -Escherichia -coli +Klebsiella +pneumoniae TRUE -7 -2015-07-07 -P8 -Hospital D -B_ESCHR_COL -S +8 +2016-08-08 +K8 +Hospital B +B_KLBSL_PNE R +I S S -F +M Gram negative -Escherichia -coli +Klebsiella +pneumoniae TRUE @@ -915,9 +915,9 @@
freq(paste(data_1st$genus, data_1st$species))

Or can be used like the dplyr way, which is easier readable:

data_1st %>% freq(genus, species)
-

Frequency table of genus and species from a data.frame (15,822 x 13)

+

Frequency table of genus and species from a data.frame (15,861 x 13)

Columns: 2
-Length: 15,822 (of which NA: 0 = 0.00%)
+Length: 15,861 (of which NA: 0 = 0.00%)
Unique: 4

Shortest: 16
Longest: 24

@@ -934,33 +934,33 @@ Longest: 24

1 Escherichia coli -7,838 -49.5% -7,838 -49.5% +7,879 +49.7% +7,879 +49.7% 2 Staphylococcus aureus -3,965 -25.1% -11,803 -74.6% +3,915 +24.7% +11,794 +74.4% 3 Streptococcus pneumoniae -2,457 -15.5% -14,260 -90.1% +2,482 +15.6% +14,276 +90.0% 4 Klebsiella pneumoniae -1,562 -9.9% -15,822 +1,585 +10.0% +15,861 100.0% @@ -971,7 +971,7 @@ Longest: 24

Resistance percentages

The functions portion_R, portion_RI, portion_I, portion_IS and portion_S can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:

data_1st %>% portion_IR(amox)
-#> [1] 0.4722538
+#> [1] 0.4744341

Or can be used in conjuction with group_by() and summarise(), both from the dplyr package:

data_1st %>% 
   group_by(hospital) %>% 
@@ -984,19 +984,19 @@ Longest: 24

Hospital A -0.4692014 +0.4759916 Hospital B -0.4694061 +0.4808997 Hospital C -0.4845361 +0.4682779 Hospital D -0.4727669 +0.4651015 @@ -1014,23 +1014,23 @@ Longest: 24

Hospital A -0.4692014 -4708 +0.4759916 +4790 Hospital B -0.4694061 -5573 +0.4808997 +5602 Hospital C -0.4845361 -2328 +0.4682779 +2317 Hospital D -0.4727669 -3213 +0.4651015 +3152 @@ -1050,27 +1050,27 @@ Longest: 24

Escherichia -0.7269712 -0.9050778 -0.9744833 +0.7292804 +0.8975758 +0.9772814 Klebsiella -0.7349552 -0.8988476 -0.9763124 +0.7438486 +0.9015773 +0.9741325 Staphylococcus -0.7263556 -0.9235813 -0.9793190 +0.7315453 +0.9154534 +0.9793103 Streptococcus -0.7391127 +0.7352941 0.0000000 -0.7391127 +0.7352941 diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png index 13b0d9f9..946ecc29 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png index 38decffc..377470a0 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png index 65da5ffc..1f14285d 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png index d03665cb..9b15ee06 100644 Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html index c0c90e4e..7c0f984f 100644 --- a/docs/articles/EUCAST.html +++ b/docs/articles/EUCAST.html @@ -192,7 +192,7 @@

How to apply EUCAST rules

Matthijs S. Berends

-

26 February 2019

+

27 February 2019

diff --git a/docs/articles/G_test.html b/docs/articles/G_test.html index d0cb9763..85bbe8dc 100644 --- a/docs/articles/G_test.html +++ b/docs/articles/G_test.html @@ -192,7 +192,7 @@

How to use the G-test

Matthijs S. Berends

-

26 February 2019

+

27 February 2019

diff --git a/docs/articles/WHONET.html b/docs/articles/WHONET.html index 36401586..c7332bcb 100644 --- a/docs/articles/WHONET.html +++ b/docs/articles/WHONET.html @@ -192,7 +192,7 @@

How to work with WHONET data

Matthijs S. Berends

-

26 February 2019

+

27 February 2019

diff --git a/docs/articles/atc_property.html b/docs/articles/atc_property.html index ff8088b3..b99b6c90 100644 --- a/docs/articles/atc_property.html +++ b/docs/articles/atc_property.html @@ -192,7 +192,7 @@

How to get properties of an antibiotic

Matthijs S. Berends

-

26 February 2019

+

27 February 2019

diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 117feb83..43526b59 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019
@@ -192,7 +192,7 @@

Benchmarks

Matthijs S. Berends

-

25 February 2019

+

27 February 2019

@@ -217,15 +217,15 @@ times = 10) print(S.aureus, unit = "ms", signif = 3) #> Unit: milliseconds -#> expr min lq mean median uq max neval -#> as.mo("sau") 13.40 13.60 17.8 13.60 13.80 51.6 10 -#> as.mo("stau") 83.00 83.30 96.5 85.30 88.40 163.0 10 -#> as.mo("staaur") 13.50 13.50 19.1 13.70 14.90 51.5 10 -#> as.mo("STAAUR") 13.50 13.50 14.1 13.60 13.70 18.2 10 -#> as.mo("S. aureus") 21.40 21.40 22.1 21.50 21.70 25.4 10 -#> as.mo("S. aureus") 21.40 21.40 25.7 21.60 23.30 60.1 10 -#> as.mo("Staphylococcus aureus") 5.63 5.87 15.2 5.94 8.32 57.8 10
-

In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 10 milliseconds means it can determine 100 input values per second. It case of 50 milliseconds, this is only 20 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first is a WHONET code) or common laboratory codes, or common full organism names like the last one.

+#> expr min lq mean median uq max neval +#> as.mo("sau") 15.40 15.50 22.70 15.60 15.90 53.3 10 +#> as.mo("stau") 84.20 84.30 86.60 84.60 86.60 102.0 10 +#> as.mo("staaur") 15.40 15.40 19.70 15.50 15.60 57.1 10 +#> as.mo("STAAUR") 15.40 15.40 15.50 15.50 15.60 15.9 10 +#> as.mo("S. aureus") 23.50 23.50 31.10 23.50 23.60 61.7 10 +#> as.mo("S. aureus") 23.50 23.50 36.50 23.50 61.60 74.3 10 +#> as.mo("Staphylococcus aureus") 7.19 7.27 9.01 7.44 7.67 23.2 10 +

In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.

To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Thermus islandicus (B_THERMS_ISL), a bug probably never found before in humans:

T.islandicus <- microbenchmark(as.mo("theisl"),
                                  as.mo("THEISL"),
@@ -236,12 +236,12 @@
 print(T.islandicus, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>                         expr   min    lq  mean median    uq   max neval
-#>              as.mo("theisl") 448.0 486.0 483.0  489.0 490.0 510.0    10
-#>              as.mo("THEISL") 447.0 489.0 487.0  491.0 493.0 499.0    10
-#>       as.mo("T. islandicus")  78.0  78.2  78.9   78.7  78.9  82.3    10
-#>      as.mo("T.  islandicus")  78.1  78.3  84.4   78.8  81.3 129.0    10
-#>  as.mo("Thermus islandicus")  61.8  62.1  75.4   62.8 104.0 109.0    10
-

That takes 8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.

+#> as.mo("theisl") 444.0 449.0 479.0 488.0 493.0 506.0 10 +#> as.mo("THEISL") 444.0 484.0 488.0 491.0 507.0 514.0 10 +#> as.mo("T. islandicus") 80.5 80.8 87.8 81.3 89.9 118.0 10 +#> as.mo("T. islandicus") 79.8 80.4 82.0 80.7 81.2 93.5 10 +#> as.mo("Thermus islandicus") 63.4 63.5 72.3 64.0 64.5 107.0 10 +

That takes 7.7 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.

In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon):

par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
 
@@ -287,8 +287,8 @@
 print(run_it, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>            expr min  lq mean median  uq max neval
-#>  mo_fullname(x) 741 746  806    778 827 968    10
-

So transforming 500,000 values (!!) of 50 unique values only takes 0.78 seconds (778 ms). You only lose time on your unique input values.

+#> mo_fullname(x) 743 771 805 798 844 886 10 +

So transforming 500,000 values (!!) of 50 unique values only takes 0.8 seconds (798 ms). You only lose time on your unique input values.

@@ -300,11 +300,11 @@ times = 10) print(run_it, unit = "ms", signif = 3) #> Unit: milliseconds -#> expr min lq mean median uq max neval -#> A 10.200 10.300 10.600 10.400 11.00 11.300 10 -#> B 20.500 20.700 21.300 21.400 22.00 22.100 10 -#> C 0.308 0.504 0.589 0.591 0.73 0.863 10

-

So going from mo_fullname("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0006 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

+#> expr min lq mean median uq max neval +#> A 10.900 11.100 11.200 11.200 11.300 11.400 10 +#> B 21.300 21.400 21.600 21.600 21.700 22.000 10 +#> C 0.302 0.313 0.492 0.532 0.569 0.725 10 +

So going from mo_fullname("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0005 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
                          C = mo_fullname("Staphylococcus aureus"),
@@ -317,14 +317,14 @@
 print(run_it, unit = "ms", signif = 3)
 #> Unit: milliseconds
 #>  expr   min    lq  mean median    uq   max neval
-#>     A 0.318 0.340 0.388  0.382 0.434 0.474    10
-#>     B 0.339 0.362 0.424  0.428 0.449 0.555    10
-#>     C 0.331 0.369 0.522  0.526 0.637 0.673    10
-#>     D 0.269 0.278 0.313  0.300 0.353 0.384    10
-#>     E 0.252 0.266 0.322  0.302 0.349 0.448    10
-#>     F 0.241 0.264 0.310  0.313 0.347 0.379    10
-#>     G 0.241 0.258 0.310  0.317 0.355 0.386    10
-#>     H 0.278 0.289 0.316  0.313 0.334 0.375    10
+#> A 0.330 0.399 0.444 0.425 0.480 0.599 10 +#> B 0.343 0.362 0.386 0.376 0.425 0.439 10 +#> C 0.327 0.454 0.550 0.571 0.640 0.816 10 +#> D 0.273 0.306 0.329 0.319 0.366 0.392 10 +#> E 0.246 0.266 0.295 0.286 0.323 0.364 10 +#> F 0.260 0.265 0.320 0.312 0.364 0.407 10 +#> G 0.238 0.252 0.281 0.270 0.319 0.339 10 +#> H 0.251 0.278 0.316 0.320 0.358 0.381 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -351,13 +351,13 @@ print(run_it, unit = "ms", signif = 4) #> Unit: milliseconds #> expr min lq mean median uq max neval -#> en 13.23 13.57 16.92 13.69 13.73 46.78 10 -#> de 22.09 22.20 25.72 22.32 23.16 55.31 10 -#> nl 21.66 22.03 22.12 22.15 22.20 22.52 10 -#> es 21.67 22.07 22.32 22.16 22.45 23.26 10 -#> it 21.64 21.86 22.35 22.21 22.48 23.90 10 -#> fr 21.70 22.10 28.72 22.21 22.33 55.28 10 -#> pt 21.78 22.12 28.83 22.19 22.21 55.99 10
+#> en 14.37 14.43 17.91 14.64 14.82 47.42 10 +#> de 22.59 22.88 27.57 23.00 23.55 67.95 10 +#> nl 22.50 22.91 26.39 22.94 23.01 57.05 10 +#> es 22.56 22.76 26.83 23.05 24.02 57.31 10 +#> it 22.53 22.86 29.52 22.97 23.29 56.11 10 +#> fr 22.49 22.92 23.06 23.01 23.18 23.99 10 +#> pt 22.49 22.86 23.21 23.06 23.62 24.09 10

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index ab59c7db..e4fd0ba2 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/freq.html b/docs/articles/freq.html index 5e013356..9bf1d365 100644 --- a/docs/articles/freq.html +++ b/docs/articles/freq.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to create frequency tables

Matthijs S. Berends

-

25 February 2019

+

27 February 2019

diff --git a/docs/articles/mo_property.html b/docs/articles/mo_property.html index 974b617e..7e5c8d10 100644 --- a/docs/articles/mo_property.html +++ b/docs/articles/mo_property.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to get properties of a microorganism

Matthijs S. Berends

-

25 February 2019

+

27 February 2019

diff --git a/docs/articles/resistance_predict.html b/docs/articles/resistance_predict.html index 3a394d4d..1e9440e2 100644 --- a/docs/articles/resistance_predict.html +++ b/docs/articles/resistance_predict.html @@ -40,7 +40,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -192,7 +192,7 @@

How to predict antimicrobial resistance

Matthijs S. Berends

-

25 February 2019

+

27 February 2019

diff --git a/docs/news/index.html b/docs/news/index.html index 9c4989bd..6b4def66 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9018 + 0.5.0.9019 @@ -252,13 +252,11 @@
  • Catalogue of Life as a new taxonomic source for data about microorganisms, which also contains all ITIS data we used previously. The microorganisms data set now contains:
  • @@ -338,7 +336,7 @@ These functions use as.atc()
  • Understanding of highly virulent E. coli strains like EIEC, EPEC and STEC
  • There will be looked for uncertain results at default - these results will be returned with an informative warning
  • -
  • Manual now contains more info about the algorithms
  • +
  • Manual (help page) now contains more info about the algorithms
  • Progress bar will be shown when it takes more than 3 seconds to get results
  • Support for formatted console text
  • Console will return the percentage of uncoercable input
  • diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index 4aec10c5..f66bac17 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -268,8 +268,8 @@
  • A character:

  • @@ -294,11 +294,11 @@ age_groups(ages, c(20, 50)) # split into groups of ten years -age_groups(ages, 1:10 * 10) +age_groups(ages, 1:12 * 10) age_groups(ages, split_at = "tens") # split into groups of five years -age_groups(ages, 1:20 * 5) +age_groups(ages, 1:24 * 5) age_groups(ages, split_at = "fives") # split specifically for children diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index f224e862..c0ed089d 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -47,7 +47,7 @@ - + @@ -237,7 +237,7 @@
    -

    Use this function to determine a valid microorganism ID (mo). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms Bacteria, Fungi and Protozoa (see Source), so the input can be almost anything: a full name (like "Staphylococcus aureus"), an abbreviated name (like "S. aureus"), an abbreviation known in the field (like "MRSA"), or just a genus. You could also select a genus and species column, zie Examples.

    +

    Use this function to determine a valid microorganism ID (mo). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms Archaea, Bacteria, Protozoa, Viruses and most microbial species from the kingdom Fungi (see Source), so the input can be almost anything: a full name (like "Staphylococcus aureus"), an abbreviated name (like "S. aureus"), an abbreviation known in the field (like "MRSA"), or just a genus. You could also select a genus and species column, zie Examples.

    @@ -309,7 +309,6 @@

    A couple of effects because of these rules:

    This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms.

    UNCERTAIN RESULTS
    @@ -318,7 +317,7 @@ When using allow_uncertain = TRUE (which is the default setting), i

  • It strips off values between brackets and the brackets itself, and re-evaluates the input with all previous rules

  • It strips off words from the end one by one and re-evaluates the input with all previous rules

  • It strips off words from the start one by one and re-evaluates the input with all previous rules

  • -
  • It tries to look for some manual changes which are not yet published to the Catalogue of Life (like Propionibacterium not yet being Cutibacterium)

  • +
  • It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like Propionibacterium being Cutibacterium)

  • Examples:

    Use mo_failures() to get a vector with all values that could not be coerced to a valid value.

    -

    Use mo_uncertainties() to get a vector with all values that were coerced to a valid value, but with uncertainty.

    +

    Use mo_uncertainties() to get info about all values that were coerced to a valid value, but with uncertainty.

    Use mo_renamed() to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.

    Microbial prevalence of pathogens in humans

    @@ -345,16 +344,16 @@ When using allow_uncertain = TRUE (which is the default setting), i

    [1] Becker K et al. Coagulase-Negative Staphylococci. 2014. Clin Microbiol Rev. 27(4): 870–926. https://dx.doi.org/10.1128/CMR.00109-13

    [2] Lancefield RC A serological differentiation of human and other groups of hemolytic streptococci. 1933. J Exp Med. 57(4): 571–95. https://dx.doi.org/10.1084/jem.57.4.571

    -

    [3] Catalogue of Life: Annual Checklist (public online database), www.catalogueoflife.org.

    +

    [3] Catalogue of Life: Annual Checklist (public online taxonomic database), www.catalogueoflife.org (check included annual version with catalogue_of_life_version()).

    Catalogue of Life


    -This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). This data is updated annually - check the included version with catalogue_of_life_version.

    +This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). This data is updated annually - check the included version with catalogue_of_life_version().

    Included are: