diff --git a/DESCRIPTION b/DESCRIPTION
index 7bb150cf..3b9c49e8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: AMR
Version: 0.5.0.9019
-Date: 2019-02-26
+Date: 2019-02-27
Title: Antimicrobial Resistance Analysis
Authors@R: c(
person(
diff --git a/R/age.R b/R/age.R
index 421f9626..0df17e96 100755
--- a/R/age.R
+++ b/R/age.R
@@ -73,8 +73,8 @@ age <- function(x, reference = Sys.Date()) {
#' \itemize{
#' \item{\code{"children"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.}
#' \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85, 95)}. This will split on 0-64, 65-74, 75-84, 85-94 and 95+.}
-#' \item{\code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth.}
-#' \item{\code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29 and so forth.}
+#' \item{\code{"fives"}, equivalent of: \code{1:24 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth, until 120.}
+#' \item{\code{"tens"}, equivalent of: \code{1:12 * 10}. This will split on 0-9, 10-19, 20-29 and so forth, until 120.}
#' }
#' }
#' @keywords age_group age
@@ -92,11 +92,11 @@ age <- function(x, reference = Sys.Date()) {
#' age_groups(ages, c(20, 50))
#'
#' # split into groups of ten years
-#' age_groups(ages, 1:10 * 10)
+#' age_groups(ages, 1:12 * 10)
#' age_groups(ages, split_at = "tens")
#'
#' # split into groups of five years
-#' age_groups(ages, 1:20 * 5)
+#' age_groups(ages, 1:24 * 5)
#' age_groups(ages, split_at = "fives")
#'
#' # split specifically for children
@@ -122,9 +122,9 @@ age_groups <- function(x, split_at = c(12, 25, 55, 75)) {
} else if (split_at %like% "^(elder|senior)") {
split_at <- c(65, 75, 85, 95)
} else if (split_at %like% "^five") {
- split_at <- 1:20 * 5
+ split_at <- 1:24 * 5
} else if (split_at %like% "^ten") {
- split_at <- 1:10 * 10
+ split_at <- 1:12 * 10
}
}
split_at <- as.integer(split_at)
diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R
index 09763da6..4bca4357 100755
--- a/R/catalogue_of_life.R
+++ b/R/catalogue_of_life.R
@@ -24,12 +24,12 @@
#' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life.
#' @section Catalogue of Life:
#' \if{html}{\figure{logo_col.png}{options: height=60px style=margin-bottom:5px} \cr}
-#' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (\url{http://www.catalogueoflife.org}). This data is updated annually - check the included version with \code{\link{catalogue_of_life_version}}.
+#' This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (\url{http://www.catalogueoflife.org}). This data is updated annually - check the included version with \code{\link{catalogue_of_life_version}()}.
#'
#' Included are:
#' \itemize{
#' \item{All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses}
-#' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).}
+#' \item{All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. This covers the most relevant microbial fungi (like all species of \emph{Aspergillus}, \emph{Candida}, \emph{Cryptococcus}, \emph{Histoplasma}, \emph{Pneumocystis}, \emph{Saccharomyces} and \emph{Trichophyton}).}
#' \item{All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed}
#' \item{The complete taxonomic tree of all included (sub)species: from kingdom to subspecies}
#' \item{The responsible author(s) and year of scientific publication}
@@ -76,6 +76,7 @@ NULL
#'
#' This function returns a list with info about the included data from the Catalogue of Life. It also shows if the included version is their latest annual release. The Catalogue of Life releases their annual release in March each year.
#' @seealso \code{\link{microorganisms}}
+#' @details The list item \code{is_latest_annual_release} is based on the system date.
#' @inheritSection catalogue_of_life Catalogue of Life
#' @inheritSection AMR Read more on our website!
#' @export
diff --git a/R/mo.R b/R/mo.R
index 4faa3b9f..df08ec54 100755
--- a/R/mo.R
+++ b/R/mo.R
@@ -21,7 +21,7 @@
#' Transform to microorganism ID
#'
-#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms \emph{Bacteria}, \emph{Fungi} and \emph{Protozoa} (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, zie Examples.
+#' Use this function to determine a valid microorganism ID (\code{mo}). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms Archaea, Bacteria, Protozoa, Viruses and most microbial species from the kingdom Fungi (see Source), so the input can be almost anything: a full name (like \code{"Staphylococcus aureus"}), an abbreviated name (like \code{"S. aureus"}), an abbreviation known in the field (like \code{"MRSA"}), or just a genus. You could also \code{\link{select}} a genus and species column, see Examples.
#' @param x a character vector or a \code{data.frame} with one or two columns
#' @param Becker a logical to indicate whether \emph{Staphylococci} should be categorised into Coagulase Negative \emph{Staphylococci} ("CoNS") and Coagulase Positive \emph{Staphylococci} ("CoPS") instead of their own species, according to Karsten Becker \emph{et al.} [1].
#'
@@ -65,7 +65,6 @@
#' \itemize{
#' \item{\code{"E. coli"} will return the ID of \emph{Escherichia coli} and not \emph{Entamoeba coli}, although the latter would alphabetically come first}
#' \item{\code{"H. influenzae"} will return the ID of \emph{Haemophilus influenzae} and not \emph{Haematobacter influenzae} for the same reason}
-#' \item{Something like \code{"p aer"} will return the ID of \emph{Pseudomonas aeruginosa} and not \emph{Pasteurella aerogenes}}
#' \item{Something like \code{"stau"} or \code{"S aur"} will return the ID of \emph{Staphylococcus aureus} and not \emph{Staphylococcus auricularis}}
#' }
#' This means that looking up human pathogenic microorganisms takes less time than looking up human \strong{non}-pathogenic microorganisms.
@@ -77,7 +76,7 @@
#' \item{It strips off values between brackets and the brackets itself, and re-evaluates the input with all previous rules}
#' \item{It strips off words from the end one by one and re-evaluates the input with all previous rules}
#' \item{It strips off words from the start one by one and re-evaluates the input with all previous rules}
-#' \item{It tries to look for some manual changes which are not yet published to the Catalogue of Life (like \emph{Propionibacterium} not yet being \emph{Cutibacterium})}
+#' \item{It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like \emph{Propionibacterium} being \emph{Cutibacterium})}
#' }
#'
#' Examples:
@@ -89,7 +88,7 @@
#'
#' Use \code{mo_failures()} to get a vector with all values that could not be coerced to a valid value.
#'
-#' Use \code{mo_uncertainties()} to get a vector with all values that were coerced to a valid value, but with uncertainty.
+#' Use \code{mo_uncertainties()} to get info about all values that were coerced to a valid value, but with uncertainty.
#'
#' Use \code{mo_renamed()} to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.
#'
@@ -111,7 +110,7 @@
#'
#' [2] Lancefield RC \strong{A serological differentiation of human and other groups of hemolytic streptococci}. 1933. J Exp Med. 57(4): 571–95. \url{https://dx.doi.org/10.1084/jem.57.4.571}
#'
-#' [3] Catalogue of Life: Annual Checklist (public online database), \url{www.catalogueoflife.org}.
+#' [3] Catalogue of Life: Annual Checklist (public online taxonomic database), \url{www.catalogueoflife.org} (check included annual version with \code{\link{catalogue_of_life_version}()}).
#' @export
#' @return Character (vector) with class \code{"mo"}. Unknown values will return \code{NA}.
#' @seealso \code{\link{microorganisms}} for the \code{data.frame} that is being used to determine ID's. \cr
@@ -238,7 +237,9 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
}
notes <- character(0)
- uncertainties <- character(0)
+ uncertainties <- data.frame(input = character(0),
+ fullname = character(0),
+ mo = character(0))
failures <- character(0)
x_input <- x
# already strip leading and trailing spaces
@@ -695,8 +696,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
found <- microorganismsDT[tolower(fullname) %like% paste(b.x_trimmed, "species"), ..property][[1]]
if (length(found) > 0) {
x[i] <- found[1L]
- uncertainties <<- c(uncertainties,
- paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found[1L], fullname][[1]], " (", found[1L], ")"))
+ uncertainties <<- rbind(uncertainties,
+ data.frame(input = a.x_backup,
+ fullname = microorganismsDT[mo == found[1L], fullname][[1]],
+ mo = found[1L]))
return(x)
}
}
@@ -719,8 +722,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
ref_old = found[1, ref],
ref_new = microorganismsDT[col_id == found[1, col_id_new], ref],
mo = microorganismsDT[col_id == found[1, col_id_new], mo])
- uncertainties <<- c(uncertainties,
- paste0("'", a.x_backup, "' >> ", found[1, fullname], " (Catalogue of Life ID ", found[1, col_id], ")"))
+ uncertainties <<- rbind(uncertainties,
+ data.frame(input = a.x_backup,
+ fullname = found[1, fullname],
+ mo = paste("CoL", found[1, col_id])))
return(x)
}
@@ -731,8 +736,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
if (!is.na(found) & nchar(b.x_trimmed) >= 6) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
- uncertainties <<- c(uncertainties,
- paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")"))
+ uncertainties <<- rbind(uncertainties,
+ data.frame(input = a.x_backup,
+ fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
+ mo = found_result[1L]))
return(found[1L])
}
@@ -745,8 +752,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
if (!is.na(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
- uncertainties <<- c(uncertainties,
- paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")"))
+ uncertainties <<- rbind(uncertainties,
+ data.frame(input = a.x_backup,
+ fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
+ mo = found_result[1L]))
return(found[1L])
}
}
@@ -761,8 +770,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
if (!is.na(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
- uncertainties <<- c(uncertainties,
- paste0("'", a.x_backup, "' >> ", microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")"))
+ uncertainties <<- rbind(uncertainties,
+ data.frame(input = a.x_backup,
+ fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
+ mo = found_result[1L]))
return(found[1L])
}
}
@@ -773,11 +784,10 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
if (!is.na(found)) {
found_result <- found
found <- microorganismsDT[mo == found, ..property][[1]]
- warning(silver(paste0('Guessed with uncertainty: "',
- a.x_backup, '" >> ', italic(microorganismsDT[mo == found_result[1L], fullname][[1]]), " (", found_result[1L], ")")),
- call. = FALSE, immediate. = FALSE)
- uncertainties <<- c(uncertainties,
- paste0('"', a.x_backup, '" >> ', microorganismsDT[mo == found_result[1L], fullname][[1]], " (", found_result[1L], ")"))
+ uncertainties <<- rbind(uncertainties,
+ data.frame(input = a.x_backup,
+ fullname = microorganismsDT[mo == found_result[1L], fullname][[1]],
+ mo = found_result[1L]))
return(found[1L])
}
@@ -799,7 +809,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
# failures
failures <- failures[!failures %in% c(NA, NULL, NaN)]
- if (length(failures) > 0) {
+ if (length(failures) > 0 & clear_options == TRUE) {
options(mo_failures = sort(unique(failures)))
plural <- c("value", "it")
if (n_distinct(failures) > 1) {
@@ -807,7 +817,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
}
total_failures <- length(x_input[x_input %in% failures & !x_input %in% c(NA, NULL, NaN)])
total_n <- length(x_input[!x_input %in% c(NA, NULL, NaN)])
- msg <- paste0("\n", n_distinct(failures), " unique ", plural[1],
+ msg <- paste0("\n", nr2char(n_distinct(failures)), " unique input ", plural[1],
" (^= ", percent(total_failures / total_n, round = 1, force_zero = TRUE),
") could not be coerced to a valid MO code")
if (n_distinct(failures) <= 10) {
@@ -819,14 +829,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings
}
# uncertainties
- if (length(uncertainties) > 0) {
- options(mo_uncertainties = sort(unique(uncertainties)))
+ if (NROW(uncertainties) > 0 & clear_options == TRUE) {
+ options(mo_uncertainties = as.list(distinct(uncertainties, input, .keep_all = TRUE)))
+
plural <- c("value", "it")
- if (n_distinct(failures) > 1) {
+ if (NROW(uncertainties) > 1) {
plural <- c("values", "them")
}
- msg <- paste0("\nResults of ", n_distinct(uncertainties), " input ", plural[1],
- " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
+ msg <- paste0("\nResults of ", nr2char(NROW(uncertainties)), " input ", plural[1],
+ " was guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".")
warning(red(msg),
call. = FALSE,
immediate. = TRUE) # thus will always be shown, even if >= warnings
@@ -961,6 +972,7 @@ print.mo <- function(x, ...) {
}
#' @exportMethod summary.mo
+#' @importFrom dplyr n_distinct
#' @export
#' @noRd
summary.mo <- function(object, ...) {
@@ -969,7 +981,7 @@ summary.mo <- function(object, ...) {
top_3 <- unname(top_freq(freq(x), 3))
c("Class" = "mo",
" Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 26 February 2019. Note: values on this page will change with every website update since they are based on randomly created values and the page was written in RMarkdown. However, the methodology remains unchanged. This page was generated on 27 February 2019. So, we can draw at least two conclusions immediately. From a data scientist perspective, the data looks clean: only values The data is already quite clean, but we still need to transform some variables. The So only 28.4% is suitable for resistance analysis! We can now filter on it with the So only 28.5% is suitable for resistance analysis! We can now filter on it with the For future use, the above two syntaxes can be shortened with the Instead of 1, now 9 isolates are flagged. In total, 79.1% of all isolates are marked ‘first weighted’ - 50.7% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline. Instead of 1, now 8 isolates are flagged. In total, 79.3% of all isolates are marked ‘first weighted’ - 50.8% more than when using the CLSI guideline. In real life, this novel algorithm will yield 5-10% more isolates than the classic CLSI guideline. As with So we end up with 15,822 isolates for analysis. So we end up with 15,861 isolates for analysis. We can remove unneeded columns: Or can be used like the Frequency table of Frequency table of Columns: 2 Shortest: 16 The functions Or can be used in conjuction with In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 10 milliseconds means it can determine 100 input values per second. It case of 50 milliseconds, this is only 20 input values per second. The second input is the only one that has to be looked up thoroughly. 
All the others are known codes (the first is a WHONET code) or common laboratory codes, or common full organism names like the last one. In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred. To achieve this speed, the That takes 8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets. That takes 7.7 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets. In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon): So transforming 500,000 values (!!) of 50 unique values only takes 0.78 seconds (778 ms). You only lose time on your unique input values. So transforming 500,000 values (!!) of 50 unique values only takes 0.8 seconds (798 ms). You only lose time on your unique input values. 
So going from So going from Of course, when running Currently supported are German, Dutch, Spanish, Italian, French and Portuguese. All ~3,000 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales and Schizosaccharomycetales. The responsible author(s) and year of scientific publication A character: Use this function to determine a valid microorganism ID ( Use this function to determine a valid microorganism ID ( A couple of effects because of these rules: Something like Something like This means that looking up human pathogenic microorganisms takes less time than looking up human non-pathogenic microorganisms. UNCERTAIN RESULTS It strips off values between brackets and the brackets itself, and re-evaluates the input with all previous rules It strips off words from the end one by one and re-evaluates the input with all previous rules It strips off words from the start one by one and re-evaluates the input with all previous rules It tries to look for some manual changes which are not yet published to the Catalogue of Life (like Propionibacterium not yet being Cutibacterium) It tries to look for some manual changes which are not (yet) published to the Catalogue of Life (like Propionibacterium being Cutibacterium) Examples: Use Use Use Use [1] Becker K et al. Coagulase-Negative Staphylococci. 2014. Clin Microbiol Rev. 27(4): 870–926. https://dx.doi.org/10.1128/CMR.00109-13 [2] Lancefield RC A serological differentiation of human and other groups of hemolytic streptococci. 1933. J Exp Med. 57(4): 571–95. https://dx.doi.org/10.1084/jem.57.4.571 [3] Catalogue of Life: Annual Checklist (public online database), www.catalogueoflife.org. [3] Catalogue of Life: Annual Checklist (public online taxonomic database), www.catalogueoflife.org (check included annual version with How to conduct AMR analysis
Matthijs S. Berends
- 26 February 2019
+ 27 February 2019
AMR.Rmd
Introduction
@@ -217,21 +217,21 @@
-
2019-02-26
+2019-02-27
abcd
Escherichia coli
S
S
-
2019-02-26
+2019-02-27
abcd
Escherichia coli
S
R
-
2019-02-26
+2019-02-27
efgh
Escherichia coli
R
@@ -327,19 +327,41 @@
-
+2015-04-06
-E7
-Hospital C
+2015-01-18
+F9
+Hospital B
+Escherichia coli
+R
+S
+R
+S
+M
+
+
+2017-12-07
+H7
+Hospital A
+Klebsiella pneumoniae
+R
+S
+S
+S
+M
+
+
2016-02-14
+J4
+Hospital A
Escherichia coli
R
I
S
-R
+S
M
-
2016-10-23
-S6
+2010-12-25
+P2
Hospital B
Streptococcus pneumoniae
S
@@ -349,44 +371,22 @@
F
-
-2010-02-02
-O1
-Hospital D
-Escherichia coli
-S
-S
-R
-S
-F
-
-
-2014-03-12
-H4
+2016-12-26
+S8
Hospital A
-Escherichia coli
+Streptococcus pneumoniae
+S
+I
S
S
-S
-S
-M
-
-
2011-11-01
-X1
-Hospital B
-Escherichia coli
-R
-S
-S
-R
F
-
2016-12-10
-W4
-Hospital B
-Escherichia coli
-R
+2010-03-27
+R7
+Hospital D
+Klebsiella pneumoniae
+S
S
S
S
@@ -411,8 +411,8 @@
#>
#> Item Count Percent Cum. Count Cum. Percent
#> --- ----- ------- -------- ----------- -------------
-#> 1 M 10,479 52.4% 10,479 52.4%
-#> 2 F 9,521 47.6% 20,000 100.0%
+#> 1 M 10,386 51.9% 10,386 51.9%
+#> 2 F 9,614 48.1% 20,000 100.0%
M
and F
. From a researcher perspective: there are slightly more men. Nothing we didn’t already know.bacteria
column now consists of text, and we want to add more variables based on microbial IDs later on. So, we will transform this column to valid IDs. The mutate()
function of the dplyr
package makes this really easy:data <- data %>%
@@ -443,10 +443,10 @@
#> Kingella kingae (no changes)
#>
#> EUCAST Expert Rules, Intrinsic Resistance and Exceptional Phenotypes (v3.1, 2016)
-#> Table 1: Intrinsic resistance in Enterobacteriaceae (1324 changes)
+#> Table 1: Intrinsic resistance in Enterobacteriaceae (1291 changes)
#> Table 2: Intrinsic resistance in non-fermentative Gram-negative bacteria (no changes)
#> Table 3: Intrinsic resistance in other Gram-negative bacteria (no changes)
-#> Table 4: Intrinsic resistance in Gram-positive bacteria (2776 changes)
+#> Table 4: Intrinsic resistance in Gram-positive bacteria (2787 changes)
#> Table 8: Interpretive rules for B-lactam agents and Gram-positive cocci (no changes)
#> Table 9: Interpretive rules for B-lactam agents and Gram-negative rods (no changes)
#> Table 10: Interpretive rules for B-lactam agents and other Gram-negative bacteria (no changes)
@@ -462,9 +462,9 @@
#> Non-EUCAST: piperacillin/tazobactam = S where piperacillin = S (no changes)
#> Non-EUCAST: trimethoprim/sulfa = S where trimethoprim = S (no changes)
#>
-#> => EUCAST rules affected 7,376 out of 20,000 rows
+#> => EUCAST rules affected 7,442 out of 20,000 rows
#> -> added 0 test results
-#> -> changed 4,100 test results (0 to S; 0 to I; 4,100 to R)
@@ -489,8 +489,8 @@
#> NOTE: Using column `bacteria` as input for `col_mo`.
#> NOTE: Using column `date` as input for `col_date`.
#> NOTE: Using column `patient_id` as input for `col_patient_id`.
-#> => Found 5,678 first isolates (28.4% of total)
filter()
function, also from the dplyr
package:filter()
function, also from the dplyr
package:filter_first_isolate()
function:
1
-2010-01-25
-B9
+2010-02-12
+I9
B_ESCHR_COL
-R
-R
S
S
+S
+R
TRUE
2
-2010-03-01
-B9
+2010-02-12
+I9
B_ESCHR_COL
-I
-S
R
S
+S
+S
FALSE
3
-2010-06-15
-B9
+2010-02-22
+I9
B_ESCHR_COL
R
S
-S
+R
S
FALSE
4
-2010-07-08
-B9
+2010-03-05
+I9
B_ESCHR_COL
+S
+S
R
S
-S
-S
FALSE
5
-2010-07-20
-B9
+2010-03-08
+I9
B_ESCHR_COL
S
S
-S
-S
+R
+R
FALSE
6
-2010-09-18
-B9
+2010-03-17
+I9
B_ESCHR_COL
-R
+S
S
S
S
@@ -582,8 +582,8 @@
7
-2010-09-21
-B9
+2010-05-03
+I9
B_ESCHR_COL
S
S
@@ -593,33 +593,33 @@
8
-2010-11-24
-B9
+2010-07-03
+I9
B_ESCHR_COL
S
-R
+S
S
S
FALSE
9
-2010-12-08
-B9
+2010-09-11
+I9
B_ESCHR_COL
R
-R
+S
S
S
FALSE
10
-2011-01-19
-B9
+2010-09-24
+I9
B_ESCHR_COL
S
-I
+R
S
S
FALSE
@@ -637,7 +637,7 @@
#> NOTE: Using column `patient_id` as input for `col_patient_id`.
#> NOTE: Using column `keyab` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.
#> [Criterion] Inclusion based on key antibiotics, ignoring I.
-#> => Found 15,822 first weighted isolates (79.1% of total)
+#> => Found 15,861 first weighted isolates (79.3% of total)
-
isolate
@@ -654,70 +654,70 @@
1
-2010-01-25
-B9
+2010-02-12
+I9
B_ESCHR_COL
-R
-R
S
S
+S
+R
TRUE
TRUE
2
-2010-03-01
-B9
+2010-02-12
+I9
B_ESCHR_COL
-I
-S
R
S
+S
+S
FALSE
TRUE
3
-2010-06-15
-B9
+2010-02-22
+I9
B_ESCHR_COL
R
S
-S
+R
S
FALSE
TRUE
4
-2010-07-08
-B9
+2010-03-05
+I9
B_ESCHR_COL
+S
+S
R
S
-S
-S
-FALSE
FALSE
+TRUE
5
-2010-07-20
-B9
+2010-03-08
+I9
B_ESCHR_COL
S
S
-S
-S
+R
+R
FALSE
TRUE
6
-2010-09-18
-B9
+2010-03-17
+I9
B_ESCHR_COL
-R
+S
S
S
S
@@ -726,35 +726,35 @@
7
-2010-09-21
-B9
+2010-05-03
+I9
B_ESCHR_COL
S
S
S
S
FALSE
-TRUE
+FALSE
8
-2010-11-24
-B9
+2010-07-03
+I9
B_ESCHR_COL
S
-R
+S
S
S
FALSE
-TRUE
+FALSE
9
-2010-12-08
-B9
+2010-09-11
+I9
B_ESCHR_COL
R
-R
+S
S
S
FALSE
@@ -762,11 +762,11 @@
10
-2011-01-19
-B9
+2010-09-24
+I9
B_ESCHR_COL
S
-I
+R
S
S
FALSE
@@ -774,11 +774,11 @@
filter_first_isolate()
, there’s a shortcut for this new algorithm too:
1
-2015-04-06
-E7
-Hospital C
+2015-01-18
+F9
+Hospital B
B_ESCHR_COL
R
-I
S
R
+S
M
Gram negative
Escherichia
@@ -819,9 +819,25 @@
TRUE
-
+2
-2016-10-23
-S6
+3
+2016-02-14
+J4
+Hospital A
+B_ESCHR_COL
+R
+I
+S
+S
+M
+Gram negative
+Escherichia
+coli
+TRUE
+
+
-4
+2010-12-25
+P2
Hospital B
B_STRPT_PNE
S
@@ -834,68 +850,52 @@
pneumoniae
TRUE
-
3
-2010-02-02
-O1
-Hospital D
-B_ESCHR_COL
-S
-S
-R
-S
-F
-Gram negative
-Escherichia
-coli
-TRUE
-
5
-2011-11-01
-X1
-Hospital B
-B_ESCHR_COL
-R
+2016-12-26
+S8
+Hospital A
+B_STRPT_PNE
S
+I
S
R
F
-Gram negative
-Escherichia
-coli
+Gram positive
+Streptococcus
+pneumoniae
TRUE
6
-2016-12-10
-W4
-Hospital B
-B_ESCHR_COL
+2010-03-27
+R7
+Hospital D
+B_KLBSL_PNE
R
S
S
S
F
Gram negative
-Escherichia
-coli
+Klebsiella
+pneumoniae
TRUE
-
@@ -915,9 +915,9 @@
7
-2015-07-07
-P8
-Hospital D
-B_ESCHR_COL
-S
+8
+2016-08-08
+K8
+Hospital B
+B_KLBSL_PNE
R
+I
S
S
-F
+M
Gram negative
-Escherichia
-coli
+Klebsiella
+pneumoniae
TRUE
dplyr
way, which is easier readable:genus
and species
from a data.frame
(15,822 x 13)genus
and species
from a data.frame
(15,861 x 13)
-Length: 15,822 (of which NA: 0 = 0.00%)
+Length: 15,861 (of which NA: 0 = 0.00%)
Unique: 4
Longest: 24
1
Escherichia coli
-7,838
-49.5%
-7,838
-49.5%
+7,879
+49.7%
+7,879
+49.7%
2
Staphylococcus aureus
-3,965
-25.1%
-11,803
-74.6%
+3,915
+24.7%
+11,794
+74.4%
3
Streptococcus pneumoniae
-2,457
-15.5%
-14,260
-90.1%
+2,482
+15.6%
+14,276
+90.0%
@@ -971,7 +971,7 @@ Longest: 24
Resistance percentages
4
Klebsiella pneumoniae
-1,562
-9.9%
-15,822
+1,585
+10.0%
+15,861
100.0%
portion_R
, portion_RI
, portion_I
, portion_IS
and portion_S
can be used to determine the portion of a specific antimicrobial outcome. They can be used on their own:group_by()
and summarise()
, both from the dplyr
package:data_1st %>%
group_by(hospital) %>%
@@ -984,19 +984,19 @@ Longest: 24
Hospital A
-0.4692014
+0.4759916
Hospital B
-0.4694061
+0.4808997
Hospital C
-0.4845361
+0.4682779
@@ -1014,23 +1014,23 @@ Longest: 24
Hospital D
-0.4727669
+0.4651015
Hospital A
-0.4692014
-4708
+0.4759916
+4790
Hospital B
-0.4694061
-5573
+0.4808997
+5602
Hospital C
-0.4845361
-2328
+0.4682779
+2317
@@ -1050,27 +1050,27 @@ Longest: 24
Hospital D
-0.4727669
-3213
+0.4651015
+3152
Escherichia
-0.7269712
-0.9050778
-0.9744833
+0.7292804
+0.8975758
+0.9772814
Klebsiella
-0.7349552
-0.8988476
-0.9763124
+0.7438486
+0.9015773
+0.9741325
Staphylococcus
-0.7263556
-0.9235813
-0.9793190
+0.7315453
+0.9154534
+0.9793103
diff --git a/docs/articles/AMR_files/figure-html/plot 1-1.png b/docs/articles/AMR_files/figure-html/plot 1-1.png
index 13b0d9f9..946ecc29 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 1-1.png and b/docs/articles/AMR_files/figure-html/plot 1-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/plot 3-1.png b/docs/articles/AMR_files/figure-html/plot 3-1.png
index 38decffc..377470a0 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 3-1.png and b/docs/articles/AMR_files/figure-html/plot 3-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/plot 4-1.png b/docs/articles/AMR_files/figure-html/plot 4-1.png
index 65da5ffc..1f14285d 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 4-1.png and b/docs/articles/AMR_files/figure-html/plot 4-1.png differ
diff --git a/docs/articles/AMR_files/figure-html/plot 5-1.png b/docs/articles/AMR_files/figure-html/plot 5-1.png
index d03665cb..9b15ee06 100644
Binary files a/docs/articles/AMR_files/figure-html/plot 5-1.png and b/docs/articles/AMR_files/figure-html/plot 5-1.png differ
diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html
index c0c90e4e..7c0f984f 100644
--- a/docs/articles/EUCAST.html
+++ b/docs/articles/EUCAST.html
@@ -192,7 +192,7 @@
Streptococcus
-0.7391127
+0.7352941
0.0000000
-0.7391127
+0.7352941
How to apply EUCAST rules
Matthijs S. Berends
- 26 February 2019
+ 27 February 2019
EUCAST.Rmd
How to use the G-test
Matthijs S. Berends
- 26 February 2019
+ 27 February 2019
G_test.Rmd
How to work with WHONET data
Matthijs S. Berends
- 26 February 2019
+ 27 February 2019
WHONET.Rmd
How to get properties of an antibiotic
Matthijs S. Berends
- 26 February 2019
+ 27 February 2019
atc_property.Rmd
Benchmarks
Matthijs S. Berends
- 25 February 2019
+ 27 February 2019
benchmarks.Rmd
as.mo
function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Thermus islandicus (B_THERMS_ISL
), a bug probably never found before in humans:T.islandicus <- microbenchmark(as.mo("theisl"),
as.mo("THEISL"),
@@ -236,12 +236,12 @@
print(T.islandicus, unit = "ms", signif = 3)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
-#> as.mo("theisl") 448.0 486.0 483.0 489.0 490.0 510.0 10
-#> as.mo("THEISL") 447.0 489.0 487.0 491.0 493.0 499.0 10
-#> as.mo("T. islandicus") 78.0 78.2 78.9 78.7 78.9 82.3 10
-#> as.mo("T. islandicus") 78.1 78.3 84.4 78.8 81.3 129.0 10
-#> as.mo("Thermus islandicus") 61.8 62.1 75.4 62.8 104.0 109.0 10
par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
@@ -287,8 +287,8 @@
print(run_it, unit = "ms", signif = 3)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
-#> mo_fullname(x) 741 746 806 778 827 968 10
@@ -300,11 +300,11 @@
times = 10)
print(run_it, unit = "ms", signif = 3)
#> Unit: milliseconds
-#> expr min lq mean median uq max neval
-#> A 10.200 10.300 10.600 10.400 11.00 11.300 10
-#> B 20.500 20.700 21.300 21.400 22.00 22.100 10
-#> C 0.308 0.504 0.589 0.591 0.73 0.863 10
mo_fullname("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0006 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:mo_fullname("Staphylococcus aureus")
to "Staphylococcus aureus"
takes 0.0005 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:run_it <- microbenchmark(A = mo_species("aureus"),
B = mo_genus("Staphylococcus"),
C = mo_fullname("Staphylococcus aureus"),
@@ -317,14 +317,14 @@
print(run_it, unit = "ms", signif = 3)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
-#> A 0.318 0.340 0.388 0.382 0.434 0.474 10
-#> B 0.339 0.362 0.424 0.428 0.449 0.555 10
-#> C 0.331 0.369 0.522 0.526 0.637 0.673 10
-#> D 0.269 0.278 0.313 0.300 0.353 0.384 10
-#> E 0.252 0.266 0.322 0.302 0.349 0.448 10
-#> F 0.241 0.264 0.310 0.313 0.347 0.379 10
-#> G 0.241 0.258 0.310 0.317 0.355 0.386 10
-#> H 0.278 0.289 0.316 0.313 0.334 0.375 10
mo_phylum("Firmicutes")
the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes"
too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.How to create frequency tables
Matthijs S. Berends
- 25 February 2019
+ 27 February 2019
freq.Rmd
How to get properties of a microorganism
Matthijs S. Berends
- 25 February 2019
+ 27 February 2019
mo_property.Rmd
How to predict antimicrobial resistance
Matthijs S. Berends
- 25 February 2019
+ 27 February 2019
resistance_predict.Rmd
microorganisms
data set now contains:
catalogue_of_life_version()
.catalogue_of_life_version()
.mo
codes changed (e.g. Streptococcus changed from B_STRPTC
to B_STRPT
). A translation table is used internally to support older microorganism IDs, so users will not notice this difference.as.atc()
"children"
, equivalent of: c(0, 1, 2, 4, 6, 13, 18)
. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+."elderly"
or "seniors"
, equivalent of: c(65, 75, 85, 95)
. This will split on 0-64, 65-74, 75-84, 85-94 and 95+."fives"
, equivalent of: 1:20 * 5
. This will split on 0-4, 5-9, 10-14, 15-19 and so forth."tens"
, equivalent of: 1:10 * 10
. This will split on 0-9, 10-19, 20-29 and so forth."fives"
, equivalent of: 1:24 * 5
. This will split on 0-4, 5-9, 10-14, 15-19 and so forth, until 120."tens"
, equivalent of: 1:12 * 10
. This will split on 0-9, 10-19, 20-29 and so forth, until 120.mo
). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms Bacteria, Fungi and Protozoa (see Source), so the input can be almost anything: a full name (like "Staphylococcus aureus"
), an abbreviated name (like "S. aureus"
), an abbreviation known in the field (like "MRSA"
), or just a genus. You could also select
a genus and species column, see Examples.mo
). Determination is done using Artificial Intelligence (AI) and the complete taxonomic kingdoms Archaea, Bacteria, Protozoa, Viruses and most microbial species from the kingdom Fungi (see Source), so the input can be almost anything: a full name (like "Staphylococcus aureus"
), an abbreviated name (like "S. aureus"
), an abbreviation known in the field (like "MRSA"
), or just a genus. You could also select
a genus and species column, see Examples.
"E. coli"
will return the ID of Escherichia coli and not Entamoeba coli, although the latter would alphabetically come first"H. influenzae"
will return the ID of Haemophilus influenzae and not Haematobacter influenzae for the same reason"p aer"
will return the ID of Pseudomonas aeruginosa and not Pasteurella aerogenes"stau"
or "S aur"
will return the ID of Staphylococcus aureus and not Staphylococcus auricularis
@@ -318,7 +317,7 @@ When using allow_uncertain = TRUE
(which is the default setting), i
"Streptococcus group B (known as S. agalactiae)"
. The text between brackets will be removed and a warning will be thrown that the result Streptococcus group B (B_STRPT_GRB
) needs review.allow_uncertain = TRUE
(which is the default setting), i
"Fluoroquinolone-resistant Neisseria gonorrhoeae"
. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result Neisseria gonorrhoeae (B_NESSR_GON
) needs review.mo_failures()
to get a vector with all values that could not be coerced to a valid value.mo_uncertainties()
to get a vector with all values that were coerced to a valid value, but with uncertainty.mo_uncertainties()
to get info about all values that were coerced to a valid value, but with uncertainty.mo_renamed()
to get a vector with all values that could be coerced based on an old, previously accepted taxonomic name.Microbial prevalence of pathogens in humans
@@ -345,16 +344,16 @@ When using allow_uncertain = TRUE
(which is the default setting), i
catalogue_of_life_version()
).Catalogue of Life
-This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). This data is updated annually - check the included version with catalogue_of_life_version
.catalogue_of_life_version()
.
Included are:
All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses
All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of Aspergillus, Candida, Cryptococcus, Histoplasma, Pneumocystis, Saccharomyces and Trichophyton).
All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. This covers the most relevant microbial fungi (like all species of Aspergillus, Candida, Cryptococcus, Histoplasma, Pneumocystis, Saccharomyces and Trichophyton).
All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed
The complete taxonomic tree of all included (sub)species: from kingdom to subspecies
The responsible author(s) and year of scientific publication
-This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). This data is updated annually - check the included version with catalogue_of_life_version
.
catalogue_of_life_version()
.
Included are:
All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses
All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of Aspergillus, Candida, Cryptococcus, Histoplasma, Pneumocystis, Saccharomyces and Trichophyton).
All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. This covers the most relevant microbial fungi (like all species of Aspergillus, Candida, Cryptococcus, Histoplasma, Pneumocystis, Saccharomyces and Trichophyton).
All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed
The complete taxonomic tree of all included (sub)species: from kingdom to subspecies
The responsible author(s) and year of scientific publication
catalogue_of_life_version()
+ The list item is_latest_annual_release
is based on the system date.
-This package contains the complete taxonomic tree of almost all microorganisms from the authoritative and comprehensive Catalogue of Life (http://www.catalogueoflife.org). This data is updated annually - check the included version with catalogue_of_life_version
.
catalogue_of_life_version()
.
Included are:
All ~55,000 (sub)species from the kingdoms of Archaea, Bacteria, Protozoa and Viruses
All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of Aspergillus, Candida, Cryptococcus, Histoplasma, Pneumocystis, Saccharomyces and Trichophyton).
All ~3,500 (sub)species from these orders of the kingdom of Fungi: Eurotiales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales. This covers the most relevant microbial fungi (like all species of Aspergillus, Candida, Cryptococcus, Histoplasma, Pneumocystis, Saccharomyces and Trichophyton).
All ~15,000 previously accepted names of included (sub)species that have been taxonomically renamed
The complete taxonomic tree of all included (sub)species: from kingdom to subspecies
The responsible author(s) and year of scientific publication
# NOT RUN { -# imagine this Excel file (mo codes looked up in `microorganisms` data set): -# A B -# 1 our code mo -# 2 lab_mo_ecoli B_ESCHR_COL -# 3 lab_mo_kpneumoniae B_KLBSL_PNE +# NOT RUN { +# imagine this Excel file (mo codes looked up in `microorganisms` data set): +# A B +# 1 our code mo +# 2 lab_mo_ecoli B_ESCHR_COL +# 3 lab_mo_kpneumoniae B_KLBSL_PNE -# 1. We save it as 'home/me/ourcodes.xlsx' +# 1. We save it as 'home/me/ourcodes.xlsx' -# 2. We use it for input: -set_mo_source("C:\path\ourcodes.xlsx") -#> Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'. +# 2. We use it for input: +set_mo_source("home/me/ourcodes.xlsx") +#> Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'. -# 3. And use it in our functions: -as.mo("lab_mo_ecoli") -#> B_ESCHR_COL +# 3. And use it in our functions: +as.mo("lab_mo_ecoli") +#> B_ESCHR_COL -mo_genus("lab_mo_kpneumoniae") -#> "Klebsiella" +mo_genus("lab_mo_kpneumoniae") +#> "Klebsiella" -# 4. It will look for changes itself: -# (add new row to the Excel file and save it) +# 4. It will look for changes itself: +# (add new row to the Excel file and save it) -mo_genus("lab_mo_kpneumoniae") -#> Updated mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'. -#> "Klebsiella" -# } -+mo_genus("lab_mo_kpneumoniae") +#> Updated mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx'. +#> "Klebsiella" +# }