diff --git a/.gitignore b/.gitignore index df09180e..08217d47 100755 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ vignettes/*.R ^CRAN-RELEASE$ packrat/lib*/ packrat/src/ +data-raw/taxa.txt data-raw/taxon.tab data-raw/DSMZ_bactnames.xlsx data-raw/country_analysis_url_token.R diff --git a/DESCRIPTION b/DESCRIPTION index ac684134..1fafd63d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.0.1.9003 -Date: 2020-03-08 +Version: 1.0.1.9004 +Date: 2020-03-14 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), @@ -46,8 +46,10 @@ Imports: knitr (>= 1.0.0), microbenchmark, pillar, + R6, rlang (>= 0.3.1), - tidyr (>= 1.0.0) + tidyr (>= 1.0.0), + vctrs (>= 0.2.4) Suggests: covr (>= 3.0.1), curl, @@ -63,5 +65,5 @@ BugReports: https://gitlab.com/msberends/AMR/issues License: GPL-2 | file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 6c6487ca..d7037d2f 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,14 +1,19 @@ # Generated by roxygen2: do not edit by hand S3method("[",ab) +S3method("[",disk) S3method("[",mo) S3method("[<-",ab) +S3method("[<-",disk) S3method("[<-",mo) S3method("[[",ab) +S3method("[[",disk) S3method("[[",mo) S3method("[[<-",ab) +S3method("[[<-",disk) S3method("[[<-",mo) S3method(as.data.frame,ab) +S3method(as.data.frame,disk) S3method(as.data.frame,mo) S3method(as.double,mic) S3method(as.integer,mic) @@ -20,6 +25,7 @@ S3method(as.rsi,mic) S3method(barplot,mic) S3method(barplot,rsi) S3method(c,ab) +S3method(c,disk) S3method(c,mo) S3method(droplevels,mic) S3method(droplevels,rsi) @@ -52,11 +58,21 @@ S3method(skewness,matrix) S3method(summary,mic) S3method(summary,mo) S3method(summary,rsi) -S3method(type_sum,ab) -S3method(type_sum,disk) -S3method(type_sum,mic) -S3method(type_sum,mo) -S3method(type_sum,rsi) +S3method(vec_cast,character.mo) +S3method(vec_cast,mo) +S3method(vec_cast,mo.character) 
+S3method(vec_cast,mo.mo) +S3method(vec_ptype2,mo) +S3method(vec_ptype_abbr,ab) +S3method(vec_ptype_abbr,disk) +S3method(vec_ptype_abbr,mic) +S3method(vec_ptype_abbr,mo) +S3method(vec_ptype_abbr,rsi) +S3method(vec_ptype_full,ab) +S3method(vec_ptype_full,disk) +S3method(vec_ptype_full,mic) +S3method(vec_ptype_full,mo) +S3method(vec_ptype_full,rsi) export("%like%") export("%like_case%") export(ab_atc) @@ -199,14 +215,19 @@ export(skewness) export(susceptibility) export(theme_rsi) exportMethods("[.ab") +exportMethods("[.disk") exportMethods("[.mo") exportMethods("[<-.ab") +exportMethods("[<-.disk") exportMethods("[<-.mo") exportMethods("[[.ab") +exportMethods("[[.disk") exportMethods("[[.mo") exportMethods("[[<-.ab") +exportMethods("[[<-.disk") exportMethods("[[<-.mo") exportMethods(as.data.frame.ab) +exportMethods(as.data.frame.disk) exportMethods(as.data.frame.mo) exportMethods(as.double.mic) exportMethods(as.integer.mic) @@ -214,6 +235,7 @@ exportMethods(as.numeric.mic) exportMethods(barplot.mic) exportMethods(barplot.rsi) exportMethods(c.ab) +exportMethods(c.disk) exportMethods(c.mo) exportMethods(droplevels.mic) exportMethods(droplevels.rsi) @@ -244,6 +266,7 @@ exportMethods(skewness.matrix) exportMethods(summary.mic) exportMethods(summary.mo) exportMethods(summary.rsi) +importFrom(R6,R6Class) importFrom(cleaner,freq) importFrom(cleaner,freq.default) importFrom(cleaner,percentage) @@ -294,7 +317,6 @@ importFrom(dplyr,mutate_at) importFrom(dplyr,n) importFrom(dplyr,n_distinct) importFrom(dplyr,n_groups) -importFrom(dplyr,progress_estimated) importFrom(dplyr,pull) importFrom(dplyr,rename) importFrom(dplyr,row_number) @@ -317,7 +339,6 @@ importFrom(graphics,text) importFrom(knitr,kable) importFrom(microbenchmark,microbenchmark) importFrom(pillar,pillar_shaft) -importFrom(pillar,type_sum) importFrom(rlang,as_label) importFrom(rlang,enquos) importFrom(rlang,eval_tidy) @@ -334,3 +355,7 @@ importFrom(tidyr,pivot_wider) importFrom(utils,adist) importFrom(utils,browseURL) 
importFrom(utils,menu) +importFrom(vctrs,vec_cast) +importFrom(vctrs,vec_ptype2) +importFrom(vctrs,vec_ptype_abbr) +importFrom(vctrs,vec_ptype_full) diff --git a/NEWS.md b/NEWS.md index 159cc2d3..16796180 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,13 @@ -# AMR 1.0.1.9003 -## Last updated: 08-Mar-2020 +# AMR 1.0.1.9004 +## Last updated: 14-Mar-2020 ### New * Support for easy principal component analysis for AMR, using the new `pca()` function * Plotting biplots for principal component analysis using the new `ggplot_pca()` function +### Other +* Support for the upcoming `dplyr` version 1.0.0 + # AMR 1.0.1 ### Changed diff --git a/R/ab.R b/R/ab.R index 45e60781..8929dae1 100755 --- a/R/ab.R +++ b/R/ab.R @@ -396,9 +396,15 @@ c.ab <- function(x, ...) { class_integrity_check(y, "antimicrobial code", antibiotics$ab) } -#' @importFrom pillar type_sum +#' @importFrom vctrs vec_ptype_abbr #' @export -type_sum.ab <- function(x) { +vec_ptype_abbr.ab <- function(x, ...) { + "ab" +} + +#' @importFrom vctrs vec_ptype_full +#' @export +vec_ptype_full.ab <- function(x, ...) { "ab" } diff --git a/R/amr.R b/R/amr.R index f5a93aa4..7b315d11 100644 --- a/R/amr.R +++ b/R/amr.R @@ -41,6 +41,7 @@ #' - Getting SNOMED codes of a microorganism, or get its name associated with a SNOMED code #' - Getting LOINC codes of an antibiotic, or get its name associated with a LOINC code #' - Machine reading the EUCAST and CLSI guidelines from 2011-2020 to translate MIC values and disk diffusion diameters to R/SI +#' - Principal component analysis for AMR #' @section Read more on our website!: #' On our website you can find [a comprehensive tutorial](https://msberends.gitlab.io/AMR/articles/AMR.html) about how to conduct AMR analysis, the [complete documentation of all functions](https://msberends.gitlab.io/AMR/reference) (which reads a lot easier than here in R) and [an example analysis using WHONET data](https://msberends.gitlab.io/AMR/articles/WHONET.html). 
diff --git a/R/atc_online.R b/R/atc_online.R index 33895bac..70352188 100644 --- a/R/atc_online.R +++ b/R/atc_online.R @@ -56,7 +56,7 @@ #' - `"ml"` = milliliter (e.g. eyedrops) #' @export #' @rdname atc_online -#' @importFrom dplyr %>% progress_estimated +#' @importFrom dplyr %>% #' @inheritSection AMR Read more on our website! #' @source #' @examples diff --git a/R/disk.R b/R/disk.R index b0f2e1e4..c13081ab 100644 --- a/R/disk.R +++ b/R/disk.R @@ -80,7 +80,7 @@ as.disk <- function(x, na.rm = FALSE) { list_missing, call. = FALSE) } - class(x) <- c("disk", "integer") + class(x) <- "disk" x } } @@ -97,6 +97,20 @@ is.disk <- function(x) { inherits(x, "disk") } +#' @exportMethod as.data.frame.disk +#' @export +#' @noRd +as.data.frame.disk <- function(x, ...) { + # same as as.data.frame.integer but with removed stringsAsFactors, since it will be class "disk" + nm <- paste(deparse(substitute(x), width.cutoff = 500L), + collapse = " ") + if (!"nm" %in% names(list(...))) { + as.data.frame.vector(x, ..., nm = nm) + } else { + as.data.frame.vector(x, ...) + } +} + #' @exportMethod print.disk #' @export #' @noRd @@ -105,12 +119,6 @@ print.disk <- function(x, ...) { print(as.integer(x), quote = FALSE) } -#' @importFrom pillar type_sum -#' @export -type_sum.disk <- function(x) { - "disk" -} - #' @importFrom pillar pillar_shaft #' @export pillar_shaft.disk <- function(x, ...) { @@ -118,3 +126,56 @@ pillar_shaft.disk <- function(x, ...) { out[is.na(x)] <- pillar::style_na(NA) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 3) } + +#' @importFrom vctrs vec_ptype_abbr +#' @export +vec_ptype_abbr.disk <- function(x, ...) { + "disk" +} + +#' @importFrom vctrs vec_ptype_full +#' @export +vec_ptype_full.disk <- function(x, ...) { + "disk" +} + +#' @exportMethod [.disk +#' @export +#' @noRd +"[.disk" <- function(x, ...) { + y <- NextMethod() + attributes(y) <- attributes(x) + y +} +#' @exportMethod [[.disk +#' @export +#' @noRd +"[[.disk" <- function(x, ...) 
{ + y <- NextMethod() + attributes(y) <- attributes(x) + y +} +#' @exportMethod [<-.disk +#' @export +#' @noRd +"[<-.disk" <- function(i, j, ..., value) { + y <- NextMethod() + attributes(y) <- attributes(i) + y +} +#' @exportMethod [[<-.disk +#' @export +#' @noRd +"[[<-.disk" <- function(i, j, ..., value) { + y <- NextMethod() + attributes(y) <- attributes(i) + y +} +#' @exportMethod c.disk +#' @export +#' @noRd +c.disk <- function(x, ...) { + y <- NextMethod() + attributes(y) <- attributes(x) + y +} diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 1febc8b6..81e2d674 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -50,13 +50,14 @@ #' df_joined <- left_join_microorganisms(df, "bacteria") #' colnames(df_joined) inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by join <- suppressWarnings( dplyr::inner_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) - if (nrow(join) > nrow(x)) { + if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") } join @@ -65,13 +66,14 @@ inner_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { #' @rdname join #' @export left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by join <- suppressWarnings( dplyr::left_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) - if (nrow(join) > nrow(x)) { + if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") } join @@ -80,13 +82,14 @@ left_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { #' @rdname join #' @export right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) 
{ + check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by join <- suppressWarnings( dplyr::right_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) - if (nrow(join) > nrow(x)) { + if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") } join @@ -95,13 +98,14 @@ right_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { #' @rdname join #' @export full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { + check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by join <- suppressWarnings( dplyr::full_join(x = x, y = microorganisms, by = by, suffix = suffix, ...) ) - if (nrow(join) > nrow(x)) { + if (NROW(join) > NROW(x)) { warning("The newly joined tbl contains ", nrow(join) - nrow(x), " rows more that its original.") } join @@ -110,6 +114,7 @@ full_join_microorganisms <- function(x, by = NULL, suffix = c("2", ""), ...) { #' @rdname join #' @export semi_join_microorganisms <- function(x, by = NULL, ...) { + check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by @@ -121,6 +126,7 @@ semi_join_microorganisms <- function(x, by = NULL, ...) { #' @rdname join #' @export anti_join_microorganisms <- function(x, by = NULL, ...) { + check_dataset_integrity() checked <- joins_check_df(x, by) x <- checked$x by <- checked$by @@ -131,7 +137,7 @@ anti_join_microorganisms <- function(x, by = NULL, ...) 
{ joins_check_df <- function(x, by) { if (!any(class(x) %in% c("data.frame", "matrix"))) { - x <- data.frame(mo = as.character(x), stringsAsFactors = FALSE) + x <- data.frame(mo = as.mo(x), stringsAsFactors = FALSE) if (is.null(by)) { by <- "mo" } @@ -142,6 +148,7 @@ joins_check_df <- function(x, by) { if (is.na(by)) { if ("mo" %in% colnames(x)) { by <- "mo" + x[, "mo"] <- as.mo(x[, "mo"]) } else { stop("Cannot join - no column found with name or class `mo`.", call. = FALSE) } diff --git a/R/key_antibiotics.R b/R/key_antibiotics.R index 9fccf64c..250e94ef 100755 --- a/R/key_antibiotics.R +++ b/R/key_antibiotics.R @@ -245,7 +245,7 @@ key_antibiotics <- function(x, } -#' @importFrom dplyr progress_estimated %>% +#' @importFrom dplyr %>% #' @rdname key_antibiotics #' @export key_antibiotics_equal <- function(y, @@ -270,7 +270,7 @@ key_antibiotics_equal <- function(y, result <- logical(length(x)) if (info_needed == TRUE) { - p <- dplyr::progress_estimated(length(x)) + p <- progress_estimated(length(x)) } for (i in seq_len(length(x))) { diff --git a/R/mic.R b/R/mic.R index 7c014351..fc70c007 100755 --- a/R/mic.R +++ b/R/mic.R @@ -235,9 +235,15 @@ barplot.mic <- function(height, axis(2, seq(0, max(table(droplevels.factor(height))))) } -#' @importFrom pillar type_sum +#' @importFrom vctrs vec_ptype_abbr #' @export -type_sum.mic <- function(x) { +vec_ptype_abbr.mic <- function(x, ...) { + "mic" +} + +#' @importFrom vctrs vec_ptype_full +#' @export +vec_ptype_full.mic <- function(x, ...) { "mic" } diff --git a/R/mo.R b/R/mo.R index db621bba..f991d925 100755 --- a/R/mo.R +++ b/R/mo.R @@ -78,9 +78,9 @@ #' - Uncertainty level 3: allow all of level 1 and 2, strip off text elements from the end, allow any part of a taxonomic name. #' #' This leads to e.g.: -#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (`B_STRPT_GRPB`) needs review. -#' - `"S. 
aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (`B_STPHY_AURS`) needs review. -#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (`B_NESSR_GNRR`) needs review. +#' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (``r as.mo("Streptococcus group B")``) needs review. +#' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (``r as.mo("Staphylococcus aureus")``) needs review. +#' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review. #' #' The level of uncertainty can be set using the argument `allow_uncertain`. The default is `allow_uncertain = TRUE`, which is equal to uncertainty level 2. Using `allow_uncertain = FALSE` is equal to uncertainty level 0 and will skip all rules. You can also use e.g. `as.mo(..., allow_uncertain = 1)` to only allow up to level 1 uncertainty. 
#' @@ -234,7 +234,7 @@ is.mo <- function(x) { inherits(x, "mo") } -#' @importFrom dplyr %>% pull left_join n_distinct progress_estimated filter distinct +#' @importFrom dplyr %>% pull left_join n_distinct filter distinct #' @importFrom data.table data.table as.data.table setkey #' @importFrom crayon magenta red blue silver italic #' @importFrom cleaner percentage @@ -1675,12 +1675,48 @@ print.mo <- function(x, ...) { print.default(x, quote = FALSE) } -#' @importFrom pillar type_sum +#' @importFrom vctrs vec_ptype_abbr #' @export -type_sum.mo <- function(x) { +vec_ptype_abbr.mo <- function(x, ...) { "mo" } +#' @importFrom vctrs vec_ptype_full +#' @export +vec_ptype_full.mo <- function(x, ...) { + "mo" +} + +#' @importFrom vctrs vec_ptype2 +#' @export +vec_ptype2.mo <- function(x, y, ...) { + vctrs::vec_ptype2(x = as.character(x), y = as.character(y), ...) +} + +#' @importFrom vctrs vec_cast +#' @export +vec_cast.mo <- function(x, to, ...) { + as.mo(vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)) +} + +#' @importFrom vctrs vec_cast +#' @export +vec_cast.mo.mo <- function(x, to, ...) { + as.mo(vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)) +} + +#' @importFrom vctrs vec_cast +#' @export +vec_cast.mo.character <- function(x, to, ...) { + vctrs::vec_cast(x = as.character(x), to = as.character(to), ...) +} + +#' @importFrom vctrs vec_cast +#' @export +vec_cast.character.mo <- function(x, to, ...) { + as.mo(vctrs::vec_cast(x = as.character(x), to = as.character(to), ...)) +} + #' @importFrom pillar pillar_shaft #' @export pillar_shaft.mo <- function(x, ...) { diff --git a/R/pca.R b/R/pca.R index bef7abd3..e6afa9c0 100755 --- a/R/pca.R +++ b/R/pca.R @@ -22,7 +22,7 @@ #' Principal Component Analysis (for AMR) #' #' Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables. 
-#' @inheritSection lifecycle Experimental lifecycle +#' @inheritSection lifecycle Maturing lifecycle #' @param x a [data.frame] containing numeric columns #' @param ... columns of `x` to be selected for PCA #' @inheritParams stats::prcomp diff --git a/R/progress_estimated.R b/R/progress_estimated.R new file mode 100644 index 00000000..1944d160 --- /dev/null +++ b/R/progress_estimated.R @@ -0,0 +1,142 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# SOURCE # +# https://gitlab.com/msberends/AMR # +# # +# LICENCE # +# (c) 2018-2020 Berends MS, Luz CF et al. # +# # +# This R package is free software; you can freely use and distribute # +# it for both personal and commercial purposes under the terms of the # +# GNU General Public License version 2.0 (GNU GPL-2), as published by # +# the Free Software Foundation. # +# # +# We created this package for both routine data analysis and academic # +# research and it was publicly released in the hope that it will be # +# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. # +# Visit our website for more info: https://msberends.gitlab.io/AMR. # +# ==================================================================== # + +# taken from https://github.com/tidyverse/dplyr/blob/f306d8da8f27c2e6abbd3c70f219fef7ca61fbb5/R/progress.R +# when it was still in the dplyr package + +progress_estimated <- function(n, min_time = 0) { + Progress$new(n, min_time = min_time) +} + +#' @importFrom R6 R6Class +Progress <- R6::R6Class("Progress", + public = list( + n = NULL, + i = 0, + init_time = NULL, + stopped = FALSE, + stop_time = NULL, + min_time = NULL, + last_update = NULL, + + initialize = function(n, min_time = 0, ...) { + self$n <- n + self$min_time <- min_time + self$begin() + }, + + begin = function() { + "Initialise timer. Call this before beginning timing." 
+ self$i <- 0 + self$last_update <- self$init_time <- now() + self$stopped <- FALSE + self + }, + + pause = function(x) { + "Sleep for x seconds. Useful for testing." + Sys.sleep(x) + self + }, + + width = function() { + getOption("width") - nchar("|100% ~ 99.9 h remaining") - 2 + }, + + tick = function() { + "Process one element" + if (self$stopped) return(self) + + if (self$i == self$n) stop("No more ticks") + self$i <- self$i + 1 + self + }, + + stop = function() { + if (self$stopped) return(self) + + self$stopped <- TRUE + self$stop_time <- now() + self + }, + + print = function(...) { + if (!isTRUE(getOption("dplyr.show_progress")) || # user sepecifies no progress + !interactive() || # not an interactive session + !is.null(getOption("knitr.in.progress"))) { # dplyr used within knitr document + return(invisible(self)) + } + + now_ <- now() + if (now_ - self$init_time < self$min_time || now_ - self$last_update < 0.05) { + return(invisible(self)) + } + self$last_update <- now_ + + if (self$stopped) { + overall <- show_time(self$stop_time - self$init_time) + if (self$i == self$n) { + cat_line("Completed after ", overall) + cat("\n") + } else { + cat_line("Killed after ", overall) + cat("\n") + } + return(invisible(self)) + } + + avg <- (now() - self$init_time) / self$i + time_left <- (self$n - self$i) * avg + nbars <- trunc(self$i / self$n * self$width()) + + cat_line( + "|", str_rep("=", nbars), str_rep(" ", self$width() - nbars), "|", + format(round(self$i / self$n * 100), width = 3), "% ", + "~", show_time(time_left), " remaining" + ) + + invisible(self) + } + ) +) + +cat_line <- function(...) 
{ + msg <- paste(..., sep = "", collapse = "") + gap <- max(c(0, getOption("width") - nchar(msg, "width"))) + cat("\r", msg, rep.int(" ", gap), sep = "") + utils::flush.console() +} + +str_rep <- function(x, i) { + paste(rep.int(x, i), collapse = "") +} + +show_time <- function(x) { + if (x < 60) { + paste(round(x), "s") + } else if (x < 60 * 60) { + paste(round(x / 60), "m") + } else { + paste(round(x / (60 * 60)), "h") + } +} + +now <- function() proc.time()[[3]] diff --git a/R/rsi.R b/R/rsi.R index 6e9894de..c2516791 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -659,9 +659,15 @@ barplot.rsi <- function(height, } } -#' @importFrom pillar type_sum +#' @importFrom vctrs vec_ptype_abbr #' @export -type_sum.rsi <- function(x) { +vec_ptype_abbr.rsi <- function(x, ...) { + "rsi" +} + +#' @importFrom vctrs vec_ptype_full +#' @export +vec_ptype_full.rsi <- function(x, ...) { "rsi" } diff --git a/R/sysdata.rda b/R/sysdata.rda index a2901128..8840d534 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/country_analysis.R b/data-raw/country_analysis.R index 395963fd..941c434f 100644 --- a/data-raw/country_analysis.R +++ b/data-raw/country_analysis.R @@ -50,7 +50,7 @@ rm(data_json) # add country data based on IP address and ipinfo.io API unique_ip <- unique(data$ipaddress) ip_tbl <- GET_df(unique_ip[1]) -p <- progress_estimated(n = length(unique_ip) - 1, min_time = 0) +p <- AMR:::progress_estimated(n = length(unique_ip) - 1, min_time = 0) for (i in 2:length(unique_ip)) { p$tick()$print() ip_tbl <- ip_tbl %>% diff --git a/data-raw/reproduction_of_microorganisms.R b/data-raw/reproduction_of_microorganisms.R index af9feaac..ef9be6e7 100644 --- a/data-raw/reproduction_of_microorganisms.R +++ b/data-raw/reproduction_of_microorganisms.R @@ -23,6 +23,7 @@ # Data retrieved from the Catalogue of Life (CoL) through the Encyclopaedia of Life: # https://opendata.eol.org/dataset/catalogue-of-life/ +# https://doi.org/10.15468/rffz4x # (download the resource file 
with a name like "Catalogue of Life yyyy-mm-dd") # and from the Leibniz Institute DSMZ-German Collection of Microorganisms and Cell Cultures # https://www.dsmz.de/support/bacterial-nomenclature-up-to-date-downloads.html @@ -32,7 +33,8 @@ library(dplyr) library(AMR) # unzip and extract taxon.tab (around 1.5 GB) from the CoL archive, then: -data_col <- data.table::fread("data-raw/taxon.tab") +# data_col <- data.table::fread("data-raw/taxon.tab") +data_col <- data.table::fread("data-raw/taxa.txt", quote = "") # read the xlsx file from DSMZ (only around 2.5 MB): data_dsmz <- readxl::read_xlsx("data-raw/DSMZ_bactnames.xlsx") @@ -66,7 +68,7 @@ data_col <- data_col %>% subspecies = infraspecificEpithet, rank = taxonRank, ref = scientificNameAuthorship, - species_id = furtherInformationURL) + species_id = references) data_col$source <- "CoL" # clean data_dsmz @@ -761,6 +763,7 @@ new_families <- MOs %>% class(MOs$mo) <- "character" MOs <- rbind(MOs %>% filter(!(rank == "family" & fullname %in% new_families)), AMR::microorganisms %>% + select(-snomed) %>% filter(family == "Enterobacteriaceae" & rank == "family") %>% rbind(., ., ., ., ., ., .) 
%>% mutate(fullname = new_families, @@ -794,7 +797,9 @@ colnames(MOs) MOs %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms$fullname) %>% View() MOs.old %>% arrange(fullname) %>% filter(!fullname %in% AMR::microorganisms.old$fullname) %>% View() # and the ones we lost: -AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() +AMR::microorganisms %>% filter(!fullname %in% MOs$fullname) %>% View() # based on fullname +AMR::microorganisms %>% filter(!mo %in% MOs$mo) %>% View() # based on mo +AMR::microorganisms %>% filter(!mo %in% MOs$mo & !fullname %in% MOs$fullname) %>% View() # and these IDs have changed: old_new <- MOs %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% @@ -805,24 +810,41 @@ old_new <- MOs %>% View(old_new) # to keep all the old IDs: -# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>% +# MOs <- MOs %>% filter(!mo %in% old_new$mo_new) %>% # rbind(microorganisms %>% # filter(mo %in% old_new$mo_old) %>% # select(mo, fullname) %>% -# left_join(MOs %>% +# left_join(MOs %>% # select(-mo), by = "fullname")) # and these codes are now missing (which will throw a unit test error): AMR::microorganisms.codes %>% filter(!mo %in% MOs$mo) AMR::rsi_translation %>% filter(!mo %in% MOs$mo) -AMR::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) +AMR:::microorganisms.translation %>% filter(!mo_new %in% MOs$mo) %>% View() # this is how to fix it microorganisms.codes <- AMR::microorganisms.codes %>% left_join(MOs %>% mutate(kingdom_fullname = paste(kingdom, fullname)) %>% - left_join(AMR::microorganisms %>% - mutate(kingdom_fullname = paste(kingdom, fullname)) %>% - select(mo, kingdom_fullname), by = "kingdom_fullname", suffix = c("_new", "_old")) %>% + left_join(AMR::microorganisms %>% + transmute(mo, kingdom_fullname = paste(kingdom, fullname)), + by = "kingdom_fullname", suffix = c("_new", "_old")) %>% + select(mo_old, mo_new), + by = c("mo" = "mo_old")) %>% + select(code, mo = mo_new) %>% + filter(!is.na(mo)) 
+microorganisms.codes %>% filter(!mo %in% MOs$mo) +# and for microorganisms.translation: +microorganisms.translation <- AMR:::microorganisms.translation %>% + select(mo = mo_new) %>% + left_join(AMR::microorganisms %>% + transmute(mo, kingdom_fullname = paste(kingdom, fullname)), + by = "kingdom_fullname", suffix = c("_new", "_old")) %>% + select(mo_old, mo_new) + left_join(MOs %>% + mutate(kingdom_fullname = paste(kingdom, fullname)) %>% + left_join(AMR::microorganisms %>% + transmute(mo, kingdom_fullname = paste(kingdom, fullname)), + by = "kingdom_fullname", suffix = c("_new", "_old")) %>% select(mo_old, mo_new), by = c("mo" = "mo_old")) %>% select(code, mo = mo_new) %>% diff --git a/docs/404.html b/docs/404.html index 79c6d533..81cd34ce 100644 --- a/docs/404.html +++ b/docs/404.html @@ -78,7 +78,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 155d95ca..96fee50d 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004 diff --git a/docs/articles/EUCAST.html b/docs/articles/EUCAST.html index 922cbf0f..6c980b6e 100644 --- a/docs/articles/EUCAST.html +++ b/docs/articles/EUCAST.html @@ -39,7 +39,7 @@ AMR (for R) - 1.0.1.9000 + 1.0.1.9004 @@ -77,9 +77,9 @@
  • - + - Conduct Principal Component Analysis for AMR + Conduct principal component analysis for AMR
  • @@ -186,7 +186,7 @@

    How to apply EUCAST rules

    Matthijs S. Berends

    -

    07 March 2020

    +

    14 March 2020

    diff --git a/docs/articles/PCA.html b/docs/articles/PCA.html index ced3dfc4..a55921b4 100644 --- a/docs/articles/PCA.html +++ b/docs/articles/PCA.html @@ -39,7 +39,7 @@ AMR (for R) - 1.0.1.9000 + 1.0.1.9004 @@ -186,7 +186,7 @@

    How to conduct principal component analysis (PCA) for AMR

    Matthijs S. Berends

    -

    07 March 2020

    +

    14 March 2020

    diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png index 1f655d7f..db8eb744 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png index 9d52bc29..5fca6abd 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index db2efa6e..d35d7b99 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -39,7 +39,7 @@ AMR (for R) - 1.0.1.9000 + 1.0.1.9004 @@ -186,7 +186,7 @@

    Benchmarks

    Matthijs S. Berends

    -

    07 March 2020

    +

    14 March 2020

    @@ -220,36 +220,21 @@ times = 10) print(S.aureus, unit = "ms", signif = 2) # Unit: milliseconds -# expr min lq mean median uq max -# as.mo("sau") 8.0 8.2 9.1 8.4 8.5 16 -# as.mo("stau") 37.0 40.0 51.0 52.0 60.0 76 -# as.mo("STAU") 36.0 38.0 58.0 60.0 68.0 100 -# as.mo("staaur") 8.2 8.4 9.5 8.6 8.9 14 -# as.mo("STAAUR") 8.2 8.3 15.0 9.2 14.0 53 -# as.mo("S. aureus") 13.0 21.0 64.0 21.0 45.0 260 -# as.mo("S aureus") 13.0 14.0 33.0 24.0 44.0 76 -# as.mo("Staphylococcus aureus") 4.7 4.8 9.9 6.8 7.9 42 -# as.mo("Staphylococcus aureus (MRSA)") 620.0 640.0 770.0 700.0 860.0 1100 -# as.mo("Sthafilokkockus aaureuz") 330.0 350.0 460.0 490.0 560.0 570 -# as.mo("MRSA") 8.1 8.3 14.0 12.0 13.0 48 -# as.mo("VISA") 24.0 25.0 34.0 26.0 38.0 59 -# as.mo("VRSA") 23.0 24.0 37.0 27.0 39.0 78 -# as.mo(22242419) 120.0 130.0 150.0 140.0 160.0 240 -# neval -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 -# 10 +# expr min lq mean median uq max neval +# as.mo("sau") 8.9 9.3 9.6 9.6 9.9 10 10 +# as.mo("stau") 41.0 41.0 51.0 43.0 67.0 74 10 +# as.mo("STAU") 39.0 41.0 49.0 42.0 56.0 72 10 +# as.mo("staaur") 9.0 9.2 9.7 9.5 9.9 11 10 +# as.mo("STAAUR") 9.5 9.8 24.0 21.0 38.0 45 10 +# as.mo("S. aureus") 15.0 16.0 26.0 18.0 38.0 61 10 +# as.mo("S aureus") 15.0 15.0 17.0 16.0 17.0 21 10 +# as.mo("Staphylococcus aureus") 5.2 5.6 8.4 6.0 6.5 30 10 +# as.mo("Staphylococcus aureus (MRSA)") 640.0 690.0 710.0 710.0 720.0 760 10 +# as.mo("Sthafilokkockus aaureuz") 350.0 360.0 420.0 400.0 490.0 510 10 +# as.mo("MRSA") 9.2 9.3 16.0 10.0 10.0 49 10 +# as.mo("VISA") 25.0 27.0 46.0 56.0 57.0 60 10 +# as.mo("VRSA") 26.0 27.0 39.0 28.0 32.0 120 10 +# as.mo(22242419) 120.0 140.0 170.0 140.0 150.0 410 10

    In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second.

    To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside of this is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Methanosarcina semesiae (B_MTHNSR_SEMS), a bug probably never found before in humans:

    @@ -261,19 +246,19 @@ times = 10) print(M.semesiae, unit = "ms", signif = 4) # Unit: milliseconds -# expr min lq mean median uq -# as.mo("metsem") 1349.000 1352.000 1597.000 1411.000 1983.000 -# as.mo("METSEM") 1316.000 2146.000 2069.000 2226.000 2245.000 -# as.mo("M. semesiae") 13.330 14.110 32.960 21.840 53.090 -# as.mo("M. semesiae") 13.730 20.960 29.720 21.430 40.000 -# as.mo("Methanosarcina semesiae") 4.802 5.171 6.667 6.551 8.036 -# max neval -# 2184.000 10 -# 2337.000 10 -# 62.780 10 -# 64.510 10 -# 8.735 10 -

    That takes 6.1 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Methanosarcina semesiae) are always very fast and only take some thousands of seconds to coerce - they are the most probable input from most data sets.

    +# expr min lq mean median uq +# as.mo("metsem") 1485.000 1507.000 1524.00 1519.000 1538.000 +# as.mo("METSEM") 1371.000 1495.000 1557.00 1567.000 1633.000 +# as.mo("M. semesiae") 16.010 16.310 25.38 16.480 42.840 +# as.mo("M. semesiae") 15.700 15.900 16.74 16.370 17.480 +# as.mo("Methanosarcina semesiae") 5.885 6.116 11.79 6.347 8.155 +# max neval +# 1577.00 10 +# 1663.00 10 +# 48.53 10 +# 18.55 10 +# 32.92 10 +

    That takes 5.5 times as much time on average. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Methanosarcina semesiae) are always very fast and only take some thousandths of a second to coerce - they are the most probable input from most data sets.

    In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Methanosarcina semesiae (which is uncommon):

    Uncommon microorganisms take a lot more time than common microorganisms. To relieve this pitfall and further improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

    @@ -287,11 +272,11 @@ # keep only the unique ones unique() %>% # pick 50 of them at random - sample(50) %>% + sample(50) %>% # paste that 10,000 times rep(10000) %>% # scramble it - sample() + sample() # got indeed 50 times 10,000 = half a million? length(x) @@ -306,9 +291,9 @@ times = 100) print(run_it, unit = "ms", signif = 3) # Unit: milliseconds -# expr min lq mean median uq max neval -# mo_name(x) 564 605 673 630 657 1100 100 -

    So transforming 500,000 values (!!) of 50 unique values only takes 0.63 seconds (630 ms). You only lose time on your unique input values.

    +# expr min lq mean median uq max neval +# mo_name(x) 542 585 605 601 614 738 100 +

    So transforming 500,000 values (!!) of 50 unique values only takes 0.6 seconds (600 ms). You only lose time on your unique input values.

    @@ -320,11 +305,11 @@ times = 10) print(run_it, unit = "ms", signif = 3) # Unit: milliseconds -# expr min lq mean median uq max neval -# A 6.58 6.590 7.340 6.630 6.780 13.00 10 -# B 13.50 13.700 18.700 13.900 14.600 60.80 10 -# C 0.72 0.863 0.917 0.898 0.935 1.26 10

    -

    So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0009 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

    +# expr min lq mean median uq max neval +# A 6.760 6.900 7.43 7.070 7.540 9.290 10 +# B 14.200 14.400 18.80 14.900 16.000 51.500 10 +# C 0.586 0.726 0.74 0.757 0.763 0.804 10 +

    So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0008 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

    run_it <- microbenchmark(A = mo_species("aureus"),
                              B = mo_genus("Staphylococcus"),
                              C = mo_name("Staphylococcus aureus"),
    @@ -337,14 +322,14 @@
     print(run_it, unit = "ms", signif = 3)
     # Unit: milliseconds
     #  expr   min    lq  mean median    uq   max neval
    -#     A 0.499 0.511 0.516  0.517 0.522 0.544    10
    -#     B 0.532 0.539 0.550  0.542 0.563 0.592    10
    -#     C 0.718 0.787 0.832  0.843 0.889 0.904    10
    -#     D 0.538 0.548 0.566  0.567 0.571 0.607    10
    -#     E 0.503 0.509 0.515  0.513 0.516 0.549    10
    -#     F 0.502 0.504 0.514  0.511 0.519 0.539    10
    -#     G 0.493 0.513 0.538  0.514 0.536 0.684    10
    -#     H 0.499 0.501 0.509  0.505 0.516 0.531    10
    +# A 0.374 0.381 0.389 0.389 0.395 0.416 10 +# B 0.404 0.411 0.422 0.421 0.425 0.452 10 +# C 0.615 0.711 0.726 0.730 0.751 0.861 10 +# D 0.405 0.409 0.429 0.428 0.435 0.485 10 +# E 0.381 0.384 0.392 0.390 0.394 0.429 10 +# F 0.365 0.366 0.379 0.375 0.383 0.419 10 +# G 0.362 0.372 0.378 0.380 0.388 0.391 10 +# H 0.378 0.381 0.403 0.387 0.393 0.556 10

    Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" anyway, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

    @@ -371,13 +356,13 @@ print(run_it, unit = "ms", signif = 4) # Unit: milliseconds # expr min lq mean median uq max neval -# en 23.72 25.30 30.59 25.77 26.99 76.03 100 -# de 24.88 26.81 31.11 27.47 28.93 69.86 100 -# nl 30.65 32.77 38.07 33.70 35.23 74.79 100 -# es 24.89 26.33 32.10 27.13 28.87 68.79 100 -# it 24.78 26.72 33.51 27.53 28.91 166.60 100 -# fr 24.84 26.58 31.50 27.13 28.29 67.38 100 -# pt 24.88 26.58 32.38 27.50 29.20 79.30 100
    +# en 24.76 26.92 35.44 27.70 31.93 143.10 100 +# de 26.46 28.18 33.90 29.51 30.51 64.85 100 +# nl 32.40 34.89 39.79 35.94 37.28 75.95 100 +# es 26.41 28.80 34.46 29.56 31.58 67.56 100 +# it 26.44 28.52 35.22 29.30 30.37 156.00 100 +# fr 26.24 28.09 34.78 29.52 31.23 65.88 100 +# pt 26.28 28.32 36.00 29.49 32.22 66.76 100

    Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

    diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png index f86f4ef2..c7db6200 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png index 3eec84dd..4fa5b80c 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 28f4f619..db7952b8 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004 diff --git a/docs/authors.html b/docs/authors.html index beceee87..1ef3c8a5 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004 diff --git a/docs/countries.png b/docs/countries.png index e9e82ee6..62084e3f 100644 Binary files a/docs/countries.png and b/docs/countries.png differ diff --git a/docs/countries_large.png b/docs/countries_large.png index 24b75f2c..1756232f 100644 Binary files a/docs/countries_large.png and b/docs/countries_large.png differ diff --git a/docs/index.html b/docs/index.html index b33d8bda..6a5263fd 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004 @@ -204,8 +204,8 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/

    - Used in almost 100 countries
    - Since its first public release in early 2018, this package has been downloaded over 25,000 times from 99 countries (as of February 2020, CRAN logs). Click the map to enlarge.

    + Used in more than 100 countries
    + Since its first public release in early 2018, this package has been downloaded from more than 100 countries (as of March 2020, CRAN logs). Click the map to enlarge, to see the names of the countries.



    @@ -240,6 +240,7 @@ A methods paper about this package has been preprinted at bioRxiv (DOI: 10.1101/
  • Getting SNOMED codes of a microorganism, or get its name associated with a SNOMED code (manual)
  • Getting LOINC codes of an antibiotic, or get its name associated with a LOINC code (manual)
  • Machine reading the EUCAST and CLSI guidelines from 2011-2020 to translate MIC values and disk diffusion diameters to R/SI (link)
  • +
  • Principal component analysis for AMR (tutorial)
  • This package is ready-to-use for specialists in many fields:

    +
    +

    +Other

    +
      +
    • Support for the upcoming dplyr version 1.0.0
    • +
    +
    @@ -332,9 +339,9 @@
    -
    +

    -Other

    +Other
    • Add a CITATION file
    • Full support for the upcoming R 4.0
    • @@ -432,9 +439,9 @@
    -
    +

    -Other

    +Other
    • Rewrote the complete documentation to markdown format, to be able to use the very latest version of the great Roxygen2, released in November 2019. This tremendously improved the documentation quality, since the rewrite forced us to go over all texts again and make changes where needed.
    • Change dependency on clean to cleaner, as this package was renamed accordingly upon CRAN request
    • @@ -586,9 +593,9 @@
    • Added more MIC factor levels (as.mic())
    -
    +

    -Other

    +Other
    • Added Prof. Dr. Casper Albers as doctoral advisor and added Dr. Judith Fonville, Eric Hazenberg, Dr. Bart Meijer, Dr. Dennis Souverein and Annick Lenglet as contributors
    • Cleaned the coding style of every single syntax line in this package with the help of the lintr package
    • @@ -669,9 +676,9 @@
    -
    +

    -Other

    +Other
    • Fixed a note thrown by CRAN tests
    @@ -765,9 +772,9 @@
  • Fix for mo_shortname() where species would not be determined correctly

  • -
    +

    -Other

    +Other
    • Support for R 3.6.0 and later by providing support for staged install
    • @@ -1012,9 +1019,9 @@
    • if using different lengths of pattern and x in %like%, it will now return the call
    -
    +

    -Other

    +Other
    • Updated licence text to emphasise GPL 2.0 and that this is an R package.
    @@ -1129,9 +1136,9 @@
  • Percentages will now be rounded more logically (e.g. in freq function)

  • -
    +

    -Other

    +Other
    • New dependency on package crayon, to support formatted text in the console
    • Dependency tidyr is now mandatory (went to Import field) since portion_df and count_df rely on it
    • @@ -1264,9 +1271,9 @@
    -
    +

    -Other

    +Other
    • More unit tests to ensure better integrity of functions
    @@ -1393,9 +1400,9 @@
  • Other small fixes
  • -
    +

    -Other

    +Other
    • Added integration tests (check if everything works as expected) for all releases of R 3.1 and higher
        @@ -1455,9 +1462,9 @@
      • Functions as.rsi and as.mic now add the package name and version as attributes
    -
    +

    -Other

    +Other

    Read more on our website!

    diff --git a/docs/reference/WHONET.html b/docs/reference/WHONET.html index b268ad16..666cb482 100644 --- a/docs/reference/WHONET.html +++ b/docs/reference/WHONET.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
    @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -258,6 +265,7 @@
  • Date of data entry
    Date this data was entered in WHONET

  • AMP_ND10:CIP_EE
    27 different antibiotics. You can look up the abbreviations in the
    antibiotics data set, or use e.g. ab_name("AMP") to get the official name immediately. Before analysis, you should transform this to a valid antibiotic class, using as.rsi().

  • +

    Read more on our website!

    diff --git a/docs/reference/age.html b/docs/reference/age.html index 1312693c..83120572 100644 --- a/docs/reference/age.html +++ b/docs/reference/age.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1.9000 + 1.0.1.9004
    @@ -117,9 +117,9 @@
  • - + - Conduct Principal Component Analysis for AMR + Conduct principal component analysis for AMR
  • diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index 1d52932e..3eaf4d2a 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1.9000 + 1.0.1.9004
  • @@ -117,9 +117,9 @@
  • - + - Conduct Principal Component Analysis for AMR + Conduct principal component analysis for AMR
  • diff --git a/docs/reference/antibiotics.html b/docs/reference/antibiotics.html index e101b856..12c86cdb 100644 --- a/docs/reference/antibiotics.html +++ b/docs/reference/antibiotics.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
  • @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -232,7 +239,8 @@

    Format

    -

    For the antibiotics data set: a data.frame with 452 observations and 14 variables:

    + +

    For the antibiotics data set: a data.frame with 452 observations and 14 variables:

    • ab
      Antibiotic ID as used in this package (like AMC), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available

    • @@ -266,6 +274,8 @@
    • iv_units
      Units of iv_ddd

    + +

    An object of class data.frame with 102 rows and 9 columns.

    Source

    World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology (WHOCC): https://www.whocc.no/atc_ddd_index/

    diff --git a/docs/reference/example_isolates.html b/docs/reference/example_isolates.html index 062fef0d..b58598b2 100644 --- a/docs/reference/example_isolates.html +++ b/docs/reference/example_isolates.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
  • @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -242,6 +249,7 @@
  • mo
    ID of microorganism created with
    as.mo(), see also microorganisms

  • PEN:RIF
    40 different antibiotics with class rsi (see as.rsi()); these column names occur in the antibiotics data set and can be translated with ab_name()

  • +

    Read more on our website!

    diff --git a/docs/reference/example_isolates_unclean.html b/docs/reference/example_isolates_unclean.html index 53530d45..a255fb5c 100644 --- a/docs/reference/example_isolates_unclean.html +++ b/docs/reference/example_isolates_unclean.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
    @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -237,6 +244,7 @@
  • bacteria
    info about microorganism that can be transformed with
    as.mo(), see also microorganisms

  • AMX:GEN
    4 different antibiotics that have to be transformed with as.rsi()

  • +

    Read more on our website!

    diff --git a/docs/reference/ggplot_pca.html b/docs/reference/ggplot_pca.html index 45cd4efb..bca92b67 100644 --- a/docs/reference/ggplot_pca.html +++ b/docs/reference/ggplot_pca.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004
    diff --git a/docs/reference/index.html b/docs/reference/index.html index d8e3cb48..d8493df6 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004
    diff --git a/docs/reference/lifecycle.html b/docs/reference/lifecycle.html index 665d5a72..f99abd00 100644 --- a/docs/reference/lifecycle.html +++ b/docs/reference/lifecycle.html @@ -81,7 +81,7 @@ This page contains a section for every lifecycle (with text borrowed from the af AMR (for R) - 1.0.1.9000 + 1.0.1.9004
    @@ -119,9 +119,9 @@ This page contains a section for every lifecycle (with text borrowed from the af
  • - + - Conduct Principal Component Analysis for AMR + Conduct principal component analysis for AMR
  • diff --git a/docs/reference/microorganisms.codes.html b/docs/reference/microorganisms.codes.html index f8eb695e..689d14df 100644 --- a/docs/reference/microorganisms.codes.html +++ b/docs/reference/microorganisms.codes.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
  • @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -234,6 +241,7 @@
  • code
    Commonly used code of a microorganism

  • mo
    ID of the microorganism in the
    microorganisms data set

  • +

    Catalogue of Life

    diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index f9f62c5a..36b8fa26 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
    @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -242,6 +249,7 @@
  • prevalence
    Prevalence of the microorganism, see
    as.mo()

  • snomed
    SNOMED code of the microorganism. Use mo_snomed() to retrieve it quickly, see mo_property().

  • +

    Source

    Catalogue of Life: Annual Checklist (public online taxonomic database), http://www.catalogueoflife.org (check included annual version with catalogue_of_life_version()).

    diff --git a/docs/reference/microorganisms.old.html b/docs/reference/microorganisms.old.html index ddcaef46..a5664749 100644 --- a/docs/reference/microorganisms.old.html +++ b/docs/reference/microorganisms.old.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004
    @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -237,6 +244,7 @@
  • ref
    Author(s) and year of concerning scientific publication

  • prevalence
    Prevalence of the microorganism, see
    as.mo()

  • +

    Source

    Catalogue of Life: Annual Checklist (public online taxonomic database), http://www.catalogueoflife.org (check included annual version with catalogue_of_life_version()).

    diff --git a/docs/reference/pca.html b/docs/reference/pca.html index ca321567..f973c764 100644 --- a/docs/reference/pca.html +++ b/docs/reference/pca.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1.9002 + 1.0.1.9004 @@ -301,12 +301,12 @@

    The pca() function takes a data.frame as input and performs the actual PCA with the R function prcomp().

    The result of the pca() function is a prcomp object, with an additional attribute non_numeric_cols which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by ggplot_pca().

    -

    Experimental lifecycle

    +

    Maturing lifecycle

    -


    -The lifecycle of this function is experimental. An experimental function is in the very early stages of development. The underlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough.

    +


    +The lifecycle of this function is maturing. The underlying code of a maturing function has been roughed out, but finer details might still change. We will strive to maintain backward compatibility, but the function needs wider usage and more extensive testing in order to optimise the underlying code.

    Examples

    # `example_isolates` is a dataset available in the AMR package.
    @@ -334,7 +334,7 @@ The lifecycle of this function is experimen
           
  • Arguments
  • Value
  • Details
  • -
  • Experimental lifecycle
  • +
  • Maturing lifecycle
  • Examples
  • diff --git a/docs/reference/rsi_translation.html b/docs/reference/rsi_translation.html index b46ad1a2..4bae4fc2 100644 --- a/docs/reference/rsi_translation.html +++ b/docs/reference/rsi_translation.html @@ -79,7 +79,7 @@ AMR (for R) - 1.0.1 + 1.0.1.9004 @@ -115,6 +115,13 @@ Predict antimicrobial resistance +
  • + + + + Conduct principal component analysis for AMR + +
  • @@ -241,6 +248,7 @@
  • breakpoint_S
    Lowest MIC value or highest number of millimetres that leads to "S"

  • breakpoint_R
    Highest MIC value or lowest number of millimetres that leads to "R"

  • +

    Details

    The repository of this AMR package contains a file comprising this exact data set: https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt. This file allows for machine reading EUCAST and CLSI guidelines, which is almost impossible with the Excel and PDF files distributed by EUCAST and CLSI. This file is updated automatically.

    diff --git a/index.md b/index.md index 61017270..bb6f5ac4 100644 --- a/index.md +++ b/index.md @@ -17,8 +17,8 @@ We created this package for both routine data analysis and academic research (as

    - Used in almost 100 countries
    - Since its first public release in early 2018, this package has been downloaded over 25,000 times from 99 countries (as of February 2020, CRAN logs). Click the map to enlarge.



    + Used in more than 100 countries
    + Since its first public release in early 2018, this package has been downloaded from more than 100 countries (as of March 2020, CRAN logs). Click the map to enlarge, to see the names of the countries.



    #### Partners @@ -51,6 +51,7 @@ This package can be used for: * Getting SNOMED codes of a microorganism, or get its name associated with a SNOMED code ([manual](./reference/mo_property.html)) * Getting LOINC codes of an antibiotic, or get its name associated with a LOINC code ([manual](./reference/ab_property.html)) * Machine reading the EUCAST and CLSI guidelines from 2011-2020 to translate MIC values and disk diffusion diameters to R/SI ([link](https://gitlab.com/msberends/AMR/blob/master/data-raw/rsi_translation.txt)) + * Principal component analysis for AMR ([tutorial](./articles/PCA.html)) This package is ready-to-use for specialists in many fields: @@ -85,8 +86,8 @@ It will be downloaded and installed automatically. For RStudio, click on the men The latest and unpublished development version can be installed with (**precaution: may be unstable**): ```r -install.packages("devtools") -devtools::install_gitlab("msberends/AMR") +install.packages("remotes") +remotes::install_gitlab("msberends/AMR") ``` ### Get started diff --git a/man/AMR.Rd b/man/AMR.Rd index 9c4e1030..91be64e6 100644 --- a/man/AMR.Rd +++ b/man/AMR.Rd @@ -26,6 +26,7 @@ This package can be used for: \item Getting SNOMED codes of a microorganism, or get its name associated with a SNOMED code \item Getting LOINC codes of an antibiotic, or get its name associated with a LOINC code \item Machine reading the EUCAST and CLSI guidelines from 2011-2020 to translate MIC values and disk diffusion diameters to R/SI +\item Principal component analysis for AMR } } \section{Read more on our website!}{ diff --git a/man/WHONET.Rd b/man/WHONET.Rd index 37d63d3f..8d74668e 100644 --- a/man/WHONET.Rd +++ b/man/WHONET.Rd @@ -4,7 +4,8 @@ \name{WHONET} \alias{WHONET} \title{Data set with 500 isolates - WHONET example} -\format{A \code{\link{data.frame}} with 500 observations and 53 variables: +\format{ +A \code{\link{data.frame}} with 500 observations and 53 variables: \itemize{ \item \verb{Identification number}\cr 
ID of the sample \item \verb{Specimen number}\cr ID of the specimen @@ -32,7 +33,8 @@ \item \code{Comment}\cr Other comments \item \verb{Date of data entry}\cr Date this data was entered in WHONET \item \code{AMP_ND10:CIP_EE}\cr 27 different antibiotics. You can lookup the abbreviatons in the \link{antibiotics} data set, or use e.g. \code{\link[=ab_name]{ab_name("AMP")}} to get the official name immediately. Before analysis, you should transform this to a valid antibiotic class, using \code{\link[=as.rsi]{as.rsi()}}. -}} +} +} \usage{ WHONET } diff --git a/man/antibiotics.Rd b/man/antibiotics.Rd index 64ed22cd..90d22e69 100644 --- a/man/antibiotics.Rd +++ b/man/antibiotics.Rd @@ -5,7 +5,8 @@ \alias{antibiotics} \alias{antivirals} \title{Data sets with ~550 antimicrobials} -\format{\subsection{For the \link{antibiotics} data set: a \code{\link{data.frame}} with 452 observations and 14 variables:}{ +\format{ +\subsection{For the \link{antibiotics} data set: a \code{\link{data.frame}} with 452 observations and 14 variables:}{ \itemize{ \item \code{ab}\cr Antibiotic ID as used in this package (like \code{AMC}), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available \item \code{atc}\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC, like \code{J01CR02} @@ -36,7 +37,10 @@ \item \code{iv_ddd}\cr Defined Daily Dose (DDD), parenteral treatment \item \code{iv_units}\cr Units of \code{iv_ddd} } -}} +} + +An object of class \code{data.frame} with 102 rows and 9 columns. 
+} \source{ World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology (WHOCC): \url{https://www.whocc.no/atc_ddd_index/} diff --git a/man/example_isolates.Rd b/man/example_isolates.Rd index 2d4af0f8..a3a1859e 100644 --- a/man/example_isolates.Rd +++ b/man/example_isolates.Rd @@ -4,7 +4,8 @@ \name{example_isolates} \alias{example_isolates} \title{Data set with 2,000 example isolates} -\format{A \code{\link{data.frame}} with 2,000 observations and 49 variables: +\format{ +A \code{\link{data.frame}} with 2,000 observations and 49 variables: \itemize{ \item \code{date}\cr date of receipt at the laboratory \item \code{hospital_id}\cr ID of the hospital, from A to D @@ -16,7 +17,8 @@ \item \code{patient_id}\cr ID of the patient \item \code{mo}\cr ID of microorganism created with \code{\link[=as.mo]{as.mo()}}, see also \link{microorganisms} \item \code{PEN:RIF}\cr 40 different antibiotics with class \code{\link{rsi}} (see \code{\link[=as.rsi]{as.rsi()}}); these column names occur in the \link{antibiotics} data set and can be translated with \code{\link[=ab_name]{ab_name()}} -}} +} +} \usage{ example_isolates } diff --git a/man/example_isolates_unclean.Rd b/man/example_isolates_unclean.Rd index b4724288..2e48933c 100644 --- a/man/example_isolates_unclean.Rd +++ b/man/example_isolates_unclean.Rd @@ -4,14 +4,16 @@ \name{example_isolates_unclean} \alias{example_isolates_unclean} \title{Data set with unclean data} -\format{A \code{\link{data.frame}} with 3,000 observations and 8 variables: +\format{ +A \code{\link{data.frame}} with 3,000 observations and 8 variables: \itemize{ \item \code{patient_id}\cr ID of the patient \item \code{date}\cr date of receipt at the laboratory \item \code{hospital}\cr ID of the hospital, from A to C \item \code{bacteria}\cr info about microorganism that can be transformed with \code{\link[=as.mo]{as.mo()}}, see also \link{microorganisms} \item \code{AMX:GEN}\cr 4 different antibiotics that have to be transformed with 
\code{\link[=as.rsi]{as.rsi()}} -}} +} +} \usage{ example_isolates_unclean } diff --git a/man/microorganisms.Rd b/man/microorganisms.Rd index 9a0a7222..e0c14d8a 100755 --- a/man/microorganisms.Rd +++ b/man/microorganisms.Rd @@ -4,7 +4,8 @@ \name{microorganisms} \alias{microorganisms} \title{Data set with ~70,000 microorganisms} -\format{A \code{\link{data.frame}} with 69,447 observations and 17 variables: +\format{ +A \code{\link{data.frame}} with 69,447 observations and 17 variables: \itemize{ \item \code{mo}\cr ID of microorganism as used by this package \item \code{col_id}\cr Catalogue of Life ID @@ -16,7 +17,8 @@ \item \code{source}\cr Either "CoL", "DSMZ" (see Source) or "manually added" \item \code{prevalence}\cr Prevalence of the microorganism, see \code{\link[=as.mo]{as.mo()}} \item \code{snomed}\cr SNOMED code of the microorganism. Use \code{\link[=mo_snomed]{mo_snomed()}} to retrieve it quickly, see \code{\link[=mo_property]{mo_property()}}. -}} +} +} \source{ Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}). 
diff --git a/man/microorganisms.codes.Rd b/man/microorganisms.codes.Rd index 43a25a26..5b541447 100644 --- a/man/microorganisms.codes.Rd +++ b/man/microorganisms.codes.Rd @@ -4,11 +4,13 @@ \name{microorganisms.codes} \alias{microorganisms.codes} \title{Translation table for common microorganism codes} -\format{A \code{\link{data.frame}} with 5,450 observations and 2 variables: +\format{ +A \code{\link{data.frame}} with 5,450 observations and 2 variables: \itemize{ \item \code{code}\cr Commonly used code of a microorganism \item \code{mo}\cr ID of the microorganism in the \link{microorganisms} data set -}} +} +} \usage{ microorganisms.codes } diff --git a/man/microorganisms.old.Rd b/man/microorganisms.old.Rd index 1422ce08..b3c8e61c 100644 --- a/man/microorganisms.old.Rd +++ b/man/microorganisms.old.Rd @@ -4,14 +4,16 @@ \name{microorganisms.old} \alias{microorganisms.old} \title{Data set with previously accepted taxonomic names} -\format{A \code{\link{data.frame}} with 24,246 observations and 5 variables: +\format{ +A \code{\link{data.frame}} with 24,246 observations and 5 variables: \itemize{ \item \code{col_id}\cr Catalogue of Life ID that was originally given \item \code{col_id_new}\cr New Catalogue of Life ID that responds to an entry in the \link{microorganisms} data set \item \code{fullname}\cr Old full taxonomic name of the microorganism \item \code{ref}\cr Author(s) and year of concerning scientific publication \item \code{prevalence}\cr Prevalence of the microorganism, see \code{\link[=as.mo]{as.mo()}} -}} +} +} \source{ Catalogue of Life: Annual Checklist (public online taxonomic database), \url{http://www.catalogueoflife.org} (check included annual version with \code{\link[=catalogue_of_life_version]{catalogue_of_life_version()}}). 
} diff --git a/man/pca.Rd b/man/pca.Rd index 9a369314..a0142337 100644 --- a/man/pca.Rd +++ b/man/pca.Rd @@ -59,10 +59,10 @@ The \code{\link[=pca]{pca()}} function takes a \link{data.frame} as input and pe The result of the \code{\link[=pca]{pca()}} function is a \link{prcomp} object, with an additional attribute \code{non_numeric_cols} which is a vector with the column names of all columns that do not contain numeric values. These are probably the groups and labels, and will be used by \code{\link[=ggplot_pca]{ggplot_pca()}}. } -\section{Experimental lifecycle}{ +\section{Maturing lifecycle}{ -\if{html}{\figure{lifecycle_experimental.svg}{options: style=margin-bottom:5px} \cr} -The \link[AMR:lifecycle]{lifecycle} of this function is \strong{experimental}. An experimental function is in the very early stages of development. The unlying code might be changing frequently as we rapidly iterate and explore variations in search of the best fit. Experimental functions might be removed without deprecation, so you are generally best off waiting until a function is more mature before you use it in production code. Experimental functions will not be included in releases we submit to CRAN, since they have not yet matured enough. +\if{html}{\figure{lifecycle_maturing.svg}{options: style=margin-bottom:5px} \cr} +The \link[AMR:lifecycle]{lifecycle} of this function is \strong{maturing}. The unlying code of a maturing function has been roughed out, but finer details might still change. We will strive to maintain backward compatibility, but the function needs wider usage and more extensive testing in order to optimise the unlying code. 
} \examples{ diff --git a/man/rsi_translation.Rd b/man/rsi_translation.Rd index 85e7f2a9..b9103159 100644 --- a/man/rsi_translation.Rd +++ b/man/rsi_translation.Rd @@ -4,7 +4,8 @@ \name{rsi_translation} \alias{rsi_translation} \title{Data set for R/SI interpretation} -\format{A \code{\link{data.frame}} with 13,975 observations and 9 variables: +\format{ +A \code{\link{data.frame}} with 13,975 observations and 9 variables: \itemize{ \item \code{guideline}\cr Name of the guideline \item \code{method}\cr Either "MIC" or "DISK" @@ -15,7 +16,8 @@ \item \code{disk_dose}\cr Dose of the used disk diffusion method \item \code{breakpoint_S}\cr Lowest MIC value or highest number of millimetres that leads to "S" \item \code{breakpoint_R}\cr Highest MIC value or lowest number of millimetres that leads to "R" -}} +} +} \usage{ rsi_translation } diff --git a/pkgdown/logos/countries.png b/pkgdown/logos/countries.png index e9e82ee6..62084e3f 100644 Binary files a/pkgdown/logos/countries.png and b/pkgdown/logos/countries.png differ diff --git a/pkgdown/logos/countries_large.png b/pkgdown/logos/countries_large.png index 24b75f2c..1756232f 100644 Binary files a/pkgdown/logos/countries_large.png and b/pkgdown/logos/countries_large.png differ diff --git a/tests/testthat/test-join_microorganisms.R b/tests/testthat/test-join_microorganisms.R index c0522706..f942216d 100755 --- a/tests/testthat/test-join_microorganisms.R +++ b/tests/testthat/test-join_microorganisms.R @@ -47,10 +47,8 @@ test_that("joins work", { expect_equal(nrow(inner_join_microorganisms("B_ESCHR_COLI")), 1) expect_equal(nrow(inner_join_microorganisms("B_ESCHR_COLI", by = c("mo" = "mo"))), 1) - expect_warning(inner_join_microorganisms("Escherichia", by = c("mo" = "genus"))) expect_equal(nrow(left_join_microorganisms("B_ESCHR_COLI")), 1) - expect_warning(left_join_microorganisms("Escherichia", by = c("mo" = "genus"))) expect_equal(nrow(semi_join_microorganisms("B_ESCHR_COLI")), 1) 
expect_equal(nrow(anti_join_microorganisms("B_ESCHR_COLI")), 0)