diff --git a/DESCRIPTION b/DESCRIPTION index 5cf924b4..72421288 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.7.1.9035 -Date: 2019-08-11 +Version: 0.7.1.9036 +Date: 2019-08-12 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 98bd9feb..7cafc1af 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +S3method("[",ab) +S3method("[",mo) S3method(as.data.frame,ab) S3method(as.data.frame,mo) S3method(as.double,mic) @@ -19,6 +21,7 @@ S3method(kurtosis,data.frame) S3method(kurtosis,default) S3method(kurtosis,matrix) S3method(pillar_shaft,ab) +S3method(pillar_shaft,disk) S3method(pillar_shaft,mic) S3method(pillar_shaft,mo) S3method(pillar_shaft,rsi) @@ -33,8 +36,6 @@ S3method(print,mo) S3method(print,mo_renamed) S3method(print,mo_uncertainties) S3method(print,rsi) -S3method(pull,ab) -S3method(pull,mo) S3method(skewness,data.frame) S3method(skewness,default) S3method(skewness,matrix) @@ -42,6 +43,7 @@ S3method(summary,mic) S3method(summary,mo) S3method(summary,rsi) S3method(type_sum,ab) +S3method(type_sum,disk) S3method(type_sum,mic) S3method(type_sum,mo) S3method(type_sum,rsi) @@ -86,6 +88,7 @@ export(filter_1st_cephalosporins) export(filter_2nd_cephalosporins) export(filter_3rd_cephalosporins) export(filter_4th_cephalosporins) +export(filter_5th_cephalosporins) export(filter_ab_class) export(filter_aminoglycosides) export(filter_carbapenems) @@ -167,6 +170,8 @@ export(semi_join_microorganisms) export(set_mo_source) export(skewness) export(theme_rsi) +exportMethods("[.ab") +exportMethods("[.mo") exportMethods(as.data.frame.ab) exportMethods(as.data.frame.mo) exportMethods(as.double.mic) @@ -192,8 +197,6 @@ exportMethods(print.mo) exportMethods(print.mo_renamed) exportMethods(print.mo_uncertainties) exportMethods(print.rsi) -exportMethods(pull.ab) -exportMethods(pull.mo) exportMethods(scale_type.ab) 
exportMethods(scale_type.mo) exportMethods(skewness) diff --git a/NEWS.md b/NEWS.md index 59a8373a..e73741c2 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# AMR 0.7.1.9035 +# AMR 0.7.1.9036 ### Breaking * Function `freq()` has moved to a new package, [`clean`](https://github.com/msberends/clean) ([CRAN link](https://cran.r-project.org/package=clean)). Creating frequency tables is actually not the scope of this package (never was) and this function has matured a lot over the last two years. Therefore, a new package was created for data cleaning and checking and it perfectly fits the `freq()` function. The [`clean`](https://github.com/msberends/clean) package is available on CRAN and will be installed automatically when updating the `AMR` package, that now imports it. In a later stage, the `skewness()` and `kurtosis()` functions will be moved to the `clean` package too. @@ -46,16 +46,13 @@ * Printed info now distinguishes between added and changes values * Using Verbose mode (i.e. `eucast_rules(..., verbose = TRUE)`) returns more informative and readable output * Using factors as input now adds missing factors levels when the function changes antibiotic results -* Added tibble printing support for classes `rsi`, `mic`, `ab` and `mo`. When using tibbles containing antibiotic columns, values `S` will print in green, values `I` will print in yellow and values `R` will print in red: +* Added tibble printing support for classes `rsi`, `mic`, `disk`, `ab` and `mo`. When using tibbles containing antibiotic columns, values `S` will print in green, values `I` will print in yellow and values `R` will print in red. Microbial IDs (class `mo`) will emphasise the genus and species, not the kingdom. 
```r # (run this on your own console, as this page does not support colour printing) - tibble(mo = sample(AMR::microorganisms$fullname, 10), - drug1 = as.rsi(sample(c("S", "I", "R"), 10, replace = TRUE, - prob = c(0.6, 0.1, 0.3))), - drug2 = as.rsi(sample(c("S", "I", "R"), 10, replace = TRUE, - prob = c(0.6, 0.1, 0.3))), - drug3 = as.rsi(sample(c("S", "I", "R"), 10, replace = TRUE, - prob = c(0.6, 0.1, 0.3)))) + library(dplyr) + septic_patients %>% + select(mo:AMC) %>% + as_tibble() ``` * Removed class `atc` - using `as.atc()` is now deprecated in favour of `ab_atc()` and this will return a character, not the `atc` class anymore * Removed deprecated functions `abname()`, `ab_official()`, `atc_name()`, `atc_official()`, `atc_property()`, `atc_tradenames()`, `atc_trivial_nl()` @@ -69,8 +66,10 @@ * Fix for using `mo_*` functions where the coercion uncertainties and failures would not be available through `mo_uncertainties()` and `mo_failures()` anymore * Deprecated the `country` parameter of `mdro()` in favour of the already existing `guideline` parameter to support multiple guidelines within one country * The `name` of `RIF` is now Rifampicin instead of Rifampin -* The `antibiotics` data set is now sorted by name and all cephalosporines now have their generation between brackets +* The `antibiotics` data set is now sorted by name and all cephalosporins now have their generation between brackets * Speed improvement for `guess_ab_col()` which is now 30 times faster for antibiotic abbreviations +* Improved `filter_ab_class()` to be more reliable and to support 5th generation cephalosporins +* Classes `ab` and `mo` will now be preserved in any subsetting #### Other * Added Dr Bart Meijer, Dr Dennis Souverein and Annick Lenglet as contributors diff --git a/R/ab.R b/R/ab.R index 3fbc5929..19864eb3 100755 --- a/R/ab.R +++ b/R/ab.R @@ -279,12 +279,13 @@ as.data.frame.ab <- function (x, ...) 
{ } } -#' @exportMethod pull.ab +#' @exportMethod [.ab #' @export -#' @importFrom dplyr pull #' @noRd -pull.ab <- function(.data, ...) { - pull(as.data.frame(.data), ...) +"[.ab" <- function (x, ...) { + # this function is needed to preserve the "ab" class for any subsetting, like df %>% filter(...) + y <- NextMethod() + structure(y, class = "ab") } #' @importFrom pillar type_sum diff --git a/R/disk.R b/R/disk.R index efe84d8a..7effc3a6 100644 --- a/R/disk.R +++ b/R/disk.R @@ -90,3 +90,17 @@ print.disk <- function(x, ...) { cat("Class 'disk'\n") print(as.integer(x), quote = FALSE) } + +#' @importFrom pillar type_sum +#' @export +type_sum.disk <- function(x) { + "disk" +} + +#' @importFrom pillar pillar_shaft +#' @export +pillar_shaft.disk <- function(x, ...) { + out <- trimws(format(x)) + out[is.na(x)] <- pillar::style_na(NA) + pillar::new_pillar_shaft_simple(out, align = "right", min_width = 3) +} diff --git a/R/filter_ab_class.R b/R/filter_ab_class.R index 50dc4438..1f5ea37d 100644 --- a/R/filter_ab_class.R +++ b/R/filter_ab_class.R @@ -22,12 +22,12 @@ #' Filter isolates on result in antibiotic class #' #' Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside. -#' @param tbl a data set -#' @param ab_class an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}. 
+#' @param x a data set +#' @param ab_class an antimicrobial class, like \code{"carbapenems"}, as can be found in \code{AMR::antibiotics$group} #' @param result an antibiotic result: S, I or R (or a combination of more of them) #' @param scope the scope to check which variables to check, can be \code{"any"} (default) or \code{"all"} #' @param ... parameters passed on to \code{\link[dplyr]{filter_at}} -#' @details The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set. +#' @details The \code{group} column in \code{\link{antibiotics}} data set will be searched for \code{ab_class} (case-insensitive). If no results are found, the \code{atc_group1} and \code{atc_group2} columns will be searched. Next, \code{x} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set. #' @rdname filter_ab_class #' @keywords filter fillter_class #' @importFrom dplyr filter_at %>% select vars any_vars all_vars @@ -62,7 +62,7 @@ #' septic_patients %>% #' filter_aminoglycosides("R", "all") %>% #' filter_fluoroquinolones("R", "all") -filter_ab_class <- function(tbl, +filter_ab_class <- function(x, ab_class, result = NULL, scope = "any", @@ -71,7 +71,7 @@ filter_ab_class <- function(tbl, if (is.null(result)) { result <- c("S", "I", "R") } - # make result = "IR" work too: + # make result = "SI" work too: result <- unlist(strsplit(result, "")) if (!all(result %in% c("S", "I", "R"))) { @@ -81,8 +81,8 @@ filter_ab_class <- function(tbl, stop("`scope` must be one of: any, all", call. 
= FALSE) } - vars_df <- colnames(tbl)[tolower(colnames(tbl)) %in% tolower(ab_class_vars(ab_class))] - atc_groups <- ab_class_atcgroups(ab_class) + vars_df <- colnames(x)[tolower(colnames(x)) %in% tolower(ab_class_vars(ab_class))] + ab_group <- find_ab_group(ab_class) if (length(vars_df) > 0) { if (length(result) == 1) { @@ -101,29 +101,29 @@ filter_ab_class <- function(tbl, } } if (length(vars_df) > 1) { - scope <- paste(scope, "of ") + scope <- paste(scope, "of columns ") } else { - scope <- "" + scope <- "column " } - message(blue(paste0("Filtering on ", atc_groups, ": ", scope, + message(blue(paste0("Filtering on ", ab_group, ": ", scope, paste(bold(paste0("`", vars_df, "`")), collapse = scope_txt), operator, toString(result)))) - tbl %>% + x %>% filter_at(vars(vars_df), scope_fn(. %in% result), ...) } else { - warning(paste0("no antibiotics of class ", atc_groups, " found, leaving data unchanged"), call. = FALSE) - tbl + warning(paste0("no antibiotics of class ", ab_group, " found, leaving data unchanged"), call. = FALSE) + x } } #' @rdname filter_ab_class #' @export -filter_aminoglycosides <- function(tbl, +filter_aminoglycosides <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "aminoglycoside", result = result, scope = scope, @@ -132,11 +132,11 @@ filter_aminoglycosides <- function(tbl, #' @rdname filter_ab_class #' @export -filter_carbapenems <- function(tbl, +filter_carbapenems <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "carbapenem", result = result, scope = scope, @@ -145,11 +145,11 @@ filter_carbapenems <- function(tbl, #' @rdname filter_ab_class #' @export -filter_cephalosporins <- function(tbl, +filter_cephalosporins <- function(x, result = NULL, scope = "any", ...) 
{ - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "cephalosporin", result = result, scope = scope, @@ -158,12 +158,12 @@ filter_cephalosporins <- function(tbl, #' @rdname filter_ab_class #' @export -filter_1st_cephalosporins <- function(tbl, +filter_1st_cephalosporins <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, - ab_class = "first-generation cephalosporin", + filter_ab_class(x = x, + ab_class = "cephalosporins (1st gen.)", result = result, scope = scope, ...) @@ -171,12 +171,12 @@ filter_1st_cephalosporins <- function(tbl, #' @rdname filter_ab_class #' @export -filter_2nd_cephalosporins <- function(tbl, +filter_2nd_cephalosporins <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, - ab_class = "second-generation cephalosporin", + filter_ab_class(x = x, + ab_class = "cephalosporins (2nd gen.)", result = result, scope = scope, ...) @@ -184,12 +184,12 @@ filter_2nd_cephalosporins <- function(tbl, #' @rdname filter_ab_class #' @export -filter_3rd_cephalosporins <- function(tbl, +filter_3rd_cephalosporins <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, - ab_class = "third-generation cephalosporin", + filter_ab_class(x = x, + ab_class = "cephalosporins (3rd gen.)", result = result, scope = scope, ...) @@ -197,12 +197,12 @@ filter_3rd_cephalosporins <- function(tbl, #' @rdname filter_ab_class #' @export -filter_4th_cephalosporins <- function(tbl, +filter_4th_cephalosporins <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, - ab_class = "fourth-generation cephalosporin", + filter_ab_class(x = x, + ab_class = "cephalosporins (4th gen.)", result = result, scope = scope, ...) @@ -210,11 +210,24 @@ filter_4th_cephalosporins <- function(tbl, #' @rdname filter_ab_class #' @export -filter_fluoroquinolones <- function(tbl, +filter_5th_cephalosporins <- function(x, + result = NULL, + scope = "any", + ...) 
{ + filter_ab_class(x = x, + ab_class = "cephalosporins (5th gen.)", + result = result, + scope = scope, + ...) +} + +#' @rdname filter_ab_class +#' @export +filter_fluoroquinolones <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "fluoroquinolone", result = result, scope = scope, @@ -223,11 +236,11 @@ filter_fluoroquinolones <- function(tbl, #' @rdname filter_ab_class #' @export -filter_glycopeptides <- function(tbl, +filter_glycopeptides <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "glycopeptide", result = result, scope = scope, @@ -236,11 +249,11 @@ filter_glycopeptides <- function(tbl, #' @rdname filter_ab_class #' @export -filter_macrolides <- function(tbl, +filter_macrolides <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "macrolide", result = result, scope = scope, @@ -249,11 +262,11 @@ filter_macrolides <- function(tbl, #' @rdname filter_ab_class #' @export -filter_tetracyclines <- function(tbl, +filter_tetracyclines <- function(x, result = NULL, scope = "any", ...) { - filter_ab_class(tbl = tbl, + filter_ab_class(x = x, ab_class = "tetracycline", result = result, scope = scope, @@ -262,8 +275,9 @@ filter_tetracyclines <- function(tbl, #' @importFrom dplyr %>% filter_at vars any_vars select ab_class_vars <- function(ab_class) { + ab_class <- gsub("[^a-z0-9]+", ".*", ab_class) ab_vars <- AMR::antibiotics %>% - filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. 
%like% ab_class)) %>% + filter(group %like% ab_class) %>% select(ab:name, abbreviations, synonyms) %>% unlist() %>% as.matrix() %>% @@ -272,18 +286,29 @@ ab_class_vars <- function(ab_class) { strsplit("|", fixed = TRUE) %>% unlist() %>% unique() - ab_vars[!is.na(ab_vars)] + ab_vars <- ab_vars[!ab_vars %in% c(NA, "", "NA") & nchar(ab_vars) > 2] + if (length(ab_vars) == 0) { + # try again, searching atc_group1 and atc_group2 columns + ab_vars <- AMR::antibiotics %>% + filter_at(vars(c("atc_group1", "atc_group2")), any_vars(. %like% ab_class)) %>% + select(ab:name, abbreviations, synonyms) %>% + unlist() %>% + as.matrix() %>% + as.character() %>% + paste(collapse = "|") %>% + strsplit("|", fixed = TRUE) %>% + unlist() %>% + unique() + ab_vars <- ab_vars[!ab_vars %in% c(NA, "", "NA") & nchar(ab_vars) > 2] + } + ab_vars } #' @importFrom dplyr %>% filter pull -ab_class_atcgroups <- function(ab_class) { +find_ab_group <- function(ab_class) { ifelse(ab_class %in% c("aminoglycoside", "carbapenem", "cephalosporin", - "first-generation cephalosporin", - "second-generation cephalosporin", - "third-generation cephalosporin", - "fourth-generation cephalosporin", "fluoroquinolone", "glycopeptide", "macrolide", @@ -291,7 +316,7 @@ ab_class_atcgroups <- function(ab_class) { paste0(ab_class, "s"), AMR::antibiotics %>% filter(ab %in% ab_class_vars(ab_class)) %>% - pull("atc_group2") %>% + pull(group) %>% unique() %>% tolower() %>% paste(collapse = "/") diff --git a/R/mic.R b/R/mic.R index 291f7224..2b0d36d2 100755 --- a/R/mic.R +++ b/R/mic.R @@ -289,6 +289,6 @@ type_sum.mic <- function(x) { #' @export pillar_shaft.mic <- function(x, ...) { out <- trimws(format(x)) - out[is.na(x)] <- NA + out[is.na(x)] <- pillar::style_na(NA) pillar::new_pillar_shaft_simple(out, align = "right", min_width = 4) } diff --git a/R/mo.R b/R/mo.R index ed3307f0..657de9cc 100755 --- a/R/mo.R +++ b/R/mo.R @@ -1521,8 +1521,18 @@ type_sum.mo <- function(x) { #' @export pillar_shaft.mo <- function(x, ...) 
{ out <- format(x) - out[is.na(x)] <- pillar::style_na("NA") - pillar::new_pillar_shaft_simple(out, align = "left", min_width = 11) + # grey out the kingdom (part before first "_") + first_parts <- unlist(lapply(gregexpr(pattern = '_', x[!is.na(x)], fixed = TRUE), min)) + first_parts[first_parts < 0] <- 0 + out[!is.na(x)] <- paste0(pillar::style_subtle(substr(x[!is.na(x)], 0, first_parts)), + substr(x[!is.na(x)], first_parts + 1, nchar(x))) + + out[is.na(x)] <- pillar::style_na(" NA") + out[x == "UNKNOWN"] <- pillar::style_na(" UNKNOWN") + + out <- gsub("_", pillar::style_subtle("_"), out) + + pillar::new_pillar_shaft_simple(out, align = "left", min_width = 12) } #' @exportMethod summary.mo @@ -1556,12 +1566,13 @@ as.data.frame.mo <- function(x, ...) { } } -#' @exportMethod pull.mo +#' @exportMethod [.mo #' @export -#' @importFrom dplyr pull #' @noRd -pull.mo <- function(.data, ...) { - pull(as.data.frame(.data), ...) +"[.mo" <- function (x, ...) { + # this function is needed to preserve the "mo" class for any subsetting, like df %>% filter(...) + y <- NextMethod() + to_class_mo(y) } #' @rdname as.mo diff --git a/R/rsi.R b/R/rsi.R index 0274ee29..d8938c45 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -486,9 +486,9 @@ type_sum.rsi <- function(x) { #' @export pillar_shaft.rsi <- function(x, ...) 
{ out <- trimws(format(x)) - out[is.na(x)] <- pillar::style_subtle("NA") + out[is.na(x)] <- pillar::style_subtle(" NA") out[x == "S"] <- bgGreen(white(" S ")) out[x == "I"] <- bgYellow(black(" I ")) out[x == "R"] <- bgRed(white(" R ")) - pillar::new_pillar_shaft_simple(out, align = "left", min_width = 4) + pillar::new_pillar_shaft_simple(out, align = "left", min_width = 3) } diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 80b7586d..37a2ca89 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 7214e2c2..b1d4470e 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -40,7 +40,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 @@ -185,7 +185,7 @@

Benchmarks

Matthijs S. Berends

-

11 August 2019

+

12 August 2019

@@ -210,14 +210,14 @@ times = 10) print(S.aureus, unit = "ms", signif = 2) # Unit: milliseconds -# expr min lq mean median uq max neval -# as.mo("sau") 8.3 8.5 14.0 9.1 9.2 39.0 10 -# as.mo("stau") 31.0 32.0 38.0 32.0 47.0 53.0 10 -# as.mo("staaur") 8.2 8.4 20.0 8.7 40.0 55.0 10 -# as.mo("STAAUR") 8.2 8.4 10.0 8.9 9.4 24.0 10 -# as.mo("S. aureus") 23.0 24.0 33.0 24.0 24.0 99.0 10 -# as.mo("S. aureus") 23.0 23.0 29.0 24.0 40.0 42.0 10 -# as.mo("Staphylococcus aureus") 3.7 3.9 4.1 4.1 4.2 4.6 10 +# expr min lq mean median uq max neval +# as.mo("sau") 8.6 9.2 9.9 9.7 11.0 12 10 +# as.mo("stau") 32.0 33.0 33.0 33.0 34.0 35 10 +# as.mo("staaur") 8.7 9.0 13.0 9.6 11.0 26 10 +# as.mo("STAAUR") 8.6 9.1 13.0 9.5 9.7 28 10 +# as.mo("S. aureus") 24.0 24.0 28.0 25.0 26.0 41 10 +# as.mo("S. aureus") 24.0 24.0 37.0 24.0 39.0 120 10 +# as.mo("Staphylococcus aureus") 4.0 4.2 6.0 4.3 4.6 21 10

In the table above, all measurements are shown in milliseconds (thousandths of a second). A value of 5 milliseconds means it can determine 200 input values per second. In case of 100 milliseconds, this is only 10 input values per second. The second input is the only one that has to be looked up thoroughly. All the others are known codes (the first one is a WHONET code) or common laboratory codes, or common full organism names like the last one. Full organism names are always preferred.

To achieve this speed, the as.mo function also takes into account the prevalence of human pathogenic microorganisms. The downside is of course that less prevalent microorganisms will be determined less fast. See this example for the ID of Thermus islandicus (B_THERMS_ISL), a bug probably never found before in humans:

T.islandicus <- microbenchmark(as.mo("theisl"),
@@ -229,12 +229,12 @@
 print(T.islandicus, unit = "ms", signif = 2)
 # Unit: milliseconds
 #                         expr min  lq mean median  uq max neval
-#              as.mo("theisl") 270 270  280    290 290 300    10
-#              as.mo("THEISL") 280 290  290    290 300 300    10
-#       as.mo("T. islandicus") 130 130  150    150 160 160    10
-#      as.mo("T.  islandicus") 130 130  150    150 150 160    10
-#  as.mo("Thermus islandicus")  46  48   54     50  63  71    10
-

That takes 8.8 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.

+# as.mo("theisl") 270 280 290 290 310 320 10 +# as.mo("THEISL") 280 290 300 290 300 310 10 +# as.mo("T. islandicus") 140 140 150 140 160 170 10 +# as.mo("T. islandicus") 140 150 160 160 160 170 10 +# as.mo("Thermus islandicus") 48 49 60 60 68 77 10 +

That takes 9.5 times as much time on average. A value of 100 milliseconds means it can only determine ~10 different input values per second. We can conclude that looking up arbitrary codes of less prevalent microorganisms is the worst way to go, in terms of calculation performance. Full names (like Thermus islandicus) are almost fast - these are the most probable input from most data sets.

In the figure below, we compare Escherichia coli (which is very common) with Prevotella brevis (which is moderately common) and with Thermus islandicus (which is very uncommon):

par(mar = c(5, 16, 4, 2)) # set more space for left margin text (16)
 
@@ -280,8 +280,8 @@
 print(run_it, unit = "ms", signif = 3)
 # Unit: milliseconds
 #        expr min  lq mean median  uq max neval
-#  mo_name(x) 623 631  659    637 697 729    10
-

So transforming 500,000 values (!!) of 50 unique values only takes 0.64 seconds (637 ms). You only lose time on your unique input values.

+# mo_name(x) 596 622 635 626 635 704 10 +

So transforming 500,000 values (!!) of 50 unique values only takes 0.63 seconds (626 ms). You only lose time on your unique input values.

@@ -294,10 +294,10 @@ print(run_it, unit = "ms", signif = 3) # Unit: milliseconds # expr min lq mean median uq max neval -# A 6.290 6.730 7.170 7.010 7.760 8.09 10 -# B 22.600 22.700 26.200 23.000 25.400 44.30 10 -# C 0.798 0.806 0.874 0.844 0.891 1.05 10

-

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0008 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

+# A 6.700 6.950 7.410 7.600 7.730 8.06 10 +# B 22.900 23.900 27.000 24.100 24.200 46.00 10 +# C 0.772 0.833 0.876 0.874 0.918 1.03 10 +

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0009 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
                          C = mo_name("Staphylococcus aureus"),
@@ -310,14 +310,14 @@
 print(run_it, unit = "ms", signif = 3)
 # Unit: milliseconds
 #  expr   min    lq  mean median    uq   max neval
-#     A 0.455 0.458 0.471  0.465 0.482 0.504    10
-#     B 0.480 0.482 0.497  0.491 0.497 0.554    10
-#     C 0.662 0.687 0.754  0.750 0.788 0.964    10
-#     D 0.484 0.484 0.496  0.488 0.501 0.544    10
-#     E 0.442 0.450 0.459  0.456 0.462 0.492    10
-#     F 0.440 0.447 0.456  0.452 0.463 0.486    10
-#     G 0.450 0.452 0.462  0.459 0.463 0.485    10
-#     H 0.455 0.461 0.467  0.467 0.471 0.492    10
+# A 0.472 0.475 0.553 0.562 0.607 0.675 10 +# B 0.474 0.482 0.560 0.493 0.577 0.973 10 +# C 0.766 0.820 0.899 0.881 0.941 1.070 10 +# D 0.459 0.485 0.539 0.503 0.569 0.744 10 +# E 0.421 0.448 0.470 0.456 0.488 0.543 10 +# F 0.430 0.457 0.534 0.495 0.592 0.738 10 +# G 0.420 0.450 0.477 0.463 0.491 0.586 10 +# H 0.426 0.437 0.500 0.447 0.461 0.776 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" too, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -344,13 +344,13 @@ print(run_it, unit = "ms", signif = 4) # Unit: milliseconds # expr min lq mean median uq max neval -# en 17.66 17.86 18.50 18.49 19.14 19.36 10 -# de 19.03 19.38 19.64 19.49 20.01 20.42 10 -# nl 24.40 25.23 30.77 25.78 41.94 44.93 10 -# es 19.18 19.22 23.30 19.53 21.34 39.20 10 -# it 19.02 19.24 23.53 19.57 20.35 50.89 10 -# fr 19.28 19.33 19.87 19.57 20.19 21.25 10 -# pt 18.89 19.14 19.77 19.67 20.21 20.99 10
+# en 18.77 19.83 25.36 20.44 28.51 45.88 10 +# de 20.37 20.86 23.19 21.45 22.23 39.08 10 +# nl 26.50 27.09 27.74 27.58 28.10 29.74 10 +# es 20.82 21.18 21.66 21.27 22.04 23.64 10 +# it 19.82 20.65 25.98 21.22 22.13 50.42 10 +# fr 20.07 21.20 21.68 21.47 21.89 23.76 10 +# pt 19.87 20.63 22.76 21.07 21.68 38.11 10

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png index 938b62bf..e58bb316 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index b8b53703..4b572158 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 diff --git a/docs/authors.html b/docs/authors.html index c4e81532..38ef6d49 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 diff --git a/docs/index.html b/docs/index.html index a5a1bb68..67083bd3 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 diff --git a/docs/news/index.html b/docs/news/index.html index 9575cea0..b1435014 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 @@ -225,9 +225,9 @@ -
+

-AMR 0.7.1.9035 Unreleased +AMR 0.7.1.9036 Unreleased

@@ -289,15 +289,12 @@
  • -

    Added tibble printing support for classes rsi, mic, ab and mo. When using tibbles containing antibiotic columns, values S will print in green, values I will print in yellow and values R will print in red:

    +

    Added tibble printing support for classes rsi, mic, disk, ab and mo. When using tibbles containing antibiotic columns, values S will print in green, values I will print in yellow and values R will print in red. Microbial IDs (class mo) will emphasise the genus and species, not the kingdom.

    # (run this on your own console, as this page does not support colour printing)
    -tibble(mo = sample(AMR::microorganisms$fullname, 10),
    -       drug1 = as.rsi(sample(c("S", "I", "R"), 10, replace = TRUE, 
    -                             prob = c(0.6, 0.1, 0.3))),
    -       drug2 = as.rsi(sample(c("S", "I", "R"), 10, replace = TRUE,
    -                             prob = c(0.6, 0.1, 0.3))),
    -       drug3 = as.rsi(sample(c("S", "I", "R"), 10, replace = TRUE,
    -                             prob = c(0.6, 0.1, 0.3))))
    +library(dplyr) +septic_patients %>% + select(mo:AMC) %>% + as_tibble()
  • Removed class atc - using as.atc() is now deprecated in favour of ab_atc() and this will return a character, not the atc class anymore
  • Removed deprecated functions abname(), ab_official(), atc_name(), atc_official(), atc_property(), atc_tradenames(), atc_trivial_nl() @@ -316,8 +313,10 @@
  • Fix for using mo_* functions where the coercion uncertainties and failures would not be available through mo_uncertainties() and mo_failures() anymore
  • Deprecated the country parameter of mdro() in favour of the already existing guideline parameter to support multiple guidelines within one country
  • The name of RIF is now Rifampicin instead of Rifampin
  • -
  • The antibiotics data set is now sorted by name and all cephalosporines now have their generation between brackets
  • -
  • Speed improvement for guess_ab_col() which is now 30 times faster for antibiotic abbreviations

  • +
  • The antibiotics data set is now sorted by name and all cephalosporins now have their generation between brackets
  • +
  • Speed improvement for guess_ab_col() which is now 30 times faster for antibiotic abbreviations
  • +
  • Improved filter_ab_class() to be more reliable and to support 5th generation cephalosporins
  • +
  • Classes ab and mo will now be preserved in any subsetting

  • @@ -339,7 +338,7 @@
  • Function rsi_df() to transform a data.frame to a data set containing only the microbial interpretation (S, I, R), the antibiotic, the percentage of S/I/R and the number of available isolates. This is a convenient combination of the existing functions count_df() and portion_df() to immediately show resistance percentages and number of available isolates:

  • @@ -607,9 +606,9 @@ These functions use as.atc()filter_first_isolate(septic_patients, ...)

    is equal to:

    septic_patients %>%
    -  mutate(only_firsts = first_isolate(septic_patients, ...)) %>%
    -  filter(only_firsts == TRUE) %>%
    -  select(-only_firsts)
    + mutate(only_firsts = first_isolate(septic_patients, ...)) %>% + filter(only_firsts == TRUE) %>% + select(-only_firsts)
  • New function availability() to check the number of available (non-empty) results in a data.frame
  • @@ -713,7 +712,7 @@ Using as.mo(..., allow_uncertain = 3)
    # Determine genus of microorganisms (mo) in `septic_patients` data set:
     # OLD WAY
     septic_patients %>%
    -  mutate(genus = mo_genus(mo)) %>%
    +  mutate(genus = mo_genus(mo)) %>%
       freq(genus)
     # NEW WAY
     septic_patients %>% 
    @@ -721,7 +720,7 @@ Using as.mo(..., allow_uncertain = 3)
     # Even supports grouping variables:
     septic_patients %>%
    -  group_by(gender) %>% 
    +  group_by(gender) %>% 
       freq(mo_genus(mo))
  • Header info is now available as a list, with the header function
  • @@ -734,7 +733,7 @@ Using as.mo(..., allow_uncertain = 3)
  • New parameter droplevels to exclude empty factor levels when input is a factor
  • Factor levels will be in header when present in input data (maximum of 5)
  • -
  • Fix for using select() on frequency tables
  • +
  • Fix for using select() on frequency tables
  • Function scale_y_percent() now contains the limits parameter
  • @@ -814,14 +813,14 @@ Using as.mo(..., allow_uncertain = 3)

    Support for grouping variables, test with:

  • Support for (un)selecting columns:

    + select(-count, -cum_count) # only get item, percent, cum_percent
  • Check for hms::is.hms
  • @@ -978,7 +977,7 @@ Using as.mo(..., allow_uncertain = 3)
  • Support for quasiquotation in the functions series count_* and portions_*, and n_rsi. This allows to check for more than 2 vectors or columns.

    -
    septic_patients %>% select(amox, cipr) %>% count_IR()
    +
    septic_patients %>% select(amox, cipr) %>% count_IR()
     # which is the same as:
     septic_patients %>% count_IR(amox, cipr)
     
    @@ -1237,7 +1236,7 @@ Using as.mo(..., allow_uncertain = 3)
           

    Contents

    diff --git a/docs/reference/filter_ab_class.html b/docs/reference/filter_ab_class.html index 3a558c84..bc5e04dc 100644 --- a/docs/reference/filter_ab_class.html +++ b/docs/reference/filter_ab_class.html @@ -80,7 +80,7 @@ AMR (for R) - 0.7.1.9029 + 0.7.1.9036
    @@ -234,40 +234,42 @@ -
    filter_ab_class(tbl, ab_class, result = NULL, scope = "any", ...)
    +    
    filter_ab_class(x, ab_class, result = NULL, scope = "any", ...)
     
    -filter_aminoglycosides(tbl, result = NULL, scope = "any", ...)
    +filter_aminoglycosides(x, result = NULL, scope = "any", ...)
     
    -filter_carbapenems(tbl, result = NULL, scope = "any", ...)
    +filter_carbapenems(x, result = NULL, scope = "any", ...)
     
    -filter_cephalosporins(tbl, result = NULL, scope = "any", ...)
    +filter_cephalosporins(x, result = NULL, scope = "any", ...)
     
    -filter_1st_cephalosporins(tbl, result = NULL, scope = "any", ...)
    +filter_1st_cephalosporins(x, result = NULL, scope = "any", ...)
     
    -filter_2nd_cephalosporins(tbl, result = NULL, scope = "any", ...)
    +filter_2nd_cephalosporins(x, result = NULL, scope = "any", ...)
     
    -filter_3rd_cephalosporins(tbl, result = NULL, scope = "any", ...)
    +filter_3rd_cephalosporins(x, result = NULL, scope = "any", ...)
     
    -filter_4th_cephalosporins(tbl, result = NULL, scope = "any", ...)
    +filter_4th_cephalosporins(x, result = NULL, scope = "any", ...)
     
    -filter_fluoroquinolones(tbl, result = NULL, scope = "any", ...)
    +filter_5th_cephalosporins(x, result = NULL, scope = "any", ...)
     
    -filter_glycopeptides(tbl, result = NULL, scope = "any", ...)
    +filter_fluoroquinolones(x, result = NULL, scope = "any", ...)
     
    -filter_macrolides(tbl, result = NULL, scope = "any", ...)
    +filter_glycopeptides(x, result = NULL, scope = "any", ...)
     
    -filter_tetracyclines(tbl, result = NULL, scope = "any", ...)
    +filter_macrolides(x, result = NULL, scope = "any", ...)
    +
    +filter_tetracyclines(x, result = NULL, scope = "any", ...)

    Arguments

    - + - + @@ -285,7 +287,7 @@

    Details

    -

    The antibiotics data set will be searched for ab_class in the columns atc_group1 and atc_group2 (case-insensitive). Next, tbl will be checked for column names with a value in any abbreviations, codes or official names found in the antibiotics data set.

    +

    The group column in antibiotics data set will be searched for ab_class (case-insensitive). If no results are found, the atc_group1 and atc_group2 columns will be searched. Next, x will be checked for column names with a value in any abbreviations, codes or official names found in the antibiotics data set.

    Examples

    diff --git a/docs/reference/index.html b/docs/reference/index.html index de5910f1..18ece678 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 @@ -428,7 +428,7 @@ diff --git a/docs/reference/like.html b/docs/reference/like.html index b5d0978f..74fb7a39 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -80,7 +80,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 diff --git a/docs/reference/translate.html b/docs/reference/translate.html index 083b5d6f..e97f7a94 100644 --- a/docs/reference/translate.html +++ b/docs/reference/translate.html @@ -80,7 +80,7 @@ AMR (for R) - 0.7.1.9035 + 0.7.1.9036 diff --git a/man/filter_ab_class.Rd b/man/filter_ab_class.Rd index 31859f4b..55101198 100644 --- a/man/filter_ab_class.Rd +++ b/man/filter_ab_class.Rd @@ -9,40 +9,43 @@ \alias{filter_2nd_cephalosporins} \alias{filter_3rd_cephalosporins} \alias{filter_4th_cephalosporins} +\alias{filter_5th_cephalosporins} \alias{filter_fluoroquinolones} \alias{filter_glycopeptides} \alias{filter_macrolides} \alias{filter_tetracyclines} \title{Filter isolates on result in antibiotic class} \usage{ -filter_ab_class(tbl, ab_class, result = NULL, scope = "any", ...) +filter_ab_class(x, ab_class, result = NULL, scope = "any", ...) -filter_aminoglycosides(tbl, result = NULL, scope = "any", ...) +filter_aminoglycosides(x, result = NULL, scope = "any", ...) -filter_carbapenems(tbl, result = NULL, scope = "any", ...) +filter_carbapenems(x, result = NULL, scope = "any", ...) -filter_cephalosporins(tbl, result = NULL, scope = "any", ...) +filter_cephalosporins(x, result = NULL, scope = "any", ...) -filter_1st_cephalosporins(tbl, result = NULL, scope = "any", ...) +filter_1st_cephalosporins(x, result = NULL, scope = "any", ...) -filter_2nd_cephalosporins(tbl, result = NULL, scope = "any", ...) +filter_2nd_cephalosporins(x, result = NULL, scope = "any", ...) 
-filter_3rd_cephalosporins(tbl, result = NULL, scope = "any", ...) +filter_3rd_cephalosporins(x, result = NULL, scope = "any", ...) -filter_4th_cephalosporins(tbl, result = NULL, scope = "any", ...) +filter_4th_cephalosporins(x, result = NULL, scope = "any", ...) -filter_fluoroquinolones(tbl, result = NULL, scope = "any", ...) +filter_5th_cephalosporins(x, result = NULL, scope = "any", ...) -filter_glycopeptides(tbl, result = NULL, scope = "any", ...) +filter_fluoroquinolones(x, result = NULL, scope = "any", ...) -filter_macrolides(tbl, result = NULL, scope = "any", ...) +filter_glycopeptides(x, result = NULL, scope = "any", ...) -filter_tetracyclines(tbl, result = NULL, scope = "any", ...) +filter_macrolides(x, result = NULL, scope = "any", ...) + +filter_tetracyclines(x, result = NULL, scope = "any", ...) } \arguments{ -\item{tbl}{a data set} +\item{x}{a data set} -\item{ab_class}{an antimicrobial class, like \code{"carbapenems"}. More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see \href{https://www.whocc.no/atc/structure_and_principles/}{this explanation on the WHOCC website}.} +\item{ab_class}{an antimicrobial class, like \code{"carbapenems"}, as can be found in \code{AMR::antibiotics$group}} \item{result}{an antibiotic result: S, I or R (or a combination of more of them)} @@ -54,7 +57,7 @@ filter_tetracyclines(tbl, result = NULL, scope = "any", ...) Filter isolates on results in specific antibiotic variables based on their class (ATC groups). This makes it easy to get a list of isolates that were tested for e.g. any aminoglycoside. } \details{ -The \code{\link{antibiotics}} data set will be searched for \code{ab_class} in the columns \code{atc_group1} and \code{atc_group2} (case-insensitive). Next, \code{tbl} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set. 
+The \code{group} column in \code{\link{antibiotics}} data set will be searched for \code{ab_class} (case-insensitive). If no results are found, the \code{atc_group1} and \code{atc_group2} columns will be searched. Next, \code{x} will be checked for column names with a value in any abbreviations, codes or official names found in the \code{antibiotics} data set. } \examples{ library(dplyr) diff --git a/tests/testthat/test-mo.R b/tests/testthat/test-mo.R index 85a07fe6..8340ed48 100644 --- a/tests/testthat/test-mo.R +++ b/tests/testthat/test-mo.R @@ -159,8 +159,7 @@ test_that("as.mo works", { septic_patients[1:10,] %>% left_join_microorganisms() %>% select(genus, species) %>% - as.mo() %>% - as.character()) + as.mo()) # unknown results expect_warning(as.mo(c("INVALID", "Yeah, unknown"))) @@ -271,7 +270,7 @@ test_that("as.mo works", { expect_equal(as.character(as.mo("F_CANDD_GLB")), "F_CANDD_GLA") # debug mode - expect_warning(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3)) + expect_output(print(suppressWarnings(as.mo("kshgcjkhsdgkshjdfsfvsdfv", debug = TRUE, allow_uncertain = 3)))) # ..coccus expect_equal(as.character(as.mo(c("meningococ", "gonococ", "pneumococ"))),
    tblx

    a data set

    ab_class

    an antimicrobial class, like "carbapenems". More specifically, this should be a text that can be found in a 4th level ATC group (chemical subgroup) or a 5th level ATC group (chemical substance), please see this explanation on the WHOCC website.

    an antimicrobial class, like "carbapenems", as can be found in AMR::antibiotics$group

    result
    -

    filter_ab_class() filter_aminoglycosides() filter_carbapenems() filter_cephalosporins() filter_1st_cephalosporins() filter_2nd_cephalosporins() filter_3rd_cephalosporins() filter_4th_cephalosporins() filter_fluoroquinolones() filter_glycopeptides() filter_macrolides() filter_tetracyclines()

    +

    filter_ab_class() filter_aminoglycosides() filter_carbapenems() filter_cephalosporins() filter_1st_cephalosporins() filter_2nd_cephalosporins() filter_3rd_cephalosporins() filter_4th_cephalosporins() filter_5th_cephalosporins() filter_fluoroquinolones() filter_glycopeptides() filter_macrolides() filter_tetracyclines()

    Filter isolates on result in antibiotic class