diff --git a/DESCRIPTION b/DESCRIPTION index e54f581e..31916785 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.4.0.9007 -Date: 2020-10-21 +Version: 1.4.0.9008 +Date: 2020-10-26 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NAMESPACE b/NAMESPACE index 0eadd0cb..4660150e 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -65,6 +65,8 @@ S3method(unique,mo) S3method(unique,rsi) export("%like%") export("%like_case%") +export("%not_like%") +export("%not_like_case%") export(ab_atc) export(ab_atc_group1) export(ab_atc_group2) diff --git a/NEWS.md b/NEWS.md index f9123a95..6470ba87 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,17 +1,19 @@ -# AMR 1.4.0.9007 -## Last updated: 21 October 2020 +# AMR 1.4.0.9008 +## Last updated: 26 October 2020 ### New * Functions `is_gram_negative()` and `is_gram_positive()` as wrappers around `mo_gramstain()`. They always return `TRUE` or `FALSE`, thus always return `FALSE` for species outside the taxonomic kingdom of Bacteria. -* Functions `%not_like%` and `%like_perl%` as wrappers around `%like%`. +* Functions `%not_like%` and `%not_like_case%` as wrappers around `%like%` and `%like_case%`. The RStudio addin to insert the text " %like% " as provided in this package now iterates over all like variants. So if you have defined the keyboard shortcut Ctrl/Cmd + L to this addin, it will first insert ` %like% ` and by pressing it again it will be replaced with ` %not_like% `, etc. ### Changed * For all function parameters in the code, it is now defined what the exact type of user input should be (inspired by the [`typed`](https://github.com/moodymudskipper/typed) package). If the user input for a certain function does not meet the requirements for a specific parameter (such as the class or length), an informative error will be thrown. This makes the package more robust and the use of it more reproducible and reliable. In total, more than 400 arguments were defined. * Deprecated function `p_symbol()` that not really fits the scope of this package. It will be removed in a future version. See [here](https://github.com/msberends/AMR/blob/v1.4.0/R/p_symbol.R) for the source code to preserve it. * Better determination of disk zones and MIC values when running `as.rsi()` on a data.frame * Updated coagulase-negative staphylococci with Becker *et al.* 2020 (PMID 32056452), meaning that the species *S. argensis*, *S. caeli*, *S. debuckii*, *S. edaphicus* and *S. pseudoxylosus* are now all considered CoNS +* Fix for using parameter `reference_df` in `as.mo()` and `mo_*()` functions that contain old microbial codes (from previous package versions) ### Other +* All messages thrown by this package now have correct line breaks * More extensive unit tests # AMR 1.4.0 diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 448da7a2..20044608 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -9,7 +9,7 @@ # (c) 2018-2020 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -37,18 +37,18 @@ pm_left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { if (length(by) == 1) { by <- rep(by, 2) } - + int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1] int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2] colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L]) colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L]) - + merged <- cbind(x, y[match(x[, by[1], drop = TRUE], y[, by[2], drop = TRUE]), colnames(y)[!colnames(y) %in% colnames(x) & !colnames(y) == by[2]], drop = FALSE]) - + rownames(merged) <- NULL merged } @@ -71,7 +71,42 @@ addin_insert_in <- function() { # No export, no Rd addin_insert_like <- function() { - import_fn("insertText", "rstudioapi")(" %like% ") + stop_ifnot_installed("rstudioapi") + # we want Ctrl/Cmd + L to iterate over %like%, %not_like% and %like_case%, so determine context first + + getSourceEditorContext <- import_fn("getSourceEditorContext", "rstudioapi") + insertText <- import_fn("insertText", "rstudioapi") + modifyRange <- import_fn("insertText", "rstudioapi") + document_range <- import_fn("document_range", "rstudioapi") + document_position <- import_fn("document_position", "rstudioapi") + # setSelectionRanges <- import_fn("setSelectionRanges", "rstudioapi") + + context <- getSourceEditorContext() + current_row <- context$selection[[1]]$range$end[1] + current_col <- context$selection[[1]]$range$end[2] + current_row_txt <- context$contents[current_row] + + pos_preceded_by <- function(txt) { + substr(current_row_txt, current_col - nchar(txt), current_col) == txt + } + replace_pos <- function(old, with) { + modifyRange(document_range(document_position(current_row, current_col - nchar(old)), + document_position(current_row, current_col)), + text = with, + id = context$id) + } + + if (pos_preceded_by(" %like% ")) { + replace_pos(" %like% ", with = " %not_like% ") + } else if (pos_preceded_by(" %not_like% ")) { + replace_pos(" %not_like% ", with = " %like_case% ") + } else if (pos_preceded_by(" %like_case% ")) { + replace_pos(" %like_case% ", with = " %not_like_case% ") + } else if (pos_preceded_by(" %not_like_case% ")) { + replace_pos(" %not_like_case% ", with = " %like% ") + } else { + insertText(" %like% ") + } } check_dataset_integrity <- function() { @@ -88,13 +123,13 @@ check_dataset_integrity <- function() { # check if other packages did not overwrite our data sets tryCatch({ check_microorganisms <- all(c("mo", "fullname", "kingdom", "phylum", - "class", "order", "family", "genus", + "class", "order", "family", "genus", "species", "subspecies", "rank", "species_id", "source", "ref", "prevalence") %in% colnames(microorganisms), na.rm = TRUE) - check_antibiotics <- all(c("ab", "atc", "cid", "name", "group", + check_antibiotics <- all(c("ab", "atc", "cid", "name", "group", "atc_group1", "atc_group2", "abbreviations", - "synonyms", "oral_ddd", "oral_units", + "synonyms", "oral_ddd", "oral_units", "iv_ddd", "iv_units", "loinc") %in% colnames(antibiotics), na.rm = TRUE) }, error = function(e) { @@ -107,10 +142,10 @@ check_dataset_integrity <- function() { search_type_in_df <- function(x, type, info = TRUE) { # try to find columns based on type found <- NULL - + x <- as.data.frame(x, stringsAsFactors = FALSE) colnames(x) <- trimws(colnames(x)) - + # -- mo if (type == "mo") { if (any(sapply(x, is.mo))) { @@ -128,7 +163,7 @@ search_type_in_df <- function(x, type, info = TRUE) { } else if (any(colnames(x) %like% "species")) { found <- sort(colnames(x)[colnames(x) %like% "species"])[1] } - + } # -- key antibiotics if (type == "keyantibiotics") { @@ -180,7 +215,7 @@ search_type_in_df <- function(x, type, info = TRUE) { } } } - + if (!is.null(found) & info == TRUE) { msg <- paste0("NOTE: Using column `", font_bold(found), "` as input for `col_", type, "`.") if (type %in% c("keyantibiotics", "specimen")) { @@ -222,8 +257,8 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { get(name, envir = asNamespace(pkg)), error = function(e) { if (isTRUE(error_on_fail)) { - stop_("function ", name, "() not found in package '", pkg, - "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!", + stop_("function ", name, "() not found in package '", pkg, + "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!", call = FALSE) } else { return(NULL) @@ -231,6 +266,52 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { }) } +# this alternative to the message() function: +# - wraps text to never break lines within words +# - ignores formatted text while wrapping +# - adds indentation dependent on the type of message (like NOTE) +# - add additional formatting functions like blue or bold text +message_ <- function(..., appendLF = TRUE, add_fn = list(font_blue)) { + msg <- paste0(c(...), collapse = "") + + # we need to correct for already applied style, that adds text like "\033[31m\" + msg_stripped <- font_stripstyle(msg) + # where are the spaces now? + msg_stripped_wrapped <- paste0(strwrap(msg_stripped, + simplify = TRUE, + width = 0.95 * getOption("width")), + collapse = "\n") + msg_stripped_spaces <- which(unlist(strsplit(msg_stripped, "")) == " ") + msg_stripped_wrapped_spaces <- which(unlist(strsplit(msg_stripped_wrapped, "")) == " ") + # so these are the indices of spaces that need to be replaced + replace_spaces <- which(!msg_stripped_spaces %in% msg_stripped_wrapped_spaces) + # put it together + msg <- unlist(strsplit(msg, " ")) + msg[replace_spaces] <- paste0(msg[replace_spaces], "\n") + msg <- paste0(msg, collapse = " ") + msg <- gsub("\n ", "\n", msg, fixed = TRUE) + + if (msg_stripped %like% "^NOTE: ") { + indentation <- 6 + } else { + indentation <- 0 + } + msg <- gsub("\n", paste0("\n", strrep(" ", indentation)), msg, fixed = TRUE) + + if (length(add_fn) > 0) { + if (!is.list(add_fn)) { + add_fn <- list(add_fn) + } + for (i in seq_len(length(add_fn))) { + msg <- add_fn[[i]](msg) + } + } + message(msg, appendLF = appendLF) +} + +# this alternative to the stop() function: +# - adds the function name where the error was thrown +# - wraps text to never break lines within words stop_ <- function(..., call = TRUE) { msg <- paste0(c(...), collapse = "") if (!isFALSE(call)) { @@ -340,10 +421,10 @@ meet_criteria <- function(object, allow_NA = FALSE, ignore.case = FALSE, .call_depth = 0) { # depth in calling - + obj_name <- deparse(substitute(object)) call_depth <- -2 - abs(.call_depth) - + if (is.null(object)) { stop_if(allow_NULL == FALSE, "argument `", obj_name, "` must not be NULL", call = call_depth) return(invisible()) @@ -352,7 +433,7 @@ meet_criteria <- function(object, stop_if(allow_NA == FALSE, "argument `", obj_name, "` must not be NA", call = call_depth) return(invisible()) } - + vector_or <- function(v, quotes) { if (length(v) == 1) { return(paste0(ifelse(quotes, '"', ""), v, ifelse(quotes, '"', ""))) @@ -361,32 +442,32 @@ meet_criteria <- function(object, paste0(paste0(ifelse(quotes, '"', ""), v[seq_len(length(v) - 1)], ifelse(quotes, '"', ""), collapse = ", "), " or ", paste0(ifelse(quotes, '"', ""), v[length(v)], ifelse(quotes, '"', ""))) } - + if (!is.null(allow_class)) { - stop_ifnot(inherits(object, allow_class), "argument `", obj_name, - "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), - "be of class ", vector_or(allow_class, quotes = TRUE), + stop_ifnot(inherits(object, allow_class), "argument `", obj_name, + "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), + "be of class ", vector_or(allow_class, quotes = TRUE), ", not \"", paste(class(object), collapse = "/"), "\"", call = call_depth) # check data.frames for data if (inherits(object, "data.frame")) { - stop_if(any(dim(object) == 0), + stop_if(any(dim(object) == 0), "the data provided in argument `", obj_name, - "` must contain rows and columns (current dimensions: ", + "` must contain rows and columns (current dimensions: ", paste(dim(object), collapse = " x "), ")", call = call_depth) } } if (!is.null(has_length)) { - stop_ifnot(length(object) %in% has_length, "argument `", obj_name, - "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), + stop_ifnot(length(object) %in% has_length, "argument `", obj_name, + "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), "be of length ", vector_or(has_length, quotes = FALSE), ", not ", length(object), call = call_depth) } if (!is.null(looks_like)) { - stop_ifnot(object %like% looks_like, "argument `", obj_name, - "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), + stop_ifnot(object %like% looks_like, "argument `", obj_name, + "` must ", # ifelse(allow_NULL, "be NULL or must ", ""), "resemble the regular expression \"", looks_like, "\"", call = call_depth) } @@ -395,16 +476,16 @@ meet_criteria <- function(object, object <- tolower(object) is_in <- tolower(is_in) } - stop_ifnot(all(object %in% is_in, na.rm = TRUE), "argument `", obj_name, + stop_ifnot(all(object %in% is_in, na.rm = TRUE), "argument `", obj_name, "` must be ", ifelse(!is.null(has_length) && length(has_length) == 1 && has_length == 1, "one of: ", ""), - vector_or(is_in, quotes = TRUE), + vector_or(is_in, quotes = TRUE), ", not ", paste0("\"", object, "\"", collapse = "/"), "", call = call_depth) } if (!is.null(contains_column_class)) { stop_ifnot(any(sapply(object, function(col, columns_class = contains_column_class) inherits(col, columns_class)), na.rm = TRUE), - "the data provided in argument `", obj_name, + "the data provided in argument `", obj_name, "` must contain at least one column of class <", contains_column_class, ">. ", "See ?as.", contains_column_class, ".", call = call_depth) @@ -463,7 +544,7 @@ has_colour <- function() { if (Sys.getenv("TERM") == "dumb") { return(FALSE) } - grepl(pattern = "^screen|^xterm|^vt100|color|ansi|cygwin|linux", + grepl(pattern = "^screen|^xterm|^vt100|color|ansi|cygwin|linux", x = Sys.getenv("TERM"), ignore.case = TRUE, perl = TRUE) @@ -560,7 +641,7 @@ progress_ticker <- function(n = 1, n_min = 0, ...) { create_pillar_column <- function(x, ...) { new_pillar_shaft_simple <- import_fn("new_pillar_shaft_simple", "pillar", error_on_fail = FALSE) if (!is.null(new_pillar_shaft_simple)) { - new_pillar_shaft_simple(x, ...) + new_pillar_shaft_simple(x, ...) } else { # does not exist in package 'pillar' anymore structure(list(x), @@ -622,12 +703,12 @@ round2 <- function(x, digits = 0, force_zero = TRUE) { if (digits > 0 & force_zero == TRUE) { values_trans <- val[val != as.integer(val) & !is.na(val)] val[val != as.integer(val) & !is.na(val)] <- paste0(values_trans, - strrep("0", - max(0, + strrep("0", + max(0, digits - nchar( format( as.double( - gsub(".*[.](.*)$", + gsub(".*[.](.*)$", "\\1", values_trans)), scientific = FALSE))))) @@ -638,7 +719,7 @@ round2 <- function(x, digits = 0, force_zero = TRUE) { # percentage from our other package: 'cleaner' percentage <- function(x, digits = NULL, ...) { - + # getdecimalplaces() function getdecimalplaces <- function(x, minimum = 0, maximum = 3) { if (maximum < minimum) { @@ -647,20 +728,20 @@ percentage <- function(x, digits = NULL, ...) { if (minimum > maximum) { minimum <- maximum } - max_places <- max(unlist(lapply(strsplit(sub("0+$", "", + max_places <- max(unlist(lapply(strsplit(sub("0+$", "", as.character(x * 100)), ".", fixed = TRUE), function(y) ifelse(length(y) == 2, nchar(y[2]), 0))), na.rm = TRUE) max(min(max_places, maximum, na.rm = TRUE), minimum, na.rm = TRUE) } - + # format_percentage() function format_percentage <- function(x, digits = NULL, ...) { if (is.null(digits)) { digits <- getdecimalplaces(x) } - + # round right: percentage(0.4455) and format(as.percentage(0.4455), 1) should return "44.6%", not "44.5%" x_formatted <- format(round2(as.double(x), digits = digits + 2) * 100, scientific = FALSE, @@ -671,7 +752,7 @@ percentage <- function(x, digits = NULL, ...) { x_formatted[!grepl(pattern = "^[0-9.,e-]+$", x = x)] <- NA_character_ x_formatted } - + # the actual working part x <- as.double(x) if (is.null(digits)) { @@ -688,12 +769,12 @@ percentage <- function(x, digits = NULL, ...) { # see here for the full list: https://github.com/r-lib/backports strrep <- function(x, times) { x <- as.character(x) - if (length(x) == 0L) + if (length(x) == 0L) return(x) unlist(.mapply(function(x, times) { - if (is.na(x) || is.na(times)) + if (is.na(x) || is.na(times)) return(NA_character_) - if (times <= 0L) + if (times <= 0L) return("") paste0(replicate(times, x), collapse = "") }, list(x = x, times = times), MoreArgs = list()), use.names = FALSE) @@ -701,9 +782,9 @@ strrep <- function(x, times) { trimws <- function(x, which = c("both", "left", "right")) { which <- match.arg(which) mysub <- function(re, x) sub(re, "", x, perl = TRUE) - if (which == "left") + if (which == "left") return(mysub("^[ \t\r\n]+", x)) - if (which == "right") + if (which == "right") return(mysub("[ \t\r\n]+$", x)) mysub("[ \t\r\n]+$", mysub("^[ \t\r\n]+", x)) } diff --git a/R/like.R b/R/like.R index d132341e..3d1c8b8b 100755 --- a/R/like.R +++ b/R/like.R @@ -23,7 +23,7 @@ # how to conduct AMR analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -#' Pattern Matching +#' Pattern matching with keyboard shortcut #' #' Convenient wrapper around [grep()] to match a pattern: `x %like% pattern`. It always returns a [`logical`] vector and is always case-insensitive (use `x %like_case% pattern` for case-sensitive matching). Also, `pattern` can be as long as `x` to compare items of each index in both vectors, or they both can have the same length to iterate over all cases. #' @inheritSection lifecycle Stable lifecycle @@ -41,9 +41,9 @@ #' * Checks if `pattern` is a regular expression and sets `fixed = TRUE` if not, to greatly improve speed #' * Tries again with `perl = TRUE` if regex fails #' -#' Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like `Ctrl+Shift+L` or `Cmd+Shift+L` (see `Tools` > `Modify Keyboard Shortcuts...`). +#' Using RStudio? This function can also be inserted in your code from the Addins menu and can have its own Keyboard Shortcut like `Ctrl+Shift+L` or `Cmd+Shift+L` (see `Tools` > `Modify Keyboard Shortcuts...`). This addin iterates over all 'like' variants. So if you have defined the keyboard shortcut Ctrl/Cmd + L to this addin, it will first insert ` %like% ` and by pressing it again it will be replaced with ` %not_like% `, then ` %like_case% `, then ` %not_like_case% ` and then back to ` %like% `. #' -#' The `"%not_like%"` and `"%like_perl%"` functions are wrappers around `"%like%"`. +#' The `"%not_like%"` and `"%not_like_case%"` functions are wrappers around `"%like%"` and `"%like_case%"`. #' @source Idea from the [`like` function from the `data.table` package](https://github.com/Rdatatable/data.table/blob/master/R/like.R) #' @seealso [grep()] #' @inheritSection AMR Read more on our website! @@ -168,8 +168,15 @@ like <- function(x, pattern, ignore.case = TRUE) { like(x, pattern, ignore.case = FALSE) } + #' @rdname like #' @export +"%not_like_case%" <- function(x, pattern) { + meet_criteria(x, allow_NA = TRUE) + meet_criteria(pattern, allow_NA = FALSE) + !like(x, pattern, ignore.case = FALSE) +} + "%like_perl%" <- function(x, pattern) { meet_criteria(x, allow_NA = TRUE) meet_criteria(pattern, allow_NA = FALSE) diff --git a/R/mo.R b/R/mo.R index af8d1b29..daeb411e 100755 --- a/R/mo.R +++ b/R/mo.R @@ -9,7 +9,7 @@ # (c) 2018-2020 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -44,7 +44,7 @@ #' @keywords mo Becker becker Lancefield lancefield guess #' @details #' ## General info -#' +#' #' A microorganism ID from this package (class: [`mo`]) is human readable and typically looks like these examples: #' ``` #' Code Full name @@ -68,35 +68,35 @@ #' The algorithm uses data from the Catalogue of Life (see below) and from one other source (see [microorganisms]). #' #' The [as.mo()] function uses several coercion rules for fast and logical results. It assesses the input matching criteria in the following order: -#' +#' #' 1. Human pathogenic prevalence: the function starts with more prevalent microorganisms, followed by less prevalent ones; #' 2. Taxonomic kingdom: the function starts with determining Bacteria, then Fungi, then Protozoa, then others; #' 3. Breakdown of input values to identify possible matches. #' #' This will lead to the effect that e.g. `"E. coli"` (a microorganism highly prevalent in humans) will return the microbial ID of *Escherichia coli* and not *Entamoeba coli* (a microorganism less prevalent in humans), although the latter would alphabetically come first. -#' +#' #' ## Coping with uncertain results -#' -#' In addition, the [as.mo()] function can differentiate four levels of uncertainty to guess valid results: +#' +#' In addition, the [as.mo()] function can differentiate four levels of uncertainty to guess valid results: #' - Uncertainty level 0: no additional rules are applied; #' - Uncertainty level 1: allow previously accepted (but now invalid) taxonomic names and minor spelling errors; #' - Uncertainty level 2: allow all of level 1, strip values between brackets, inverse the words of the input, strip off text elements from the end keeping at least two elements; #' - Uncertainty level 3: allow all of level 1 and 2, strip off text elements from the end, allow any part of a taxonomic name. -#' +#' #' The level of uncertainty can be set using the argument `allow_uncertain`. The default is `allow_uncertain = TRUE`, which is equal to uncertainty level 2. Using `allow_uncertain = FALSE` is equal to uncertainty level 0 and will skip all rules. You can also use e.g. `as.mo(..., allow_uncertain = 1)` to only allow up to level 1 uncertainty. -#' +#' #' With the default setting (`allow_uncertain = TRUE`, level 2), below examples will lead to valid results: #' - `"Streptococcus group B (known as S. agalactiae)"`. The text between brackets will be removed and a warning will be thrown that the result *Streptococcus group B* (``r as.mo("Streptococcus group B")``) needs review. #' - `"S. aureus - please mind: MRSA"`. The last word will be stripped, after which the function will try to find a match. If it does not, the second last word will be stripped, etc. Again, a warning will be thrown that the result *Staphylococcus aureus* (``r as.mo("Staphylococcus aureus")``) needs review. #' - `"Fluoroquinolone-resistant Neisseria gonorrhoeae"`. The first word will be stripped, after which the function will try to find a match. A warning will be thrown that the result *Neisseria gonorrhoeae* (``r as.mo("Neisseria gonorrhoeae")``) needs review. -#' +#' #' There are three helper functions that can be run after using the [as.mo()] function: #' - Use [mo_uncertainties()] to get a [data.frame] that prints in a pretty format with all taxonomic names that were guessed. The output contains the matching score for all matches (see *Background on matching score*). #' - Use [mo_failures()] to get a [character] [vector] with all values that could not be coerced to a valid value. #' - Use [mo_renamed()] to get a [data.frame] with all values that could be coerced based on old, previously accepted taxonomic names. #' #' ## Microbial prevalence of pathogens in humans -#' +#' #' The intelligent rules consider the prevalence of microorganisms in humans grouped into three groups, which is available as the `prevalence` columns in the [microorganisms] and [microorganisms.old] data sets. The grouping into human pathogenic prevalence is explained in the section *Matching score for microorganisms* below. #' @inheritSection mo_matching_score Matching score for microorganisms #' @inheritSection catalogue_of_life Catalogue of Life @@ -110,7 +110,7 @@ #' @export #' @return A [character] [vector] with additional class [`mo`] #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. -#' +#' #' The [mo_property()] functions (like [mo_genus()], [mo_gramstain()]) to get properties based on the returned code. #' @inheritSection AMR Reference data publicly available #' @inheritSection AMR Read more on our website! @@ -130,7 +130,7 @@ #' as.mo("VISA") # Vancomycin Intermediate S. aureus #' as.mo("VRSA") # Vancomycin Resistant S. aureus #' as.mo(115329001) # SNOMED CT code -#' +#' #' # Dyslexia is no problem - these all work: #' as.mo("Ureaplasma urealyticum") #' as.mo("Ureaplasma urealyticus") @@ -151,10 +151,10 @@ #' mo_genus("E. coli") # returns "Escherichia" #' mo_gramstain("E. coli") # returns "Gram negative" #' } -as.mo <- function(x, - Becker = FALSE, - Lancefield = FALSE, - allow_uncertain = TRUE, +as.mo <- function(x, + Becker = FALSE, + Lancefield = FALSE, + allow_uncertain = TRUE, reference_df = get_mo_source(), ignore_pattern = getOption("AMR_ignore_pattern"), language = get_locale(), @@ -166,9 +166,9 @@ as.mo <- function(x, meet_criteria(reference_df, allow_class = "data.frame", allow_NULL = TRUE) meet_criteria(ignore_pattern, allow_class = "character", has_length = 1, allow_NULL = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + check_dataset_integrity() - + if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & isFALSE(Becker) & isFALSE(Lancefield), error = function(e) FALSE)) { @@ -176,34 +176,34 @@ as.mo <- function(x, # is.mo() won't work - codes might change between package versions return(to_class_mo(x)) } - + if (tryCatch(all(x == "" | gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE) %in% MO_lookup$fullname_lower, na.rm = TRUE) & isFALSE(Becker) & isFALSE(Lancefield), error = function(e) FALSE)) { # to improve speed, special case for taxonomically correct full names (case-insensitive) return(MO_lookup[match(gsub(".*(unknown ).*", "unknown name", tolower(x), perl = TRUE), MO_lookup$fullname_lower), "mo", drop = TRUE]) } - + # start off with replaced language-specific non-ASCII characters with ASCII characters x <- parse_and_convert(x) # replace mo codes used in older package versions x <- replace_old_mo_codes(x, property = "mo") # ignore cases that match the ignore pattern x <- replace_ignore_pattern(x, ignore_pattern) - + # WHONET: xxx = no growth x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_ # Laboratory systems: remove (translated) entries like "no growth", etc. x[trimws2(x) %like% translate_AMR("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% paste0("^(", translate_AMR("no|not", language = language), ") [a-z]+")] <- "UNKNOWN" uncertainty_level <- translate_allow_uncertain(allow_uncertain) - + if (mo_source_isvalid(reference_df) & isFALSE(Becker) & isFALSE(Lancefield) & !is.null(reference_df) & all(x %in% reference_df[, 1][[1]])) { - + # has valid own reference_df # (data.table not faster here) reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) @@ -221,25 +221,27 @@ as.mo <- function(x, suppressWarnings( y <- data.frame(x = x, stringsAsFactors = FALSE) %pm>% pm_left_join(reference_df, by = "x") %pm>% - pm_pull("mo") + pm_pull("mo") %pm>% + # run as.mo() for when using old microbial codes + as.mo() ) - + } else if (all(x[!is.na(x)] %in% MO_lookup$mo) & isFALSE(Becker) & isFALSE(Lancefield)) { y <- x - + } else { # will be checked for mo class in validation and uses exec_as.mo internally if necessary y <- mo_validate(x = x, property = "mo", Becker = Becker, Lancefield = Lancefield, - allow_uncertain = uncertainty_level, + allow_uncertain = uncertainty_level, reference_df = reference_df, ignore_pattern = ignore_pattern, language = language, ...) } - + to_class_mo(y) } @@ -290,24 +292,24 @@ exec_as.mo <- function(x, meet_criteria(actual_uncertainty, allow_class = "numeric", has_length = 1) meet_criteria(actual_input, allow_class = "character", allow_NULL = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + check_dataset_integrity() - - lookup <- function(needle, + + lookup <- function(needle, column = property, haystack = reference_data_to_use, n = 1, - debug_mode = debug, + debug_mode = debug, initial = initial_search, uncertainty = actual_uncertainty, input_actual = actual_input) { - + if (!is.null(input_actual)) { input <- input_actual } else { input <- tryCatch(x_backup[i], error = function(e) "") } - + # `column` can be NULL for all columns, or a selection # returns a character (vector) - if `column` > length 1 then with columns as names if (isTRUE(debug_mode)) { @@ -317,7 +319,7 @@ exec_as.mo <- function(x, res_df <- haystack[which(eval(substitute(needle), envir = haystack, enclos = parent.frame())), , drop = FALSE] if (NROW(res_df) > 1 & uncertainty != -1) { # sort the findings on matching score - scores <- mo_matching_score(x = input, + scores <- mo_matching_score(x = input, n = res_df[, "fullname", drop = TRUE]) res_df <- res_df[order(scores, decreasing = TRUE), , drop = FALSE] } @@ -326,7 +328,7 @@ exec_as.mo <- function(x, if (isTRUE(debug_mode)) { cat(font_red(" (no match)\n")) } - NA_character_ + NA_character_ } else { if (isTRUE(debug_mode)) { cat(font_green(paste0(" **MATCH** (", NROW(res_df), " results)\n"))) @@ -358,27 +360,27 @@ exec_as.mo <- function(x, res } } - + # start off with replaced language-specific non-ASCII characters with ASCII characters x <- parse_and_convert(x) # replace mo codes used in older package versions x <- replace_old_mo_codes(x, property) # ignore cases that match the ignore pattern x <- replace_ignore_pattern(x, ignore_pattern) - + # WHONET: xxx = no growth x[tolower(as.character(paste0(x, ""))) %in% c("", "xxx", "na", "nan")] <- NA_character_ # Laboratory systems: remove (translated) entries like "no growth", etc. x[trimws2(x) %like% translate_AMR("no .*growth", language = language)] <- NA_character_ x[trimws2(x) %like% paste0("^(", translate_AMR("no|not", language = language), ") [a-z]+")] <- "UNKNOWN" - + if (initial_search == TRUE) { options(mo_failures = NULL) options(mo_uncertainties = NULL) options(mo_renamed = NULL) } options(mo_renamed_last_run = NULL) - + failures <- character(0) uncertainty_level <- translate_allow_uncertain(allow_uncertain) uncertainties <- data.frame(uncertainty = integer(0), @@ -388,8 +390,7 @@ exec_as.mo <- function(x, mo = character(0), candidates = character(0), stringsAsFactors = FALSE) - old_mo_warning <- FALSE - + x_input <- x # already strip leading and trailing spaces x <- trimws(x) @@ -401,25 +402,28 @@ exec_as.mo <- function(x, & !is.null(x) & !identical(x, "") & !identical(x, "xxx")] - + # defined df to check for if (!is.null(reference_df)) { mo_source_isvalid(reference_df) - + reference_df <- reference_df %pm>% pm_filter(!is.na(mo)) - # keep only first two columns, second must be mo + # keep only first two columns, second must be named "mo" if (colnames(reference_df)[1] == "mo") { reference_df <- reference_df[, c(2, 1)] } else { reference_df <- reference_df[, c(1, 2)] } + # some microbial codes might be old + reference_df$mo <- as.mo(reference_df$mo) + colnames(reference_df)[1] <- "x" # remove factors, just keep characters suppressWarnings( reference_df[] <- lapply(reference_df, as.character) ) } - + # all empty if (all(identical(trimws(x_input), "") | is.na(x_input) | length(x) == 0)) { if (property == "mo") { @@ -427,39 +431,39 @@ exec_as.mo <- function(x, } else { return(rep(NA_character_, length(x_input))) } - + } else if (all(x %in% reference_df[, 1][[1]])) { # all in reference df colnames(reference_df)[1] <- "x" suppressWarnings( x <- MO_lookup[match(reference_df[match(x, reference_df$x), "mo", drop = TRUE], MO_lookup$mo), property, drop = TRUE] ) - + } else if (all(x %in% reference_data_to_use$mo)) { x <- MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE] - + } else if (all(tolower(x) %in% reference_data_to_use$fullname_lower)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") x <- MO_lookup[match(tolower(x), MO_lookup$fullname_lower), property, drop = TRUE] - + } else if (all(x %in% reference_data_to_use$fullname)) { # we need special treatment for very prevalent full names, they are likely! # e.g. as.mo("Staphylococcus aureus") x <- MO_lookup[match(x, MO_lookup$fullname), property, drop = TRUE] - + } else if (all(toupper(x) %in% microorganisms.codes$code)) { # commonly used MO codes - x <- MO_lookup[match(microorganisms.codes[match(toupper(x), - microorganisms.codes$code), + x <- MO_lookup[match(microorganisms.codes[match(toupper(x), + microorganisms.codes$code), "mo", - drop = TRUE], - MO_lookup$mo), + drop = TRUE], + MO_lookup$mo), property, drop = TRUE] - + } else if (!all(x %in% microorganisms[, property])) { - + strip_whitespace <- function(x, dyslexia_mode) { # all whitespaces (tab, new lines, etc.) should be one space # and spaces before and after should be omitted @@ -472,22 +476,22 @@ exec_as.mo <- function(x, } trimmed } - + x_backup_untouched <- x x <- strip_whitespace(x, dyslexia_mode) x_backup <- x - + # from here on case-insensitive x <- tolower(x) - + x_backup[grepl("^(fungus|fungi)$", x)] <- "F_FUNGUS" # will otherwise become the kingdom - + # remove spp and species x <- gsub(" +(spp.?|ssp.?|sp.? |ss ?.?|subsp.?|subspecies|biovar |serovar |species)", " ", x, perl = TRUE) x <- gsub("(spp.?|subsp.?|subspecies|biovar|serovar|species)", "", x, perl = TRUE) x <- gsub("^([a-z]{2,4})(spe.?)$", "\\1", x, perl = TRUE) # when ending in SPE instead of SPP and preceded by 2-4 characters x <- strip_whitespace(x, dyslexia_mode) - + x_backup_without_spp <- x x_species <- paste(x, "species") # translate to English for supported languages of mo_property @@ -554,7 +558,7 @@ exec_as.mo <- function(x, # make sure to remove regex overkill (will lead to errors) x <- gsub("++", "+", x, fixed = TRUE) x <- gsub("?+", "?", x, fixed = TRUE) - + x_trimmed <- x x_trimmed_species <- paste(x_trimmed, "species") x_trimmed_without_group <- gsub(" gro.u.p$", "", x_trimmed, perl = TRUE) @@ -565,11 +569,11 @@ exec_as.mo <- function(x, x <- gsub("[ .]+", ".*", x, perl = TRUE) # add start en stop regex x <- paste0("^", x, "$") - + x_withspaces_start_only <- paste0("^", x_withspaces) x_withspaces_end_only <- paste0(x_withspaces, "$") x_withspaces_start_end <- paste0("^", x_withspaces, "$") - + if (isTRUE(debug)) { cat(paste0(font_blue("x"), ' "', x, '"\n')) cat(paste0(font_blue("x_species"), ' "', x_species, '"\n')) @@ -582,25 +586,25 @@ exec_as.mo <- function(x, cat(paste0(font_blue("x_trimmed_species"), ' "', x_trimmed_species, '"\n')) cat(paste0(font_blue("x_trimmed_without_group"), ' "', x_trimmed_without_group, '"\n')) } - + if (initial_search == TRUE) { progress <- progress_ticker(n = length(x), n_min = 25) # start if n >= 25 on.exit(close(progress)) } - + for (i in seq_len(length(x))) { - + if (initial_search == TRUE) { progress$tick() } - + # valid MO code ---- found <- lookup(mo == toupper(x_backup[i])) if (!is.na(found)) { x[i] <- found[1L] next } - + # valid fullname ---- found <- lookup(fullname_lower %in% gsub("[^a-zA-Z0-9_. -]", "", tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) # added the gsub() for "(unknown fungus)", since fullname_lower does not contain brackets @@ -608,7 +612,7 @@ exec_as.mo <- function(x, x[i] <- found[1L] next } - + # old fullname ---- found <- lookup(fullname_lower %in% tolower(c(x_backup[i], x_backup_without_spp[i])), column = NULL, # all columns @@ -630,13 +634,13 @@ exec_as.mo <- function(x, mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) next } - + if (x_backup[i] %like_case% "\\(unknown [a-z]+\\)" | tolower(x_backup_without_spp[i]) %in% c("other", "none", "unknown")) { # empty and nonsense values, ignore without warning x[i] <- lookup(mo == "UNKNOWN") next } - + # exact SNOMED code ---- if (x_backup[i] %like% "^[0-9]+$") { snomed_found <- unlist(lapply(reference_data_to_use$snomed, @@ -653,24 +657,24 @@ exec_as.mo <- function(x, } } } - + # very probable: is G. species ---- - found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", + found <- lookup(g_species %in% gsub("[^a-z0-9/ \\-]+", "", tolower(c(x_backup[i], x_backup_without_spp[i])), perl = TRUE)) if (!is.na(found)) { x[i] <- found[1L] next } - + # WHONET and other common LIS codes ---- found <- lookup(code %in% toupper(c(x_backup_untouched[i], x_backup[i], x_backup_without_spp[i])), - column = "mo", + column = "mo", haystack = microorganisms.codes) if (!is.na(found)) { x[i] <- lookup(mo == found) next } - + # user-defined reference ---- if (!is.null(reference_df)) { if (x_backup[i] %in% reference_df[, 1]) { @@ -680,13 +684,13 @@ exec_as.mo <- function(x, next } } - + # WHONET: xxx = no growth if (tolower(as.character(paste0(x_backup_without_spp[i], ""))) %in% c("", "xxx", "na", "nan")) { x[i] <- NA_character_ next } - + # check for very small input, but ignore the O antigens of E. coli if (nchar(gsub("[^a-zA-Z]", "", x_trimmed[i])) < 3 & !toupper(x_backup_without_spp[i]) %like_case% "O?(26|103|104|104|111|121|145|157)") { @@ -697,13 +701,13 @@ exec_as.mo <- function(x, } next } - + if (x_backup_without_spp[i] %like_case% "(virus|viridae)") { # there is no fullname like virus or viridae, so don't try to coerce it x[i] <- NA_character_ next } - + # translate known trivial abbreviations to genus + species ---- if (toupper(x_backup_without_spp[i]) %in% c("MRSA", "MSSA", "VISA", "VRSA", "BORSA") | x_backup_without_spp[i] %like_case% " (mrsa|mssa|visa|vrsa) ") { @@ -765,14 +769,14 @@ exec_as.mo <- function(x, } if (x_backup_without_spp[i] %like_case% "(streptococ|streptokok).* [abcdfghk]$") { # Streptococci in different languages, like "estreptococos grupo B" - x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", + x[i] <- lookup(mo == toupper(gsub(".*(streptococ|streptokok|estreptococ).* ([abcdfghk])$", "B_STRPT_GRP\\2", x_backup_without_spp[i])), uncertainty = -1) next } if (x_backup_without_spp[i] %like_case% "group [abcdfghk] (streptococ|streptokok|estreptococ)") { # Streptococci in different languages, like "Group A Streptococci" - x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", + x[i] <- lookup(mo == toupper(gsub(".*group ([abcdfghk]) (streptococ|streptokok|estreptococ).*", "B_STRPT_GRP\\1", x_backup_without_spp[i])), uncertainty = -1) next @@ -831,7 +835,7 @@ exec_as.mo <- function(x, x[i] <- lookup(genus == "Mycobacterium", uncertainty = -1) next } - + if (x_backup_without_spp[i] %like_case% "salmonella [a-z]+ ?.*") { if (x_backup_without_spp[i] %like_case% "salmonella group") { # Salmonella Group A to Z, just return S. species for now @@ -849,7 +853,7 @@ exec_as.mo <- function(x, next } } - + # trivial names known to the field: if ("meningococcus" %like_case% x_trimmed[i]) { # coerce Neisseria meningitidis @@ -867,7 +871,7 @@ exec_as.mo <- function(x, next } # } - + # NOW RUN THROUGH DIFFERENT PREVALENCE LEVELS check_per_prevalence <- function(data_to_check, data.old_to_check, @@ -880,10 +884,10 @@ exec_as.mo <- function(x, g.x_backup_without_spp, h.x_species, i.x_trimmed_species) { - + # FIRST TRY FULLNAMES AND CODES ---- # if only genus is available, return only genus - + if (all(!c(x[i], b.x_trimmed) %like_case% " ")) { found <- lookup(fullname_lower %in% c(h.x_species, i.x_trimmed_species), haystack = data_to_check) @@ -901,7 +905,7 @@ exec_as.mo <- function(x, } # rest of genus only is in allow_uncertain part. } - + # allow no codes less than 4 characters long, was already checked for WHONET earlier if (nchar(g.x_backup_without_spp) < 4) { x[i] <- lookup(mo == "UNKNOWN") @@ -910,21 +914,21 @@ exec_as.mo <- function(x, } return(x[i]) } - + # try probable: trimmed version of fullname ---- found <- lookup(fullname_lower %in% tolower(g.x_backup_without_spp), haystack = data_to_check) if (!is.na(found)) { return(found[1L]) } - + # try any match keeping spaces ---- found <- lookup(fullname_lower %like_case% d.x_withspaces_start_end, haystack = data_to_check) if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } - + # try any match keeping spaces, not ending with $ ---- found <- lookup(fullname_lower %like_case% paste0(trimws(e.x_withspaces_start_only), " "), haystack = data_to_check) @@ -936,14 +940,14 @@ exec_as.mo <- function(x, if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } - + # try any match keeping spaces, not start with ^ ---- found <- lookup(fullname_lower %like_case% paste0(" ", trimws(f.x_withspaces_end_only)), haystack = data_to_check) if (!is.na(found)) { return(found[1L]) } - + # try a trimmed version found <- lookup(fullname_lower %like_case% b.x_trimmed | fullname_lower %like_case% c.x_trimmed_without_group, @@ -951,8 +955,8 @@ exec_as.mo <- function(x, if (!is.na(found) & nchar(g.x_backup_without_spp) >= 6) { return(found[1L]) } - - + + # try splitting of characters in the middle and then find ID ---- # only when text length is 6 or lower # like esco = E. coli, klpn = K. pneumoniae, stau = S. aureus, staaur = S. aureus @@ -968,7 +972,7 @@ exec_as.mo <- function(x, return(found[1L]) } } - + # try fullname without start and without nchar limit of >= 6 ---- # like "K. pneu rhino" >> "Klebsiella pneumoniae (rhinoscleromatis)" = KLEPNERH found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, @@ -976,9 +980,9 @@ exec_as.mo <- function(x, if (!is.na(found)) { return(found[1L]) } - + # MISCELLANEOUS ---- - + # look for old taxonomic names ---- found <- lookup(fullname_lower %like_case% e.x_withspaces_start_only, column = NULL, # all columns @@ -1000,7 +1004,7 @@ exec_as.mo <- function(x, mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) return(x[i]) } - + # check for uncertain results ---- uncertain_fn <- function(a.x_backup, b.x_trimmed, @@ -1009,16 +1013,16 @@ exec_as.mo <- function(x, f.x_withspaces_end_only, g.x_backup_without_spp, uncertain.reference_data_to_use) { - + if (uncertainty_level == 0) { # do not allow uncertainties return(NA_character_) } - + # UNCERTAINTY LEVEL 1 ---- if (uncertainty_level >= 1) { now_checks_for_uncertainty_level <- 1 - + # (1) look again for old taxonomic names, now for G. species ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (1) look again for old taxonomic names, now for G. species\n")) @@ -1051,7 +1055,7 @@ exec_as.mo <- function(x, result_mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup))) return(x) } - + # (2) Try with misspelled input ---- # just rerun with dyslexia_mode = TRUE will used the extensive regex part above if (isTRUE(debug)) { @@ -1074,11 +1078,11 @@ exec_as.mo <- function(x, return(found) } } - + # UNCERTAINTY LEVEL 2 ---- if (uncertainty_level >= 2) { now_checks_for_uncertainty_level <- 2 - + # (3) look for genus only, part of name ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (3) look for genus only, part of name\n")) @@ -1102,7 +1106,7 @@ exec_as.mo <- function(x, } } } - + # (4) strip values between brackets ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (4) strip values between brackets\n")) @@ -1125,7 +1129,7 @@ exec_as.mo <- function(x, found <- lookup(mo == found) return(found) } - + # (5) inverse input ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (5) inverse input\n")) @@ -1134,7 +1138,7 @@ exec_as.mo <- function(x, if (isTRUE(debug)) { message("Running '", a.x_backup_inversed, "'") } - + # first try without dyslexia mode found <- suppressMessages(suppressWarnings(exec_as.mo(a.x_backup_inversed, initial_search = FALSE, dyslexia_mode = FALSE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) if (empty_result(found)) { @@ -1148,7 +1152,7 @@ exec_as.mo <- function(x, found <- lookup(mo == found) return(found) } - + # (6) try to strip off half an element from end and check the remains ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (6) try to strip off half an element from end and check the remains\n")) @@ -1197,7 +1201,7 @@ exec_as.mo <- function(x, # then with dyslexia mode found <- suppressMessages(suppressWarnings(exec_as.mo(x_strip_collapsed, initial_search = FALSE, dyslexia_mode = TRUE, allow_uncertain = FALSE, debug = debug, reference_data_to_use = uncertain.reference_data_to_use, actual_uncertainty = 2, actual_input = a.x_backup))) } - + if (!empty_result(found)) { found_result <- found uncertainties <<- rbind(uncertainties, @@ -1262,11 +1266,11 @@ exec_as.mo <- function(x, } } } - + # UNCERTAINTY LEVEL 3 ---- if (uncertainty_level >= 3) { now_checks_for_uncertainty_level <- 3 - + # (10) try to strip off one element from start and check the remains (any text size) ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (10) try to strip off one element from start and check the remains (any text size)\n")) @@ -1319,7 +1323,7 @@ exec_as.mo <- function(x, } } } - + # (12) part of a name (very unlikely match) ---- if (isTRUE(debug)) { cat(font_bold("\n[ UNCERTAINTY LEVEL", now_checks_for_uncertainty_level, "] (12) part of a name (very unlikely match)\n")) @@ -1336,38 +1340,38 @@ exec_as.mo <- function(x, return(found) } } - - + + # didn't found in uncertain results too return(NA_character_) } - + # uncertain results - x[i] <- uncertain_fn(a.x_backup = a.x_backup, + x[i] <- uncertain_fn(a.x_backup = a.x_backup, b.x_trimmed = b.x_trimmed, d.x_withspaces_start_end = d.x_withspaces_start_end, - e.x_withspaces_start_only = e.x_withspaces_start_only, + e.x_withspaces_start_only = e.x_withspaces_start_only, f.x_withspaces_end_only = f.x_withspaces_end_only, g.x_backup_without_spp = g.x_backup_without_spp, uncertain.reference_data_to_use = MO_lookup) # MO_lookup[which(MO_lookup$prevalence %in% c(1, 2)), ]) if (!empty_result(x[i])) { return(x[i]) } - # x[i] <- uncertain_fn(a.x_backup = a.x_backup, + # x[i] <- uncertain_fn(a.x_backup = a.x_backup, # b.x_trimmed = b.x_trimmed, # d.x_withspaces_start_end = d.x_withspaces_start_end, - # e.x_withspaces_start_only = e.x_withspaces_start_only, + # e.x_withspaces_start_only = e.x_withspaces_start_only, # f.x_withspaces_end_only = f.x_withspaces_end_only, # g.x_backup_without_spp = g.x_backup_without_spp, # uncertain.reference_data_to_use = MO_lookup[which(MO_lookup$prevalence == 3), ]) # if (!empty_result(x[i])) { # return(x[i]) # } - + # didn't found any return(NA_character_) } - + # CHECK ALL IN ONE GO ---- x[i] <- check_per_prevalence(data_to_check = MO_lookup, data.old_to_check = MO.old_lookup, @@ -1383,20 +1387,20 @@ exec_as.mo <- function(x, if (!empty_result(x[i])) { next } - - + + # no results found: make them UNKNOWN ---- x[i] <- lookup(mo == "UNKNOWN", uncertainty = -1) if (initial_search == TRUE) { failures <- c(failures, x_backup[i]) } } - + if (initial_search == TRUE) { close(progress) } } - + # handling failures ---- failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0 & initial_search == TRUE) { @@ -1427,7 +1431,7 @@ exec_as.mo <- function(x, if (NROW(uncertainties) > 0 & initial_search == TRUE) { uncertainties <- as.list(pm_distinct(uncertainties, input, .keep_all = TRUE)) options(mo_uncertainties = uncertainties) - + plural <- c("", "it", "was") if (length(uncertainties$input) > 1) { plural <- c("s", "them", "were") @@ -1436,7 +1440,7 @@ exec_as.mo <- function(x, " ", plural[3], " guessed with uncertainty. Use mo_uncertainties() to review ", plural[2], ".") message(font_red(msg)) } - + # Becker ---- if (Becker == TRUE | Becker == "all") { # warn when species found that are not in: @@ -1445,7 +1449,7 @@ exec_as.mo <- function(x, # - Becker et al. 2020, PMID 32056452 post_Becker <- c("") # 2020-10-20 currently all are mentioned in above papers if (any(x %in% MO_lookup[which(MO_lookup$species %in% post_Becker), property])) { - + warning("Becker ", font_italic("et al."), " (2014, 2019) does not contain these species named after their publication: ", font_italic(paste("S.", sort(mo_species(unique(x[x %in% MO_lookup[which(MO_lookup$species %in% post_Becker), property]]))), @@ -1454,19 +1458,19 @@ exec_as.mo <- function(x, call. = FALSE, immediate. = TRUE) } - + # 'MO_CONS' and 'MO_COPS' are vectors created in R/zzz.R CoNS <- MO_lookup[which(MO_lookup$mo %in% MO_CONS), property, drop = TRUE] x[x %in% CoNS] <- lookup(mo == "B_STPHY_CONS", uncertainty = -1) - + CoPS <- MO_lookup[which(MO_lookup$mo %in% MO_COPS), property, drop = TRUE] x[x %in% CoPS] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) - + if (Becker == "all") { x[x %in% lookup(fullname %like_case% "^Staphylococcus aureus", n = Inf)] <- lookup(mo == "B_STPHY_COPS", uncertainty = -1) } } - + # Lancefield ---- if (Lancefield == TRUE | Lancefield == "all") { # group A - S. pyogenes @@ -1488,52 +1492,47 @@ exec_as.mo <- function(x, # group K - S. salivarius x[x %in% lookup(genus == "Streptococcus" & species == "salivarius", n = Inf)] <- lookup(fullname == "Streptococcus group K", uncertainty = -1) } - + # Wrap up ---------------------------------------------------------------- - + # comply to x, which is also unique and without empty values x_input_unique_nonempty <- unique(x_input[!is.na(x_input) & !is.null(x_input) & !identical(x_input, "") & !identical(x_input, "xxx")]) - + # left join the found results to the original input values (x_input) df_found <- data.frame(input = as.character(x_input_unique_nonempty), found = as.character(x), stringsAsFactors = FALSE) df_input <- data.frame(input = as.character(x_input), stringsAsFactors = FALSE) - + # super fast using match() which is a lot faster than merge() x <- df_found$found[match(df_input$input, df_found$input)] - + if (property == "mo") { x <- to_class_mo(x) } - + if (length(mo_renamed()) > 0) { print(mo_renamed()) } - + if (initial_search == FALSE) { # we got here from uncertain_fn(). if (NROW(uncertainties) == 0) { # the stripped/transformed version of x_backup is apparently a full hit, like with: as.mo("Escherichia (hello there) coli") - uncertainties <- rbind(uncertainties, - format_uncertainty_as_df(uncertainty_level = actual_uncertainty, - input = actual_input, + uncertainties <- rbind(uncertainties, + format_uncertainty_as_df(uncertainty_level = actual_uncertainty, + input = actual_input, result_mo = x, candidates = "")) } # this will save the uncertain items as attribute, so they can be bound to `uncertainties` in the uncertain_fn() function x <- structure(x, uncertainties = uncertainties) } - - - if (old_mo_warning == TRUE & property != "mo") { - warning("The input contained old microorganism IDs from previous versions of this package.\nPlease use `as.mo()` on these old IDs to transform them to the new format.\nSUPPORT FOR THIS WILL BE DROPPED IN A FUTURE VERSION.", call. = FALSE) - } - + x } @@ -1542,11 +1541,11 @@ empty_result <- function(x) { } was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") { - newly_set <- data.frame(old_name = name_old, + newly_set <- data.frame(old_name = name_old, old_ref = ref_old, new_name = name_new, new_ref = ref_new, - mo = mo, + mo = mo, stringsAsFactors = FALSE) already_set <- getOption("mo_renamed") if (!is.null(already_set)) { @@ -1585,14 +1584,14 @@ pillar_shaft.mo <- function(x, ...) { out[!is.na(x)] <- gsub("^([A-Z]+_)(.*)", paste0(font_subtle("\\1"), "\\2"), out[!is.na(x)], perl = TRUE) # and grey out every _ out[!is.na(x)] <- gsub("_", font_subtle("_"), out[!is.na(x)]) - + # markup NA and UNKNOWN out[is.na(x)] <- font_na(" NA") out[x == "UNKNOWN"] <- font_na(" UNKNOWN") - + # make it always fit exactly create_pillar_column(out, - align = "left", + align = "left", width = max(nchar(x)) + ifelse(any(x %in% c(NA, "UNKNOWN")), 2, 0)) } @@ -1706,7 +1705,7 @@ as.data.frame.mo <- function(x, ...) { y <- NextMethod() attributes(y) <- attributes(i) # must only contain valid MOs - class_integrity_check(y, "microorganism code", c(as.character(microorganisms$mo), + class_integrity_check(y, "microorganism code", c(as.character(microorganisms$mo), as.character(microorganisms.translation$mo_old))) } #' @method [[<- mo @@ -1716,7 +1715,7 @@ as.data.frame.mo <- function(x, ...) { y <- NextMethod() attributes(y) <- attributes(i) # must only contain valid MOs - class_integrity_check(y, "microorganism code", c(as.character(microorganisms$mo), + class_integrity_check(y, "microorganism code", c(as.character(microorganisms$mo), as.character(microorganisms.translation$mo_old))) } #' @method c mo @@ -1726,7 +1725,7 @@ c.mo <- function(x, ...) { y <- NextMethod() attributes(y) <- attributes(x) # must only contain valid MOs - class_integrity_check(y, "microorganism code", c(as.character(microorganisms$mo), + class_integrity_check(y, "microorganism code", c(as.character(microorganisms$mo), as.character(microorganisms.translation$mo_old))) } @@ -1762,11 +1761,11 @@ print.mo_uncertainties <- function(x, ...) { if (NROW(x) == 0) { return(NULL) } - cat(font_blue(strwrap("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.", - width = 0.98 * getOption("width")), + cat(font_blue(strwrap("Matching scores are based on human pathogenic prevalence and the resemblance between the input and the full taxonomic name. Please see ?mo_matching_score.", + width = 0.98 * getOption("width")), collapse = "\n")) cat("\n") - + msg <- "" for (i in seq_len(nrow(x))) { if (x[i, ]$candidates != "") { @@ -1778,17 +1777,17 @@ print.mo_uncertainties <- function(x, ...) { n_candidates <- length(candidates) candidates <- paste0(candidates, " (", scores_formatted[order(1 - scores)], ")", collapse = ", ") # align with input after arrow - candidates <- paste0("\n", + candidates <- paste0("\n", strwrap(paste0("Also matched", - ifelse(n_candidates >= 25, " (max 25)", ""), ": ", + ifelse(n_candidates >= 25, " (max 25)", ""), ": ", candidates), # this is already max 25 due to format_uncertainty_as_df() indent = nchar(x[i, ]$input) + 6, - exdent = nchar(x[i, ]$input) + 6, + exdent = nchar(x[i, ]$input) + 6, width = 0.98 * getOption("width")), collapse = "") # after strwrap, make taxonomic names italic candidates <- gsub("([A-Za-z]+)", font_italic("\\1"), candidates) - candidates <- gsub(paste(font_italic(c("Also", "matched"), collapse = NULL), collapse = " "), + candidates <- gsub(paste(font_italic(c("Also", "matched"), collapse = NULL), collapse = " "), "Also matched", candidates, fixed = TRUE) candidates <- gsub(font_italic("max"), "max", candidates, fixed = TRUE) @@ -1809,7 +1808,7 @@ print.mo_uncertainties <- function(x, ...) { ", matching score = ", score, ") ")), width = 0.98 * getOption("width"), - exdent = nchar(x[i, ]$input) + 6), + exdent = nchar(x[i, ]$input) + 6), collapse = "\n"), candidates, sep = "\n") @@ -1839,14 +1838,14 @@ print.mo_renamed <- function(x, ...) { return(invisible()) } for (i in seq_len(nrow(x))) { - message(font_blue(paste0("NOTE: ", - font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "", + message(font_blue(paste0("NOTE: ", + font_italic(x$old_name[i]), ifelse(x$old_ref[i] %in% c("", NA), "", paste0(" (", gsub("et al.", font_italic("et al."), x$old_ref[i]), ")")), - " was renamed ", + " was renamed ", ifelse(as.integer(gsub("[^0-9]", "", x$new_ref[i])) < as.integer(gsub("[^0-9]", "", x$old_ref[i])), font_bold("back to "), ""), - font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "", + font_italic(x$new_name[i]), ifelse(x$new_ref[i] %in% c("", NA), "", paste0(" (", gsub("et al.", font_italic("et al."), x$new_ref[i]), ")")), " [", x$mo[i], "]"))) } @@ -1934,7 +1933,9 @@ replace_old_mo_codes <- function(x, property) { # assign on places where a match was found x[which(!is.na(matched))] <- mo_new[which(!is.na(matched))] if (property != "mo") { - message(font_blue("NOTE: Old microbial codes (from previous package versions) were replaced with current codes used by this package.\n Please update your MO codes with as.mo()." )) + message_(font_blue("NOTE: The input contained old microbial codes (from previous package versions). Please update your MO codes with as.mo().")) + } else { + message_(font_blue("NOTE:", length(matched), "old microbial codes (from previous package versions) were updated to current used codes.")) } } x @@ -1944,8 +1945,8 @@ replace_ignore_pattern <- function(x, ignore_pattern) { if (!is.null(ignore_pattern) && !identical(trimws2(ignore_pattern), "")) { ignore_cases <- x %like% ignore_pattern if (sum(ignore_cases) > 0) { - message(font_blue(paste0("NOTE: the following input was ignored by `ignore_pattern = \"", ignore_pattern, "\"`: ", - paste0("'", sort(unique(x[x %like% ignore_pattern])), "'", collapse = ", "), + message(font_blue(paste0("NOTE: the following input was ignored by `ignore_pattern = \"", ignore_pattern, "\"`: ", + paste0("'", sort(unique(x[x %like% ignore_pattern])), "'", collapse = ", "), collapse = ", "))) x[x %like% ignore_pattern] <- NA_character_ } diff --git a/R/mo_property.R b/R/mo_property.R index e2794d70..c5132733 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -9,7 +9,7 @@ # (c) 2018-2020 Berends MS, Luz CF et al. # # Developed at the University of Groningen, the Netherlands, in # # collaboration with non-profit organisations Certe Medical # -# Diagnostics & Advice, and University Medical Center Groningen. # +# Diagnostics & Advice, and University Medical Center Groningen. # # # # This R package is free software; you can freely use and distribute # # it for both personal and commercial purposes under the terms of the # @@ -29,7 +29,7 @@ #' @inheritSection lifecycle Stable lifecycle #' @param x any character (vector) that can be coerced to a valid microorganism code with [as.mo()] #' @param property one of the column names of the [microorganisms] data set or `"shortname"` -#' @param language language of the returned text, defaults to system language (see [get_locale()]) and can be overwritten by setting the option `AMR_locale`, e.g. `options(AMR_locale = "de")`, see [translate]. Use `language = NULL` or `language = ""` to prevent translation. +#' @param language language of the returned text, defaults to system language (see [get_locale()]) and can be overwritten by setting the option `AMR_locale`, e.g. `options(AMR_locale = "de")`, see [translate]. Also used to translate text like "no growth". Use `language = NULL` or `language = ""` to prevent translation. #' @param ... other parameters passed on to [as.mo()], such as 'allow_uncertain' and 'ignore_pattern' #' @param open browse the URL using [utils::browseURL()] #' @details All functions will return the most recently known taxonomic property according to the Catalogue of Life, except for [mo_ref()], [mo_authors()] and [mo_year()]. Please refer to this example, knowing that *Escherichia blattae* was renamed to *Shimwellia blattae* in 2010: @@ -38,7 +38,7 @@ #' - `mo_ref("Shimwellia blattae")` will return `"Priest et al., 2010"` (without a message) #' #' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (like *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (like *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. -#' +#' #' Since the top-level of the taxonomy is sometimes referred to as 'kingdom' and sometimes as 'domain', the functions [mo_kingdom()] and [mo_domain()] return the exact same results. #' #' The Gram stain - [mo_gramstain()] - will be determined based on the taxonomic kingdom and phylum. According to Cavalier-Smith (2002, [PMID 11837318](https://pubmed.ncbi.nlm.nih.gov/11837318)), who defined subkingdoms Negibacteria and Posibacteria, only these phyla are Posibacteria: Actinobacteria, Chloroflexi, Firmicutes and Tenericutes. These bacteria are considered Gram-positive - all other bacteria are considered Gram-negative. Species outside the kingdom of Bacteria will return a value `NA`. Functions [is_gram_negative()] and [is_gram_positive()] always return `TRUE` or `FALSE`, even for species outside the kingdom of Bacteria. @@ -148,7 +148,7 @@ mo_name <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "fullname", language = language, ...), language = language, only_unknown = FALSE) } @@ -161,20 +161,20 @@ mo_fullname <- mo_name mo_shortname <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) - + metadata <- get_mo_failures_uncertainties_renamed() - + replace_empty <- function(x) { x[x == ""] <- "spp." x } - + # get first char of genus and complete species in English genera <- mo_genus(x.mo, language = NULL) shortnames <- paste0(substr(genera, 1, 1), ". ", replace_empty(mo_species(x.mo, language = NULL))) - + # exceptions for where no species is known shortnames[shortnames %like% ".[.] spp[.]"] <- genera[shortnames %like% ".[.] spp[.]"] # exceptions for Staphylococci @@ -184,7 +184,7 @@ mo_shortname <- function(x, language = get_locale(), ...) { shortnames[shortnames %like% "S. group [ABCDFGHK]"] <- paste0("G", gsub("S. group ([ABCDFGHK])", "\\1", shortnames[shortnames %like% "S. group [ABCDFGHK]"]), "S") # unknown species etc. shortnames[shortnames %like% "unknown"] <- paste0("(", trimws(gsub("[^a-zA-Z -]", "", shortnames[shortnames %like% "unknown"])), ")") - + load_mo_failures_uncertainties_renamed(metadata) translate_AMR(shortnames, language = language, only_unknown = FALSE) } @@ -194,7 +194,7 @@ mo_shortname <- function(x, language = get_locale(), ...) { mo_subspecies <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "subspecies", language = language, ...), language = language, only_unknown = TRUE) } @@ -203,7 +203,7 @@ mo_subspecies <- function(x, language = get_locale(), ...) { mo_species <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "species", language = language, ...), language = language, only_unknown = TRUE) } @@ -212,7 +212,7 @@ mo_species <- function(x, language = get_locale(), ...) { mo_genus <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "genus", language = language, ...), language = language, only_unknown = TRUE) } @@ -221,7 +221,7 @@ mo_genus <- function(x, language = get_locale(), ...) { mo_family <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "family", language = language, ...), language = language, only_unknown = TRUE) } @@ -230,7 +230,7 @@ mo_family <- function(x, language = get_locale(), ...) { mo_order <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "order", language = language, ...), language = language, only_unknown = TRUE) } @@ -239,7 +239,7 @@ mo_order <- function(x, language = get_locale(), ...) { mo_class <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "class", language = language, ...), language = language, only_unknown = TRUE) } @@ -248,7 +248,7 @@ mo_class <- function(x, language = get_locale(), ...) { mo_phylum <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "phylum", language = language, ...), language = language, only_unknown = TRUE) } @@ -257,7 +257,7 @@ mo_phylum <- function(x, language = get_locale(), ...) { mo_kingdom <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "kingdom", language = language, ...), language = language, only_unknown = TRUE) } @@ -270,7 +270,7 @@ mo_domain <- mo_kingdom mo_type <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = "kingdom", language = language, ...), language = language, only_unknown = FALSE) } @@ -279,10 +279,10 @@ mo_type <- function(x, language = get_locale(), ...) { mo_gramstain <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x.mo <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + x.phylum <- mo_phylum(x.mo) # DETERMINE GRAM STAIN FOR BACTERIA # Source: https://itis.gov/servlet/SingleRpt/SingleRpt?search_topic=TSN&search_value=956097 @@ -303,25 +303,35 @@ mo_gramstain <- function(x, language = get_locale(), ...) { "Firmicutes", "Tenericutes") | x.mo == "B_GRAMP"] <- "Gram-positive" - + load_mo_failures_uncertainties_renamed(metadata) translate_AMR(x, language = language, only_unknown = FALSE) } #' @rdname mo_property #' @export -is_gram_negative <- function(x, ...) { +is_gram_negative <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) - grams <- mo_gramstain(x, language = NULL, ...) - "Gram-negative" == grams & !is.na(grams) + meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) + + x.mo <- as.mo(x, language = language, ...) + metadata <- get_mo_failures_uncertainties_renamed() + grams <- mo_gramstain(x.mo, language = NULL) + load_mo_failures_uncertainties_renamed(metadata) + grams == "Gram-negative" & !is.na(grams) } #' @rdname mo_property #' @export -is_gram_positive <- function(x, ...) { +is_gram_positive <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) - grams <- mo_gramstain(x, language = NULL, ...) - "Gram-positive" == grams & !is.na(grams) + meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) + + x.mo <- as.mo(x, language = language, ...) + metadata <- get_mo_failures_uncertainties_renamed() + grams <- mo_gramstain(x.mo, language = NULL) + load_mo_failures_uncertainties_renamed(metadata) + grams == "Gram-positive" & !is.na(grams) } #' @rdname mo_property @@ -329,7 +339,7 @@ is_gram_positive <- function(x, ...) { mo_snomed <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "snomed", language = language, ...) } @@ -338,7 +348,7 @@ mo_snomed <- function(x, language = get_locale(), ...) { mo_ref <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "ref", language = language, ...) } @@ -347,7 +357,7 @@ mo_ref <- function(x, language = get_locale(), ...) { mo_authors <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- mo_validate(x = x, property = "ref", language = language, ...) # remove last 4 digits and presumably the comma and space that preceed them x[!is.na(x)] <- gsub(",? ?[0-9]{4}", "", x[!is.na(x)]) @@ -359,7 +369,7 @@ mo_authors <- function(x, language = get_locale(), ...) { mo_year <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- mo_validate(x = x, property = "ref", language = language, ...) # get last 4 digits x[!is.na(x)] <- gsub(".*([0-9]{4})$", "\\1", x[!is.na(x)]) @@ -371,7 +381,7 @@ mo_year <- function(x, language = get_locale(), ...) { mo_rank <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo_validate(x = x, property = "rank", language = language, ...) } @@ -380,10 +390,10 @@ mo_rank <- function(x, language = get_locale(), ...) { mo_taxonomy <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + result <- list(kingdom = mo_kingdom(x, language = language), phylum = mo_phylum(x, language = language), class = mo_class(x, language = language), @@ -392,7 +402,7 @@ mo_taxonomy <- function(x, language = get_locale(), ...) { genus = mo_genus(x, language = language), species = mo_species(x, language = language), subspecies = mo_subspecies(x, language = language)) - + load_mo_failures_uncertainties_renamed(metadata) result } @@ -402,10 +412,10 @@ mo_taxonomy <- function(x, language = get_locale(), ...) { mo_synonyms <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + IDs <- mo_name(x = x, language = NULL) syns <- lapply(IDs, function(newname) { res <- sort(microorganisms.old[which(microorganisms.old$fullname_new == newname), "fullname"]) @@ -421,7 +431,7 @@ mo_synonyms <- function(x, language = get_locale(), ...) { } else { result <- unlist(syns) } - + load_mo_failures_uncertainties_renamed(metadata) result } @@ -431,10 +441,10 @@ mo_synonyms <- function(x, language = get_locale(), ...) { mo_info <- function(x, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + x <- as.mo(x, language = language, ...) metadata <- get_mo_failures_uncertainties_renamed() - + info <- lapply(x, function(y) c(mo_taxonomy(y, language = language), list(synonyms = mo_synonyms(y), @@ -447,7 +457,7 @@ mo_info <- function(x, language = get_locale(), ...) { } else { result <- info[[1L]] } - + load_mo_failures_uncertainties_renamed(metadata) result } @@ -458,11 +468,11 @@ mo_url <- function(x, open = FALSE, language = get_locale(), ...) { meet_criteria(x, allow_NA = TRUE) meet_criteria(open, allow_class = "logical", has_length = 1) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + mo <- as.mo(x = x, language = language, ... = ...) mo_names <- mo_name(mo) metadata <- get_mo_failures_uncertainties_renamed() - + df <- data.frame(mo, stringsAsFactors = FALSE) %pm>% pm_left_join(pm_select(microorganisms, mo, source, species_id), by = "mo") df$url <- ifelse(df$source == "CoL", @@ -472,14 +482,14 @@ mo_url <- function(x, open = FALSE, language = get_locale(), ...) { NA_character_)) u <- df$url names(u) <- mo_names - + if (open == TRUE) { if (length(u) > 1) { warning("only the first URL will be opened, as `browseURL()` only suports one string.") } utils::browseURL(u[1L]) } - + load_mo_failures_uncertainties_renamed(metadata) u } @@ -491,18 +501,18 @@ mo_property <- function(x, property = "fullname", language = get_locale(), ...) meet_criteria(x, allow_NA = TRUE) meet_criteria(property, allow_class = "character", has_length = 1, is_in = colnames(microorganisms)) meet_criteria(language, has_length = 1, is_in = c(LANGUAGES_SUPPORTED, ""), allow_NULL = TRUE, allow_NA = TRUE) - + translate_AMR(mo_validate(x = x, property = property, language = language, ...), language = language, only_unknown = TRUE) } mo_validate <- function(x, property, language, ...) { check_dataset_integrity() - + if (tryCatch(all(x[!is.na(x)] %in% MO_lookup$mo) & length(list(...)) == 0, error = function(e) FALSE)) { # special case for mo_* functions where class is already return(MO_lookup[match(x, MO_lookup$mo), property, drop = TRUE]) } - + dots <- list(...) Becker <- dots$Becker if (is.null(Becker)) { @@ -512,14 +522,14 @@ mo_validate <- function(x, property, language, ...) { if (is.null(Lancefield)) { Lancefield <- FALSE } - + # try to catch an error when inputting an invalid parameter # so the 'call.' can be set to FALSE tryCatch(x[1L] %in% MO_lookup[1, property, drop = TRUE], error = function(e) stop(e$message, call. = FALSE)) - - if (is.mo(x) - & !Becker %in% c(TRUE, "all") + + if (is.mo(x) + & !Becker %in% c(TRUE, "all") & !Lancefield %in% c(TRUE, "all")) { # this will not reset mo_uncertainties and mo_failures # because it's already a valid MO @@ -529,7 +539,7 @@ mo_validate <- function(x, property, language, ...) { | Lancefield %in% c(TRUE, "all")) { x <- exec_as.mo(x, property = property, language = language, ...) } - + if (property == "mo") { return(to_class_mo(x)) } else if (property == "snomed") { diff --git a/docs/404.html b/docs/404.html index fc139ae1..2b059ce4 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index a8efcc17..a59446e2 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008 diff --git a/docs/articles/PCA.html b/docs/articles/PCA.html index 8e55edd8..bff885c4 100644 --- a/docs/articles/PCA.html +++ b/docs/articles/PCA.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -187,7 +187,8 @@ -
+ +

Now to transform this to a data set with only resistance percentages per taxonomic order and genus:

-resistance_data <- example_isolates %>% 
-  group_by(order = mo_order(mo),       # group on anything, like order
-           genus = mo_genus(mo)) %>%   #  and genus as we do here
-  summarise_if(is.rsi, resistance) %>% # then get resistance of all drugs
-  select(order, genus, AMC, CXM, CTX, 
-         CAZ, GEN, TOB, TMP, SXT)      # and select only relevant columns
+resistance_data <- example_isolates %>% 
+  group_by(order = mo_order(mo),       # group on anything, like order
+           genus = mo_genus(mo)) %>%   #  and genus as we do here
+  summarise_if(is.rsi, resistance) %>% # then get resistance of all drugs
+  select(order, genus, AMC, CXM, CTX, 
+         CAZ, GEN, TOB, TMP, SXT)      # and select only relevant columns
 
-head(resistance_data)
+head(resistance_data)
 # # A tibble: 6 x 10
 # # Groups:   order [2]
 #   order           genus            AMC   CXM   CTX   CAZ   GEN   TOB   TMP   SXT
@@ -284,46 +284,40 @@
 # 3 Actinomycetales Cutibacterium     NA    NA    NA    NA    NA    NA    NA    NA
 # 4 Actinomycetales Dermabacter       NA    NA    NA    NA    NA    NA    NA    NA
 # 5 Actinomycetales Micrococcus       NA    NA    NA    NA    NA    NA    NA    NA
-# 6 Actinomycetales Rothia            NA    NA    NA    NA    NA    NA    NA    NA
-
+# 6 Actinomycetales Rothia NA NA NA NA NA NA NA NA

Perform principal component analysis

The new pca() function will automatically filter on rows that contain numeric values in all selected variables, so we now only need to do:

-pca_result <- pca(resistance_data)
+pca_result <- pca(resistance_data)
 # NOTE: Columns selected for PCA: AMC CXM CTX CAZ GEN TOB TMP SXT.
-#       Total observations available: 7.
-
+# Total observations available: 7.

The result can be reviewed with the good old summary() function:

-summary(pca_result)
+summary(pca_result)
 # Importance of components:
 #                          PC1    PC2     PC3     PC4     PC5     PC6       PC7
 # Standard deviation     2.154 1.6807 0.61365 0.33902 0.20757 0.03136 1.733e-16
 # Proportion of Variance 0.580 0.3531 0.04707 0.01437 0.00539 0.00012 0.000e+00
-# Cumulative Proportion  0.580 0.9331 0.98012 0.99449 0.99988 1.00000 1.000e+00
-
+# Cumulative Proportion 0.580 0.9331 0.98012 0.99449 0.99988 1.00000 1.000e+00

Good news. The first two components explain a total of 93.3% of the variance (see the PC1 and PC2 values of the Proportion of Variance. We can create a so-called biplot with the base R biplot() function, to see which antimicrobial resistance per drug explain the difference per microorganism.

Plotting the results

-biplot(pca_result)
-
+biplot(pca_result)

But we can’t see the explanation of the points. Perhaps this works better with our new ggplot_pca() function, that automatically adds the right labels and even groups:

-ggplot_pca(pca_result)
-
+ggplot_pca(pca_result)

You can also print an ellipse per group, and edit the appearance:

-ggplot_pca(pca_result, ellipse = TRUE) +
-  ggplot2::labs(title = "An AMR/PCA biplot!")
-
+ggplot_pca(pca_result, ellipse = TRUE) + + ggplot2::labs(title = "An AMR/PCA biplot!")

@@ -343,7 +337,7 @@
-

Site built with pkgdown 1.5.1.9000.

+

Site built with pkgdown 1.6.1.

diff --git a/docs/articles/PCA_files/anchor-sections-1.0/anchor-sections.css b/docs/articles/PCA_files/anchor-sections-1.0/anchor-sections.css new file mode 100644 index 00000000..07aee5fc --- /dev/null +++ b/docs/articles/PCA_files/anchor-sections-1.0/anchor-sections.css @@ -0,0 +1,4 @@ +/* Styles for section anchors */ +a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} +a.anchor-section::before {content: '#';} +.hasAnchor:hover a.anchor-section {visibility: visible;} diff --git a/docs/articles/PCA_files/anchor-sections-1.0/anchor-sections.js b/docs/articles/PCA_files/anchor-sections-1.0/anchor-sections.js new file mode 100644 index 00000000..570f99a0 --- /dev/null +++ b/docs/articles/PCA_files/anchor-sections-1.0/anchor-sections.js @@ -0,0 +1,33 @@ +// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. +document.addEventListener('DOMContentLoaded', function() { + // Do nothing if AnchorJS is used + if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { + return; + } + + const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); + + // Do nothing if sections are already anchored + if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { + return null; + } + + // Use section id when pandoc runs with --section-divs + const section_id = function(x) { + return ((x.classList.contains('section') || (x.tagName === 'SECTION')) + ? x.id : ''); + }; + + // Add anchors + h.forEach(function(x) { + const id = x.id || section_id(x.parentElement); + if (id === '') { + return null; + } + let anchor = document.createElement('a'); + anchor.href = '#' + id; + anchor.classList = ['anchor-section']; + x.classList.add('hasAnchor'); + x.appendChild(anchor); + }); +}); diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png index 47cbc42e..54c1ec0a 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png index 5d335a59..2d168e58 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png index 54e5f0a4..c98890e5 100644 Binary files a/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/PCA_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/articles/SPSS.html b/docs/articles/SPSS.html index c2a11435..a482d9a9 100644 --- a/docs/articles/SPSS.html +++ b/docs/articles/SPSS.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -187,13 +187,14 @@ -
+ +

@@ -294,10 +294,10 @@

If additional packages are needed, RStudio will ask you if they should be installed on beforehand.

In the the window that opens, you can define all options (parameters) that should be used for import and you’re ready to go:

-

If you want named variables to be imported as factors so it resembles SPSS more, use as_factor().

+

If you want named variables to be imported as factors so it resembles SPSS more, use as_factor().

The difference is this:

-SPSS_data
+SPSS_data
 # # A tibble: 4,203 x 4
 #     v001 sex       status    statusage
 #    <dbl> <dbl+lbl> <dbl+lbl>     <dbl>
@@ -313,7 +313,7 @@
 # 10 10018 0         1              66.6
 # # … with 4,193 more rows
 
-as_factor(SPSS_data)
+as_factor(SPSS_data)
 # # A tibble: 4,203 x 4
 #     v001 sex    status statusage
 #    <dbl> <fct>  <fct>      <dbl>
@@ -327,8 +327,7 @@
 #  8 10011 Male   alive       73.1
 #  9 10017 Male   alive       56.7
 # 10 10018 Female alive       66.6
-# # … with 4,193 more rows
-
+# # … with 4,193 more rows

@@ -336,10 +335,9 @@

To import data from SPSS, SAS or Stata, you can use the great haven package yourself:

 # download and install the latest version:
-install.packages("haven")
+install.packages("haven")
 # load the package you just installed:
-library(haven) 
-
+library(haven)

You can now import files as follows:

@@ -347,23 +345,21 @@

To read files from SPSS into R:

 # read any SPSS file based on file extension (best way):
-read_spss(file = "path/to/file")
+read_spss(file = "path/to/file")
 
 # read .sav or .zsav file:
-read_sav(file = "path/to/file")
+read_sav(file = "path/to/file")
 
 # read .por file:
-read_por(file = "path/to/file")
-
-

Do not forget about as_factor(), as mentioned above.

+read_por(file = "path/to/file")

+

Do not forget about as_factor(), as mentioned above.

To export your R objects to the SPSS file format:

 # save as .sav file:
-write_sav(data = yourdata, path = "path/to/file")
+write_sav(data = yourdata, path = "path/to/file")
 
 # save as compressed .zsav file:
-write_sav(data = yourdata, path = "path/to/file", compress = TRUE)
-
+write_sav(data = yourdata, path = "path/to/file", compress = TRUE)

@@ -371,20 +367,18 @@

To read files from SAS into R:

 # read .sas7bdat + .sas7bcat files:
-read_sas(data_file = "path/to/file", catalog_file = NULL)
+read_sas(data_file = "path/to/file", catalog_file = NULL)
 
 # read SAS transport files (version 5 and version 8):
-read_xpt(file = "path/to/file")
-
+read_xpt(file = "path/to/file")

To export your R objects to the SAS file format:

 # save as regular SAS file:
-write_sas(data = yourdata, path = "path/to/file")
+write_sas(data = yourdata, path = "path/to/file")
 
 # the SAS transport format is an open format 
 # (required for submission of the data to the FDA)
-write_xpt(data = yourdata, path = "path/to/file", version = 8)
-
+write_xpt(data = yourdata, path = "path/to/file", version = 8)

@@ -392,17 +386,15 @@

To read files from Stata into R:

 # read .dta file:
-read_stata(file = "/path/to/file")
+read_stata(file = "/path/to/file")
 
 # works exactly the same:
-read_dta(file = "/path/to/file")
-
+read_dta(file = "/path/to/file")

To export your R objects to the Stata file format:

 # save as .dta file, Stata version 14:
 # (supports Stata v8 until v15 at the time of writing)
-write_dta(data = yourdata, path = "/path/to/file", version = 14)
-
+write_dta(data = yourdata, path = "/path/to/file", version = 14) @@ -423,7 +415,7 @@
-

Site built with pkgdown 1.5.1.9000.

+

Site built with pkgdown 1.6.1.

diff --git a/docs/articles/SPSS_files/anchor-sections-1.0/anchor-sections.css b/docs/articles/SPSS_files/anchor-sections-1.0/anchor-sections.css new file mode 100644 index 00000000..07aee5fc --- /dev/null +++ b/docs/articles/SPSS_files/anchor-sections-1.0/anchor-sections.css @@ -0,0 +1,4 @@ +/* Styles for section anchors */ +a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} +a.anchor-section::before {content: '#';} +.hasAnchor:hover a.anchor-section {visibility: visible;} diff --git a/docs/articles/SPSS_files/anchor-sections-1.0/anchor-sections.js b/docs/articles/SPSS_files/anchor-sections-1.0/anchor-sections.js new file mode 100644 index 00000000..570f99a0 --- /dev/null +++ b/docs/articles/SPSS_files/anchor-sections-1.0/anchor-sections.js @@ -0,0 +1,33 @@ +// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. +document.addEventListener('DOMContentLoaded', function() { + // Do nothing if AnchorJS is used + if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { + return; + } + + const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); + + // Do nothing if sections are already anchored + if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { + return null; + } + + // Use section id when pandoc runs with --section-divs + const section_id = function(x) { + return ((x.classList.contains('section') || (x.tagName === 'SECTION')) + ? x.id : ''); + }; + + // Add anchors + h.forEach(function(x) { + const id = x.id || section_id(x.parentElement); + if (id === '') { + return null; + } + let anchor = document.createElement('a'); + anchor.href = '#' + id; + anchor.classList = ['anchor-section']; + x.classList.add('hasAnchor'); + x.appendChild(anchor); + }); +}); diff --git a/docs/articles/WHONET.html b/docs/articles/WHONET.html index c2a7e97a..273f3e19 100644 --- a/docs/articles/WHONET.html +++ b/docs/articles/WHONET.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -187,7 +187,8 @@ -
+ +

This package comes with an example data set WHONET. We will use it for this analysis.

@@ -216,11 +216,10 @@ Preparation

First, load the relevant packages if you did not yet did this. I use the tidyverse for all of my analyses. All of them. If you don’t know it yet, I suggest you read about it on their website: https://www.tidyverse.org/.

-library(dplyr)   # part of tidyverse
-library(ggplot2) # part of tidyverse
-library(AMR)     # this package
-library(cleaner) # to create frequency tables
-
+library(dplyr) # part of tidyverse +library(ggplot2) # part of tidyverse +library(AMR) # this package +library(cleaner) # to create frequency tables

We will have to transform some variables to simplify and automate the analysis:

  • Microorganisms should be transformed to our own microorganism IDs (called an mo) using our Catalogue of Life reference data set, which contains all ~70,000 microorganisms from the taxonomic kingdoms Bacteria, Fungi and Protozoa. We do the tranformation with as.mo(). This function also recognises almost all WHONET abbreviations of microorganisms.
  • @@ -228,19 +227,17 @@
 # transform variables
-data <- WHONET %>%
+data <- WHONET %>%
   # get microbial ID based on given organism
-  mutate(mo = as.mo(Organism)) %>% 
+  mutate(mo = as.mo(Organism)) %>% 
   # transform everything from "AMP_ND10" to "CIP_EE" to the new `rsi` class
-  mutate_at(vars(AMP_ND10:CIP_EE), as.rsi)
-
+ mutate_at(vars(AMP_ND10:CIP_EE), as.rsi)

No errors or warnings, so all values are transformed succesfully.

We also created a package dedicated to data cleaning and checking, called the cleaner package. Its freq() function can be used to create frequency tables.

So let’s check our data, with a couple of frequency tables:

 # our newly created `mo` variable, put in the mo_name() function
-data %>% freq(mo_name(mo), nmax = 10)
-
+data %>% freq(mo_name(mo), nmax = 10)

Frequency table

Class: character
Length: 500
@@ -344,15 +341,16 @@ Longest: 40

 # our transformed antibiotic columns
 # amoxicillin/clavulanic acid (J01CR02) as an example
-data %>% freq(AMC_ND2)
-
+data %>% freq(AMC_ND2)

Frequency table

Class: factor > ordered > rsi (numeric)
Length: 500
Levels: 3: S < I < R
Available: 481 (96.2%, NA: 19 = 3.8%)
Unique: 3

-

%SI: 78.59%

+

Drug: Amoxicillin/clavulanic acid (AMC, J01CR02)
+Drug group: Beta-lactams/penicillins
+%SI: 78.59%

@@ -395,11 +393,10 @@ Unique: 3

A first glimpse at results

An easy ggplot will already give a lot of information, using the included ggplot_rsi() function:

-data %>%
-  group_by(Country) %>%
-  select(Country, AMP_ND2, AMC_ED20, CAZ_ED10, CIP_ED5) %>%
-  ggplot_rsi(translate_ab = 'ab', facet = "Country", datalabels = FALSE)
-
+data%>% + group_by(Country)%>% + select(Country, AMP_ND2, AMC_ED20, CAZ_ED10, CIP_ED5)%>% + ggplot_rsi(translate_ab ='ab', facet ="Country", datalabels =FALSE)

@@ -417,7 +414,7 @@ Unique: 3

-

Site built with pkgdown 1.5.1.9000.

+

Site built with pkgdown 1.6.1.

diff --git a/docs/articles/WHONET_files/anchor-sections-1.0/anchor-sections.css b/docs/articles/WHONET_files/anchor-sections-1.0/anchor-sections.css new file mode 100644 index 00000000..07aee5fc --- /dev/null +++ b/docs/articles/WHONET_files/anchor-sections-1.0/anchor-sections.css @@ -0,0 +1,4 @@ +/* Styles for section anchors */ +a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} +a.anchor-section::before {content: '#';} +.hasAnchor:hover a.anchor-section {visibility: visible;} diff --git a/docs/articles/WHONET_files/anchor-sections-1.0/anchor-sections.js b/docs/articles/WHONET_files/anchor-sections-1.0/anchor-sections.js new file mode 100644 index 00000000..570f99a0 --- /dev/null +++ b/docs/articles/WHONET_files/anchor-sections-1.0/anchor-sections.js @@ -0,0 +1,33 @@ +// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. +document.addEventListener('DOMContentLoaded', function() { + // Do nothing if AnchorJS is used + if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { + return; + } + + const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); + + // Do nothing if sections are already anchored + if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { + return null; + } + + // Use section id when pandoc runs with --section-divs + const section_id = function(x) { + return ((x.classList.contains('section') || (x.tagName === 'SECTION')) + ? x.id : ''); + }; + + // Add anchors + h.forEach(function(x) { + const id = x.id || section_id(x.parentElement); + if (id === '') { + return null; + } + let anchor = document.createElement('a'); + anchor.href = '#' + id; + anchor.classList = ['anchor-section']; + x.classList.add('hasAnchor'); + x.appendChild(anchor); + }); +}); diff --git a/docs/articles/WHONET_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/WHONET_files/figure-html/unnamed-chunk-7-1.png index 1678805a..b49114bc 100644 Binary files a/docs/articles/WHONET_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/WHONET_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/articles/benchmarks.html b/docs/articles/benchmarks.html index 9cd704c7..b8c3637f 100644 --- a/docs/articles/benchmarks.html +++ b/docs/articles/benchmarks.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0.9000 + 1.4.0.9008 @@ -187,7 +187,8 @@ -
+ +
+# expr min lq mean median uq max +# as.mo("sau") 26.0 26.0 50.0 26.0 26.0 180 +# as.mo("stau") 430.0 430.0 470.0 440.0 490.0 630 +# as.mo("STAU") 430.0 440.0 470.0 500.0 500.0 500 +# as.mo("staaur") 26.0 26.0 32.0 26.0 26.0 76 +# as.mo("STAAUR") 26.0 26.0 44.0 26.0 81.0 83 +# as.mo("S. aureus") 60.0 60.0 77.0 61.0 110.0 120 +# as.mo("S aureus") 60.0 61.0 94.0 110.0 120.0 120 +# as.mo("Staphylococcus aureus") 4.1 4.1 9.6 4.1 4.2 59 +# as.mo("Staphylococcus aureus (MRSA)") 2200.0 2200.0 2200.0 2200.0 2300.0 2300 +# as.mo("Sthafilokkockus aaureuz") 760.0 800.0 810.0 800.0 810.0 840 +# as.mo("MRSA") 26.0 26.0 37.0 26.0 26.0 83 +# as.mo("VISA") 42.0 42.0 59.0 42.0 92.0 97 +# as.mo("VRSA") 42.0 42.0 68.0 67.0 92.0 97 +# neval +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10 +# 10

In the table above, all measurements are shown in milliseconds (thousands of seconds). A value of 5 milliseconds means it can determine 200 input values per second. It case of 100 milliseconds, this is only 10 input values per second. It is clear that accepted taxonomic names are extremely fast, but some variations can take up to 500-1000 times as much time.

To improve performance, two important calculations take almost no time at all: repetitive results and already precalculated results.

@@ -270,8 +285,8 @@ print(run_it, unit = "ms", signif = 3) # Unit: milliseconds # expr min lq mean median uq max neval -# mo_name(x) 150 154 195 179 197 301 10
-

So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.179 seconds. You only lose time on your unique input values.

+# mo_name(x) 308 326 399 331 370 676 10
+

So getting official taxonomic names of 2,000,000 (!!) items consisting of 90 unique values only takes 0.331 seconds. You only lose time on your unique input values.

@@ -284,11 +299,11 @@ times = 10) print(run_it, unit = "ms", signif = 3) # Unit: milliseconds -# expr min lq mean median uq max neval -# A 7.11 7.23 7.79 7.74 8.46 8.54 10 -# B 21.40 21.90 30.40 24.00 25.50 60.00 10 -# C 1.97 1.98 2.12 2.05 2.34 2.43 10

-

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.002 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

+# expr min lq mean median uq max neval +# A 16.20 16.20 18.60 17.90 18.90 28.00 10 +# B 50.90 51.10 57.70 51.20 53.10 108.00 10 +# C 3.58 3.72 4.03 4.06 4.14 4.91 10 +

So going from mo_name("Staphylococcus aureus") to "Staphylococcus aureus" takes 0.0041 seconds - it doesn’t even start calculating if the result would be the same as the expected resulting value. That goes for all helper functions:

 run_it <- microbenchmark(A = mo_species("aureus"),
                          B = mo_genus("Staphylococcus"),
@@ -301,15 +316,15 @@
                          times = 10)
 print(run_it, unit = "ms", signif = 3)
 # Unit: milliseconds
-#  expr  min   lq mean median   uq   max neval
-#     A 1.33 1.57 1.70   1.69 1.83  2.18    10
-#     B 1.62 1.67 5.34   1.70 2.15 36.90    10
-#     C 1.43 1.55 1.64   1.59 1.70  2.06    10
-#     D 1.29 1.44 1.53   1.55 1.66  1.70    10
-#     E 1.31 1.58 1.68   1.65 1.71  2.11    10
-#     F 1.54 1.61 1.80   1.67 1.86  2.57    10
-#     G 1.52 1.62 1.73   1.71 1.77  2.04    10
-#     H 1.40 1.55 1.70   1.65 1.92  2.03    10
+# expr min lq mean median uq max neval +# A 3.62 3.62 3.66 3.64 3.67 3.80 10 +# B 3.57 3.59 3.68 3.64 3.72 4.01 10 +# C 3.63 3.64 3.65 3.64 3.67 3.71 10 +# D 3.57 3.58 3.62 3.61 3.67 3.69 10 +# E 3.56 3.57 3.60 3.59 3.60 3.68 10 +# F 3.55 3.58 3.73 3.65 3.85 4.01 10 +# G 3.54 3.56 3.62 3.58 3.61 3.93 10 +# H 3.53 3.57 3.59 3.58 3.63 3.66 10

Of course, when running mo_phylum("Firmicutes") the function has zero knowledge about the actual microorganism, namely S. aureus. But since the result would be "Firmicutes" anyway, there is no point in calculating the result. And because this package ‘knows’ all phyla of all known bacteria (according to the Catalogue of Life), it can just return the initial value immediately.

@@ -336,14 +351,14 @@ times = 100) print(run_it, unit = "ms", signif = 4) # Unit: milliseconds -# expr min lq mean median uq max neval -# en 14.20 14.66 20.04 15.14 16.90 58.67 100 -# de 17.15 17.85 24.30 18.27 20.00 59.17 100 -# nl 29.13 30.50 34.60 31.26 32.86 72.63 100 -# es 16.95 17.70 22.61 18.35 20.54 59.42 100 -# it 17.04 17.60 21.72 18.14 19.60 59.19 100 -# fr 16.82 17.52 21.71 18.05 19.63 61.06 100 -# pt 16.92 17.58 19.92 18.23 19.19 54.61 100
+# expr min lq mean median uq max neval +# en 35.85 36.13 41.92 36.29 36.84 318.20 100 +# de 42.37 42.67 45.11 42.91 43.19 95.04 100 +# nl 70.13 70.65 75.92 70.95 71.50 140.20 100 +# es 42.16 42.46 46.88 42.64 43.20 95.37 100 +# it 41.85 42.24 46.84 42.40 42.84 95.92 100 +# fr 42.13 42.53 48.41 42.71 43.27 95.21 100 +# pt 42.25 42.47 48.56 42.71 43.12 96.60 100

Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

diff --git a/docs/articles/benchmarks_files/anchor-sections-1.0/anchor-sections.css b/docs/articles/benchmarks_files/anchor-sections-1.0/anchor-sections.css new file mode 100644 index 00000000..07aee5fc --- /dev/null +++ b/docs/articles/benchmarks_files/anchor-sections-1.0/anchor-sections.css @@ -0,0 +1,4 @@ +/* Styles for section anchors */ +a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} +a.anchor-section::before {content: '#';} +.hasAnchor:hover a.anchor-section {visibility: visible;} diff --git a/docs/articles/benchmarks_files/anchor-sections-1.0/anchor-sections.js b/docs/articles/benchmarks_files/anchor-sections-1.0/anchor-sections.js new file mode 100644 index 00000000..570f99a0 --- /dev/null +++ b/docs/articles/benchmarks_files/anchor-sections-1.0/anchor-sections.js @@ -0,0 +1,33 @@ +// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. +document.addEventListener('DOMContentLoaded', function() { + // Do nothing if AnchorJS is used + if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { + return; + } + + const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); + + // Do nothing if sections are already anchored + if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { + return null; + } + + // Use section id when pandoc runs with --section-divs + const section_id = function(x) { + return ((x.classList.contains('section') || (x.tagName === 'SECTION')) + ? x.id : ''); + }; + + // Add anchors + h.forEach(function(x) { + const id = x.id || section_id(x.parentElement); + if (id === '') { + return null; + } + let anchor = document.createElement('a'); + anchor.href = '#' + id; + anchor.classList = ['anchor-section']; + x.classList.add('hasAnchor'); + x.appendChild(anchor); + }); +}); diff --git a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png index f3ac52fe..4492f129 100644 Binary files a/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/benchmarks_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/index.html b/docs/articles/index.html index 938b71e1..5993fb35 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008 diff --git a/docs/articles/resistance_predict.html b/docs/articles/resistance_predict.html index 204a89e6..a7866c12 100644 --- a/docs/articles/resistance_predict.html +++ b/docs/articles/resistance_predict.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -187,7 +187,8 @@ -
+ +

@@ -236,7 +236,7 @@
# NOTE: Using column `date` as input for `col_date`.

This text is only a printed summary - the actual result (output) of the function is a data.frame containing for each year: the number of observations, the actual observed resistance, the estimated resistance and the standard error below and above the estimation:

-predict_TZP
+predict_TZP
 #    year      value    se_min    se_max observations   observed  estimated
 # 1  2002 0.20000000        NA        NA           15 0.20000000 0.05616378
 # 2  2003 0.06250000        NA        NA           32 0.06250000 0.06163839
@@ -266,36 +266,31 @@
 # 26 2027 0.41315710 0.3244399 0.5018743           NA         NA 0.41315710
 # 27 2028 0.43730688 0.3418075 0.5328063           NA         NA 0.43730688
 # 28 2029 0.46175755 0.3597639 0.5637512           NA         NA 0.46175755
-# 29 2030 0.48639359 0.3782932 0.5944939           NA         NA 0.48639359
-
+# 29 2030 0.48639359 0.3782932 0.5944939 NA NA 0.48639359

The function plot is available in base R, and can be extended by other packages to depend the output based on the type of input. We extended its function to cope with resistance predictions:

-plot(predict_TZP)
-
+plot(predict_TZP)

This is the fastest way to plot the result. It automatically adds the right axes, error bars, titles, number of available observations and type of model.

We also support the ggplot2 package with our custom function ggplot_rsi_predict() to create more appealing plots:

-ggplot_rsi_predict(predict_TZP)
-
+ggplot_rsi_predict(predict_TZP)

 
 # choose for error bars instead of a ribbon
-ggplot_rsi_predict(predict_TZP, ribbon = FALSE)
-
+ggplot_rsi_predict(predict_TZP, ribbon =FALSE)

Choosing the right model

Resistance is not easily predicted; if we look at vancomycin resistance in Gram-positive bacteria, the spread (i.e. standard error) is enormous:

-example_isolates %>%
-  filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>%
-  resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "binomial") %>% 
-  ggplot_rsi_predict()
-# NOTE: Using column `date` as input for `col_date`.
-
+example_isolates %>% + filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>% + resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "binomial") %>% + ggplot_rsi_predict() +# NOTE: Using column `date` as input for `col_date`.

Vancomycin resistance could be 100% in ten years, but might also stay around 0%.

You can define the model with the model parameter. The model chosen above is a generalised linear regression model using a binomial distribution, assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance.

@@ -337,28 +332,26 @@

For the vancomycin resistance in Gram-positive bacteria, a linear model might be more appropriate since no binomial distribution is to be expected based on the observed years:

-example_isolates %>%
-  filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>%
-  resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "linear") %>% 
-  ggplot_rsi_predict()
-# NOTE: Using column `date` as input for `col_date`.
-
+example_isolates %>% + filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>% + resistance_predict(col_ab = "VAN", year_min = 2010, info = FALSE, model = "linear") %>% + ggplot_rsi_predict() +# NOTE: Using column `date` as input for `col_date`.

This seems more likely, doesn’t it?

The model itself is also available from the object, as an attribute:

-model <- attributes(predict_TZP)$model
+model <- attributes(predict_TZP)$model
 
-summary(model)$family
+summary(model)$family
 # 
 # Family: binomial 
 # Link function: logit
 
-summary(model)$coefficients
+summary(model)$coefficients
 #                  Estimate  Std. Error   z value     Pr(>|z|)
 # (Intercept) -200.67944891 46.17315349 -4.346237 1.384932e-05
-# year           0.09883005  0.02295317  4.305725 1.664395e-05
-
+# year 0.09883005 0.02295317 4.305725 1.664395e-05 @@ -378,7 +371,7 @@
-

Site built with pkgdown 1.5.1.9000.

+

Site built with pkgdown 1.6.1.

diff --git a/docs/articles/resistance_predict_files/anchor-sections-1.0/anchor-sections.css b/docs/articles/resistance_predict_files/anchor-sections-1.0/anchor-sections.css new file mode 100644 index 00000000..07aee5fc --- /dev/null +++ b/docs/articles/resistance_predict_files/anchor-sections-1.0/anchor-sections.css @@ -0,0 +1,4 @@ +/* Styles for section anchors */ +a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} +a.anchor-section::before {content: '#';} +.hasAnchor:hover a.anchor-section {visibility: visible;} diff --git a/docs/articles/resistance_predict_files/anchor-sections-1.0/anchor-sections.js b/docs/articles/resistance_predict_files/anchor-sections-1.0/anchor-sections.js new file mode 100644 index 00000000..570f99a0 --- /dev/null +++ b/docs/articles/resistance_predict_files/anchor-sections-1.0/anchor-sections.js @@ -0,0 +1,33 @@ +// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. +document.addEventListener('DOMContentLoaded', function() { + // Do nothing if AnchorJS is used + if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { + return; + } + + const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); + + // Do nothing if sections are already anchored + if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { + return null; + } + + // Use section id when pandoc runs with --section-divs + const section_id = function(x) { + return ((x.classList.contains('section') || (x.tagName === 'SECTION')) + ? x.id : ''); + }; + + // Add anchors + h.forEach(function(x) { + const id = x.id || section_id(x.parentElement); + if (id === '') { + return null; + } + let anchor = document.createElement('a'); + anchor.href = '#' + id; + anchor.classList = ['anchor-section']; + x.classList.add('hasAnchor'); + x.appendChild(anchor); + }); +}); diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png index 70b98bf4..edb9c138 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png index 7e019abb..e33de999 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png index bef9d014..a307bc84 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-5-2.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png index 1578af4d..53cd4e5c 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png index 4db93791..32dc5c24 100644 Binary files a/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png and b/docs/articles/resistance_predict_files/figure-html/unnamed-chunk-7-1.png differ diff --git a/docs/articles/welcome_to_AMR.html b/docs/articles/welcome_to_AMR.html index afb92eec..d02b2993 100644 --- a/docs/articles/welcome_to_AMR.html +++ b/docs/articles/welcome_to_AMR.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -187,7 +187,8 @@ -
+ +
-

Site built with pkgdown 1.5.1.9000.

+

Site built with pkgdown 1.6.1.

diff --git a/docs/articles/welcome_to_AMR_files/anchor-sections-1.0/anchor-sections.css b/docs/articles/welcome_to_AMR_files/anchor-sections-1.0/anchor-sections.css new file mode 100644 index 00000000..07aee5fc --- /dev/null +++ b/docs/articles/welcome_to_AMR_files/anchor-sections-1.0/anchor-sections.css @@ -0,0 +1,4 @@ +/* Styles for section anchors */ +a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} +a.anchor-section::before {content: '#';} +.hasAnchor:hover a.anchor-section {visibility: visible;} diff --git a/docs/articles/welcome_to_AMR_files/anchor-sections-1.0/anchor-sections.js b/docs/articles/welcome_to_AMR_files/anchor-sections-1.0/anchor-sections.js new file mode 100644 index 00000000..570f99a0 --- /dev/null +++ b/docs/articles/welcome_to_AMR_files/anchor-sections-1.0/anchor-sections.js @@ -0,0 +1,33 @@ +// Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. +document.addEventListener('DOMContentLoaded', function() { + // Do nothing if AnchorJS is used + if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { + return; + } + + const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); + + // Do nothing if sections are already anchored + if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { + return null; + } + + // Use section id when pandoc runs with --section-divs + const section_id = function(x) { + return ((x.classList.contains('section') || (x.tagName === 'SECTION')) + ? x.id : ''); + }; + + // Add anchors + h.forEach(function(x) { + const id = x.id || section_id(x.parentElement); + if (id === '') { + return null; + } + let anchor = document.createElement('a'); + anchor.href = '#' + id; + anchor.classList = ['anchor-section']; + x.classList.add('hasAnchor'); + x.appendChild(anchor); + }); +}); diff --git a/docs/authors.html b/docs/authors.html index 5f89f267..cddf97b3 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008
diff --git a/docs/index.html b/docs/index.html index 6880b98b..efafa8fa 100644 --- a/docs/index.html +++ b/docs/index.html @@ -43,7 +43,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008
diff --git a/docs/news/index.html b/docs/news/index.html index 520cbd93..47933806 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008
@@ -236,20 +236,20 @@ Source: NEWS.md -
-

-AMR 1.4.0.9007 Unreleased +
+

+AMR 1.4.0.9008 Unreleased

-
+

-Last updated: 21 October 2020 +Last updated: 26 October 2020

New

  • Functions is_gram_negative() and is_gram_positive() as wrappers around mo_gramstain(). They always return TRUE or FALSE, thus always return FALSE for species outside the taxonomic kingdom of Bacteria.
  • -
  • Functions %not_like% and %like_perl% as wrappers around %like%.
  • +
  • Functions %not_like% and %not_like_case% as wrappers around %like% and %like_case%. The RStudio addin to insert the text " %like% " as provided in this package now iterates over all like variants. So if you have defined the keyboard shortcut Ctrl/Cmd + L to this addin, it will first insert %like% and by pressing it again it will be replaced with %not_like%, etc.
@@ -260,12 +260,14 @@
  • Deprecated function p_symbol() that not really fits the scope of this package. It will be removed in a future version. See here for the source code to preserve it.
  • Better determination of disk zones and MIC values when running as.rsi() on a data.frame
  • Updated coagulase-negative staphylococci with Becker et al. 2020 (PMID 32056452), meaning that the species S. argensis, S. caeli, S. debuckii, S. edaphicus and S. pseudoxylosus are now all considered CoNS
  • +
  • Fix for using parameter reference_df in as.mo() and mo_*() functions that contain old microbial codes (from previous package versions)
  • Other

      +
    • All messages thrown by this package now have correct line breaks
    • More extensive unit tests
    @@ -468,7 +470,7 @@

    Making this package independent of especially the tidyverse (e.g. packages dplyr and tidyr) tremendously increases sustainability on the long term, since tidyverse functions change quite often. Good for users, but hard for package maintainers. Most of our functions are replaced with versions that only rely on base R, which keeps this package fully functional for many years to come, without requiring a lot of maintenance to keep up with other packages anymore. Another upside it that this package can now be used with all versions of R since R-3.0.0 (April 2013). Our package is being used in settings where the resources are very limited. Fewer dependencies on newer software is helpful for such settings.

    Negative effects of this change are:

      -
    • Function freq() that was borrowed from the cleaner package was removed. Use cleaner::freq(), or run library("cleaner") before you use freq().
    • +
    • Function freq() that was borrowed from the cleaner package was removed. Use cleaner::freq(), or run library("cleaner") before you use freq().
    • Printing values of class mo or rsi in a tibble will no longer be in colour and printing rsi in a tibble will show the class <ord>, not <rsi> anymore. This is purely a visual effect.
    • All functions from the mo_* family (like mo_name() and mo_gramstain()) are noticeably slower when running on hundreds of thousands of rows.
    • For developers: classes mo and ab now both also inherit class character, to support any data transformation. This change invalidates code that checks for class length == 1.
    • @@ -796,7 +798,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ #> invalid microorganism code, NA generated

    This is important, because a value like "testvalue" could never be understood by e.g. mo_name(), although the class would suggest a valid microbial code.

    -
  • Function freq() has moved to a new package, clean (CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq() function still works, since it is re-exported from the clean package (which will be installed automatically upon updating this AMR package).

  • +
  • Function freq() has moved to a new package, clean (CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq() function still works, since it is re-exported from the clean package (which will be installed automatically upon updating this AMR package).

  • Renamed data set septic_patients to example_isolates

  • @@ -1059,7 +1061,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • The age() function gained a new parameter exact to determine ages with decimals
  • Removed deprecated functions guess_mo(), guess_atc(), EUCAST_rules(), interpretive_reading(), rsi()
  • -
  • Frequency tables (freq()): +
  • Frequency tables (freq()):
    • speed improvement for microbial IDs

    • fixed factor level names for R Markdown

    • @@ -1068,12 +1070,12 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/

      support for boxplots:

       septic_patients %>% 
      -  freq(age) %>% 
      +  freq(age) %>% 
         boxplot()
       # grouped boxplots:
       septic_patients %>% 
         group_by(hospital_id) %>% 
      -  freq(age) %>%
      +  freq(age) %>%
         boxplot()
    @@ -1083,7 +1085,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • Added ceftazidim intrinsic resistance to Streptococci
  • Changed default settings for age_groups(), to let groups of fives and tens end with 100+ instead of 120+
  • -
  • Fix for freq() for when all values are NA +
  • Fix for freq() for when all values are NA
  • Fix for first_isolate() for when dates are missing
  • Improved speed of guess_ab_col() @@ -1315,7 +1317,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • -
  • Frequency tables (freq() function): +
  • Frequency tables (freq() function):
    • Support for tidyverse quasiquotation! Now you can create frequency tables of function outcomes:

      @@ -1324,15 +1326,15 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ # OLD WAY septic_patients %>% mutate(genus = mo_genus(mo)) %>% - freq(genus) + freq(genus) # NEW WAY septic_patients %>% - freq(mo_genus(mo)) + freq(mo_genus(mo)) # Even supports grouping variables: septic_patients %>% group_by(gender) %>% - freq(mo_genus(mo))
  • + freq(mo_genus(mo))

  • Header info is now available as a list, with the header function

  • The parameter header is now set to TRUE at default, even for markdown

  • @@ -1414,23 +1416,23 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
  • Using portion_* functions now throws a warning when total available isolate is below parameter minimum

  • Functions as.mo, as.rsi, as.mic, as.atc and freq will not set package name as attribute anymore

  • -

    Frequency tables - freq():

    +

    Frequency tables - freq():

    • Support for grouping variables, test with:

       septic_patients %>% 
         group_by(hospital_id) %>% 
      -  freq(gender)
      + freq(gender)
    • Support for (un)selecting columns:

       septic_patients %>% 
      -  freq(hospital_id) %>% 
      +  freq(hospital_id) %>% 
         select(-count, -cum_count) # only get item, percent, cum_percent
    • -
    • Check for hms::is.hms

    • +
    • Check for hms::is.hms

    • Now prints in markdown at default in non-interactive sessions

    • No longer adds the factor level column and sorts factors on count again

    • Support for class difftime

    • @@ -1445,7 +1447,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
    • Removed diacritics from all authors (columns microorganisms$ref and microorganisms.old$ref) to comply with CRAN policy to only allow ASCII characters

    • Fix for mo_property not working properly

    • Fix for eucast_rules where some Streptococci would become ceftazidime R in EUCAST rule 4.5

    • -
    • Support for named vectors of class mo, useful for top_freq()

    • +
    • Support for named vectors of class mo, useful for top_freq()

    • ggplot_rsi and scale_y_percent have breaks parameter

    • AI improvements for as.mo:

      @@ -1606,12 +1608,12 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/

      Support for types (classes) list and matrix for freq

       my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
      -freq(my_matrix)
      +freq(my_matrix)

      For lists, subsetting is possible:

       my_list = list(age = septic_patients$age, gender = septic_patients$gender)
      -my_list %>% freq(age)
      -my_list %>% freq(gender)
      +my_list %>% freq(age) +my_list %>% freq(gender)
    @@ -1685,13 +1687,13 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
    • A vignette to explain its usage
    • Support for rsi (antimicrobial resistance) to use as input
    • -
    • Support for table to use as input: freq(table(x, y)) +
    • Support for table to use as input: freq(table(x, y))
    • Support for existing functions hist and plot to use a frequency table as input: hist(freq(df$age))
    • Support for as.vector, as.data.frame, as_tibble and format
    • -
    • Support for quasiquotation: freq(mydata, mycolumn) is the same as mydata %>% freq(mycolumn) +
    • Support for quasiquotation: freq(mydata, mycolumn) is the same as mydata %>% freq(mycolumn)
    • Function top_freq function to return the top/below n items as vector
    • Header of frequency tables now also show Mean Absolute Deviaton (MAD) and Interquartile Range (IQR)
    • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index cdcc68f9..c5b3b9a4 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 2.9.2.1 +pandoc: 2.7.3 pkgdown: 1.6.1 pkgdown_sha: ~ articles: @@ -12,7 +12,7 @@ articles: datasets: datasets.html resistance_predict: resistance_predict.html welcome_to_AMR: welcome_to_AMR.html -last_built: 2020-10-21T13:25Z +last_built: 2020-10-26T10:52Z urls: reference: https://msberends.github.io/AMR//reference article: https://msberends.github.io/AMR//articles diff --git a/docs/reference/AMR-deprecated.html b/docs/reference/AMR-deprecated.html index a36f0183..75c040cf 100644 --- a/docs/reference/AMR-deprecated.html +++ b/docs/reference/AMR-deprecated.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/age.html b/docs/reference/age.html index ae1c4d73..b9481718 100644 --- a/docs/reference/age.html +++ b/docs/reference/age.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/age_groups.html b/docs/reference/age_groups.html index b8c07046..df8b7115 100644 --- a/docs/reference/age_groups.html +++ b/docs/reference/age_groups.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/antibiotic_class_selectors.html b/docs/reference/antibiotic_class_selectors.html index 74776212..9878f8be 100644 --- a/docs/reference/antibiotic_class_selectors.html +++ b/docs/reference/antibiotic_class_selectors.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/as.mo.html b/docs/reference/as.mo.html index 2929590f..64a0351b 100644 --- a/docs/reference/as.mo.html +++ b/docs/reference/as.mo.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9003 + 1.4.0.9008 diff --git a/docs/reference/as.rsi.html b/docs/reference/as.rsi.html index 54b4007e..5dcbf2fc 100644 --- a/docs/reference/as.rsi.html +++ b/docs/reference/as.rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9004 + 1.4.0.9008 diff --git a/docs/reference/bug_drug_combinations.html b/docs/reference/bug_drug_combinations.html index 1a483cac..f7ae9ab5 100644 --- a/docs/reference/bug_drug_combinations.html +++ b/docs/reference/bug_drug_combinations.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/filter_ab_class.html b/docs/reference/filter_ab_class.html index c0c462e6..b3a04a3d 100644 --- a/docs/reference/filter_ab_class.html +++ b/docs/reference/filter_ab_class.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -242,33 +242,33 @@

      Filter isolates on results in specific antimicrobial classes. This makes it easy to filter on isolates that were tested for e.g. any aminoglycoside, or to filter on carbapenem-resistant isolates without the need to specify the drugs.

      -
      filter_ab_class(x, ab_class, result = NULL, scope = "any", ...)
      +    
      filter_ab_class(x, ab_class, result = NULL, scope = "any", ...)
       
      -filter_aminoglycosides(x, result = NULL, scope = "any", ...)
      +filter_aminoglycosides(x, result = NULL, scope = "any", ...)
       
      -filter_carbapenems(x, result = NULL, scope = "any", ...)
      +filter_carbapenems(x, result = NULL, scope = "any", ...)
       
      -filter_cephalosporins(x, result = NULL, scope = "any", ...)
      +filter_cephalosporins(x, result = NULL, scope = "any", ...)
       
      -filter_1st_cephalosporins(x, result = NULL, scope = "any", ...)
      +filter_1st_cephalosporins(x, result = NULL, scope = "any", ...)
       
      -filter_2nd_cephalosporins(x, result = NULL, scope = "any", ...)
      +filter_2nd_cephalosporins(x, result = NULL, scope = "any", ...)
       
      -filter_3rd_cephalosporins(x, result = NULL, scope = "any", ...)
      +filter_3rd_cephalosporins(x, result = NULL, scope = "any", ...)
       
      -filter_4th_cephalosporins(x, result = NULL, scope = "any", ...)
      +filter_4th_cephalosporins(x, result = NULL, scope = "any", ...)
       
      -filter_5th_cephalosporins(x, result = NULL, scope = "any", ...)
      +filter_5th_cephalosporins(x, result = NULL, scope = "any", ...)
       
      -filter_fluoroquinolones(x, result = NULL, scope = "any", ...)
      +filter_fluoroquinolones(x, result = NULL, scope = "any", ...)
       
      -filter_glycopeptides(x, result = NULL, scope = "any", ...)
      +filter_glycopeptides(x, result = NULL, scope = "any", ...)
       
      -filter_macrolides(x, result = NULL, scope = "any", ...)
      +filter_macrolides(x, result = NULL, scope = "any", ...)
       
      -filter_penicillins(x, result = NULL, scope = "any", ...)
      +filter_penicillins(x, result = NULL, scope = "any", ...)
       
      -filter_tetracyclines(x, result = NULL, scope = "any", ...)
      +filter_tetracyclines(x, result = NULL, scope = "any", ...)

      Arguments

      @@ -310,43 +310,43 @@ The lifecycle of this function is stable

      antibiotic_class_selectors() for the select() equivalent.

      Examples

      -
      filter_aminoglycosides(example_isolates)
      +    
      filter_aminoglycosides(example_isolates)
       
       # \donttest{
      -if (require("dplyr")) {
      +if (require("dplyr")) {
       
         # filter on isolates that have any result for any aminoglycoside
      -  example_isolates %>% filter_aminoglycosides()
      -  example_isolates %>% filter_ab_class("aminoglycoside")
      +  example_isolates %>% filter_aminoglycosides()
      +  example_isolates %>% filter_ab_class("aminoglycoside")
       
         # this is essentially the same as (but without determination of column names):
      -  example_isolates %>%
      -    filter_at(.vars = vars(c("GEN", "TOB", "AMK", "KAN")),
      -              .vars_predicate = any_vars(. %in% c("S", "I", "R")))
      +  example_isolates %>%
      +    filter_at(.vars = vars(c("GEN", "TOB", "AMK", "KAN")),
      +              .vars_predicate = any_vars(. %in% c("S", "I", "R")))
       
       
         # filter on isolates that show resistance to ANY aminoglycoside
      -  example_isolates %>% filter_aminoglycosides("R", "any")
      +  example_isolates %>% filter_aminoglycosides("R", "any")
        
         # filter on isolates that show resistance to ALL aminoglycosides
      -  example_isolates %>% filter_aminoglycosides("R", "all")
      +  example_isolates %>% filter_aminoglycosides("R", "all")
        
         # filter on isolates that show resistance to
         # any aminoglycoside and any fluoroquinolone
      -  example_isolates %>%
      -    filter_aminoglycosides("R") %>%
      -    filter_fluoroquinolones("R")
      +  example_isolates %>%
      +    filter_aminoglycosides("R") %>%
      +    filter_fluoroquinolones("R")
        
         # filter on isolates that show resistance to
         # all aminoglycosides and all fluoroquinolones
      -  example_isolates %>%
      -    filter_aminoglycosides("R", "all") %>%
      -    filter_fluoroquinolones("R", "all")
      +  example_isolates %>%
      +    filter_aminoglycosides("R", "all") %>%
      +    filter_fluoroquinolones("R", "all")
         
         # with dplyr 1.0.0 and higher (that adds 'across()'), this is equal:
      -  example_isolates %>% filter_carbapenems("R", "all")
      -  example_isolates %>% filter(across(carbapenems(), ~. == "R"))
      -}
      +  example_isolates %>% filter_carbapenems("R", "all")
      +  example_isolates %>% filter(across(carbapenems(), ~. == "R"))
      +}
       # }
       
      @@ -364,7 +364,7 @@ The lifecycle of this function is stable
      -

      Site built with pkgdown 1.5.1.9000.

      +

      Site built with pkgdown 1.6.1.

      diff --git a/docs/reference/ggplot_pca.html b/docs/reference/ggplot_pca.html index f030146e..8c4335d9 100644 --- a/docs/reference/ggplot_pca.html +++ b/docs/reference/ggplot_pca.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/ggplot_rsi.html b/docs/reference/ggplot_rsi.html index aa0c0863..ed656446 100644 --- a/docs/reference/ggplot_rsi.html +++ b/docs/reference/ggplot_rsi.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9005 + 1.4.0.9008 diff --git a/docs/reference/guess_ab_col.html b/docs/reference/guess_ab_col.html index 8d6433eb..6c43b4d4 100644 --- a/docs/reference/guess_ab_col.html +++ b/docs/reference/guess_ab_col.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/index.html b/docs/reference/index.html index 9bf4c5c6..c080b55b 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008 @@ -581,9 +581,9 @@
      - +
      -

      like() `%like%` `%like_case%`

      +

      like() `%like%` `%not_like%` `%like_case%` `%not_like_case%`

      Pattern Matching

      Pattern matching with keyboard shortcut

      diff --git a/docs/reference/key_antibiotics.html b/docs/reference/key_antibiotics.html index 14e314da..89c5d3be 100644 --- a/docs/reference/key_antibiotics.html +++ b/docs/reference/key_antibiotics.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9001 + 1.4.0.9008 diff --git a/docs/reference/lifecycle.html b/docs/reference/lifecycle.html index e6a5443d..2372b3f7 100644 --- a/docs/reference/lifecycle.html +++ b/docs/reference/lifecycle.html @@ -84,7 +84,7 @@ This page contains a section for every lifecycle (with text borrowed from the af AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -295,7 +295,7 @@ The lifecycle of this function is questioning. This function mi
      -

      Site built with pkgdown 1.5.1.9000.

      +

      Site built with pkgdown 1.6.1.

      diff --git a/docs/reference/like.html b/docs/reference/like.html index 9459a30f..e7d875a9 100644 --- a/docs/reference/like.html +++ b/docs/reference/like.html @@ -6,7 +6,7 @@ -Pattern Matching — like • AMR (for R) +Pattern matching with keyboard shortcut — like • AMR (for R) @@ -48,7 +48,7 @@ - + @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9000 + 1.4.0.9008 @@ -233,7 +233,7 @@
      @@ -246,7 +246,11 @@ x %like% pattern -x %like_case% pattern +x %not_like% pattern + +x %like_case% pattern + +x %not_like_case% pattern

      Arguments

      @@ -280,7 +284,8 @@
    • Tries again with perl = TRUE if regex fails

    • -

      Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...).

      +

      Using RStudio? This function can also be inserted in your code from the Addins menu and can have its own Keyboard Shortcut like Ctrl+Shift+L or Cmd+Shift+L (see Tools > Modify Keyboard Shortcuts...). This addin iterates over all 'like' variants. So if you have defined the keyboard shortcut Ctrl/Cmd + L to this addin, it will first insert %like% and by pressing it again it will be replaced with %not_like%, then %like_case%, then %not_like_case% and then back to %like%.

      +

      The "%not_like%" and "%not_like_case%" functions are wrappers around "%like%" and "%like_case%".

      Stable lifecycle

      diff --git a/docs/reference/mo_property.html b/docs/reference/mo_property.html index 38b02145..c7e8aeea 100644 --- a/docs/reference/mo_property.html +++ b/docs/reference/mo_property.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0.9003 + 1.4.0.9008 @@ -270,9 +270,9 @@ mo_gramstain(x, language =get_locale(), ...) -is_gram_negative(x, ...) +is_gram_negative(x, language =get_locale(), ...) -is_gram_positive(x, ...) +is_gram_positive(x, language =get_locale(), ...)mo_snomed(x, language =get_locale(), ...) @@ -303,7 +303,7 @@ - + diff --git a/docs/reference/pca.html b/docs/reference/pca.html index 0034a047..a7eecbbc 100644 --- a/docs/reference/pca.html +++ b/docs/reference/pca.html @@ -82,7 +82,7 @@ AMR (for R) - 1.4.0 + 1.4.0.9008 @@ -242,15 +242,15 @@

      Performs a principal component analysis (PCA) based on a data set with automatic determination for afterwards plotting the groups and labels, and automatic filtering on only suitable (i.e. non-empty and numeric) variables.

      -
      pca(
      -  x,
      -  ...,
      -  retx = TRUE,
      -  center = TRUE,
      -  scale. = TRUE,
      -  tol = NULL,
      -  rank. = NULL
      -)
      +
      pca(
      +  x,
      +  ...,
      +  retx = TRUE,
      +  center = TRUE,
      +  scale. = TRUE,
      +  tol = NULL,
      +  rank. = NULL
      +)

      Arguments

      language

      language of the returned text, defaults to system language (see get_locale()) and can be overwritten by setting the option AMR_locale, e.g. options(AMR_locale = "de"), see translate. Use language = NULL or language = "" to prevent translation.

      language of the returned text, defaults to system language (see get_locale()) and can be overwritten by setting the option AMR_locale, e.g. options(AMR_locale = "de"), see translate. Also used to translate text like "no growth". Use language = NULL or language = "" to prevent translation.

      ...
      @@ -324,22 +324,22 @@ The lifecycle of this function is maturing< # \donttest{ -if (require("dplyr")) { +if (require("dplyr")) { # calculate the resistance per group first - resistance_data <- example_isolates %>% - group_by(order = mo_order(mo), # group on anything, like order - genus = mo_genus(mo)) %>% # and genus as we do here - summarise_if(is.rsi, resistance) # then get resistance of all drugs + resistance_data <- example_isolates %>% + group_by(order = mo_order(mo), # group on anything, like order + genus = mo_genus(mo)) %>% # and genus as we do here + summarise_if(is.rsi, resistance) # then get resistance of all drugs # now conduct PCA for certain antimicrobial agents - pca_result <- resistance_data %>% - pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) + pca_result <- resistance_data %>% + pca(AMC, CXM, CTX, CAZ, GEN, TOB, TMP, SXT) - pca_result - summary(pca_result) - biplot(pca_result) - ggplot_pca(pca_result) # a new and convenient plot function -} + pca_result + summary(pca_result) + biplot(pca_result) + ggplot_pca(pca_result) # a new and convenient plot function +} # } @@ -357,7 +357,7 @@ The lifecycle of this function is maturing<
      -

      Site built with pkgdown 1.5.1.9000.

      +

      Site built with pkgdown 1.6.1.

      diff --git a/docs/survey.html b/docs/survey.html index 287f0388..a4eb760e 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9007 + 1.4.0.9008 diff --git a/inst/rstudio/addins.dcf b/inst/rstudio/addins.dcf index d43c060a..738ce92b 100644 --- a/inst/rstudio/addins.dcf +++ b/inst/rstudio/addins.dcf @@ -2,6 +2,6 @@ Name: Insert %in% Binding: addin_insert_in Interactive: false -Name: Insert %like% +Name: Insert %like% / %not_like% Binding: addin_insert_like Interactive: false diff --git a/man/like.Rd b/man/like.Rd index ebefb15a..a7067402 100755 --- a/man/like.Rd +++ b/man/like.Rd @@ -3,8 +3,10 @@ \name{like} \alias{like} \alias{\%like\%} +\alias{\%not_like\%} \alias{\%like_case\%} -\title{Pattern Matching} +\alias{\%not_like_case\%} +\title{Pattern matching with keyboard shortcut} \source{ Idea from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package} } @@ -13,7 +15,11 @@ like(x, pattern, ignore.case = TRUE) x \%like\% pattern +x \%not_like\% pattern + x \%like_case\% pattern + +x \%not_like_case\% pattern } \arguments{ \item{x}{a character vector where matches are sought, or an object which can be coerced by \code{\link[=as.character]{as.character()}} to a character vector.} @@ -37,7 +43,9 @@ The \verb{\%like\%} function: \item Tries again with \code{perl = TRUE} if regex fails } -Using RStudio? This function can also be inserted from the Addins menu and can have its own Keyboard Shortcut like \code{Ctrl+Shift+L} or \code{Cmd+Shift+L} (see \code{Tools} > \verb{Modify Keyboard Shortcuts...}). +Using RStudio? This function can also be inserted in your code from the Addins menu and can have its own Keyboard Shortcut like \code{Ctrl+Shift+L} or \code{Cmd+Shift+L} (see \code{Tools} > \verb{Modify Keyboard Shortcuts...}). This addin iterates over all 'like' variants. So if you have defined the keyboard shortcut Ctrl/Cmd + L to this addin, it will first insert \verb{\%like\%} and by pressing it again it will be replaced with \verb{\%not_like\%}, then \verb{\%like_case\%}, then \verb{\%not_like_case\%} and then back to \verb{\%like\%}. + +The \code{"\%not_like\%"} and \code{"\%not_like_case\%"} functions are wrappers around \code{"\%like\%"} and \code{"\%like_case\%"}. } \section{Stable lifecycle}{ diff --git a/man/mo_property.Rd b/man/mo_property.Rd index 62e2ae07..fac2dfaf 100644 --- a/man/mo_property.Rd +++ b/man/mo_property.Rd @@ -57,9 +57,9 @@ mo_type(x, language = get_locale(), ...) mo_gramstain(x, language = get_locale(), ...) -is_gram_negative(x, ...) +is_gram_negative(x, language = get_locale(), ...) -is_gram_positive(x, ...) +is_gram_positive(x, language = get_locale(), ...) mo_snomed(x, language = get_locale(), ...) @@ -84,7 +84,7 @@ mo_property(x, property = "fullname", language = get_locale(), ...) \arguments{ \item{x}{any character (vector) that can be coerced to a valid microorganism code with \code{\link[=as.mo]{as.mo()}}} -\item{language}{language of the returned text, defaults to system language (see \code{\link[=get_locale]{get_locale()}}) and can be overwritten by setting the option \code{AMR_locale}, e.g. \code{options(AMR_locale = "de")}, see \link{translate}. Use \code{language = NULL} or \code{language = ""} to prevent translation.} +\item{language}{language of the returned text, defaults to system language (see \code{\link[=get_locale]{get_locale()}}) and can be overwritten by setting the option \code{AMR_locale}, e.g. \code{options(AMR_locale = "de")}, see \link{translate}. Also used to translate text like "no growth". Use \code{language = NULL} or \code{language = ""} to prevent translation.} \item{...}{other parameters passed on to \code{\link[=as.mo]{as.mo()}}, such as 'allow_uncertain' and 'ignore_pattern'} diff --git a/tests/testthat/test-like.R b/tests/testthat/test-like.R index 276d346f..c941fda0 100644 --- a/tests/testthat/test-like.R +++ b/tests/testthat/test-like.R @@ -28,8 +28,13 @@ context("like.R") test_that("`like` works", { skip_on_cran() expect_true(sum("test" %like% c("^t", "^s")) == 1) + expect_true("test" %like% "test") - expect_true("test" %like% "TEST") + expect_false("test" %like_case% "TEST") + + expect_false("test" %not_like% "test") + expect_true("test" %not_like_case% "TEST") + expect_true(as.factor("test") %like% "TEST") expect_identical(factor(c("Test case", "Something different", "Yet another thing")) %like% c("case", "diff", "yet"), c(TRUE, TRUE, TRUE))