From 81af41da3a0ed85a02fe70cba0ca9b0531f0676d Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Thu, 17 Dec 2020 16:22:25 +0100 Subject: [PATCH] (v1.4.0.9041) updates based on review --- DESCRIPTION | 6 +- NEWS.md | 11 +- R/aa_helper_functions.R | 25 +++- R/aa_helper_pm_functions.R | 46 ++++---- R/ab.R | 7 +- R/ab_from_text.R | 2 +- R/amr.R | 4 +- R/catalogue_of_life.R | 4 +- R/data.R | 4 +- R/disk.R | 11 +- R/first_isolate.R | 2 +- R/g.test.R | 4 +- R/globals.R | 3 +- R/join_microorganisms.R | 2 +- R/mic.R | 10 +- R/mo.R | 107 +++++++++-------- R/mo_property.R | 2 +- R/mo_source.R | 108 ++++++++++-------- R/proportion.R | 4 +- R/resistance_predict.R | 2 +- R/rsi.R | 46 ++++---- R/rsi_calc.R | 6 +- R/translate.R | 4 +- R/zzz.R | 1 + data-raw/reproduction_of_poorman.R | 4 + docs/404.html | 2 +- docs/LICENSE-text.html | 2 +- docs/articles/datasets.html | 40 +++---- .../header-attrs-2.6/header-attrs.js | 12 ++ docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 2 +- docs/news/index.html | 19 +-- docs/pkgdown.yml | 2 +- docs/reference/AMR.html | 6 +- docs/reference/ab_from_text.html | 40 +++---- docs/reference/antibiotics.html | 44 +++---- docs/reference/as.ab.html | 45 ++++---- docs/reference/as.mo.html | 44 +++---- docs/reference/bug_drug_combinations.html | 40 +++---- docs/reference/catalogue_of_life.html | 42 +++---- docs/reference/count.html | 42 +++---- docs/reference/first_isolate.html | 40 +++---- docs/reference/g.test.html | 40 +++---- docs/reference/ggplot_rsi.html | 42 +++---- docs/reference/index.html | 2 +- docs/reference/join.html | 40 +++---- docs/reference/mo_property.html | 40 +++---- docs/reference/mo_source.html | 76 +++++++----- docs/reference/plot.html | 2 +- docs/reference/proportion.html | 42 +++---- docs/reference/resistance_predict.html | 40 +++---- docs/reference/translate.html | 40 +++---- docs/survey.html | 2 +- man/AMR.Rd | 4 +- man/ab_from_text.Rd | 2 +- man/antibiotics.Rd | 4 +- man/as.ab.Rd | 8 +- man/as.mo.Rd | 4 +- man/bug_drug_combinations.Rd | 2 +- man/catalogue_of_life.Rd | 4 +- man/count.Rd | 4 +- man/first_isolate.Rd | 2 +- man/g.test.Rd | 2 +- man/ggplot_rsi.Rd | 4 +- man/join.Rd | 2 +- man/mo_property.Rd | 2 +- man/mo_source.Rd | 36 ++++-- man/proportion.Rd | 4 +- man/resistance_predict.Rd | 2 +- man/translate.Rd | 2 +- tests/testthat/test-rsi.R | 14 +-- tests/testthat/test-zzz.R | 5 +- vignettes/datasets.Rmd | 13 +-- 74 files changed, 710 insertions(+), 627 deletions(-) create mode 100644 docs/articles/datasets_files/header-attrs-2.6/header-attrs.js diff --git a/DESCRIPTION b/DESCRIPTION index 38ae3acb..71a599cb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.4.0.9040 -Date: 2020-12-16 +Version: 1.4.0.9041 +Date: 2020-12-17 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), @@ -47,6 +47,7 @@ Suggests: ggplot2, knitr, microbenchmark, + pillar, readxl, rmarkdown, rstudioapi, @@ -54,6 +55,7 @@ Suggests: skimr, testthat, tidyr, + tidyselect, xml2 VignetteBuilder: knitr,rmarkdown URL: https://msberends.github.io/AMR/, https://github.com/msberends/AMR diff --git a/NEWS.md b/NEWS.md index e19c7bf6..dae2f341 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ -# AMR 1.4.0.9040 -## Last updated: 16 December 2020 +# AMR 1.4.0.9041 +## Last updated: 17 December 2020 + +Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscript about this package to. We are those reviewers very grateful for going through our code so thoroughly! ### New * Function `is_new_episode()` to determine patient episodes which are not necessarily based on microorganisms. It also supports grouped variables with e.g. `mutate()`, `filter()` and `summarise()` of the `dplyr` package: @@ -26,6 +28,7 @@ as_tibble() ``` * For all function parameters in the code, it is now defined what the exact type of user input should be (inspired by the [`typed`](https://github.com/moodymudskipper/typed) package). If the user input for a certain function does not meet the requirements for a specific parameter (such as the class or length), an informative error will be thrown. This makes the package more robust and the use of it more reproducible and reliable. In total, more than 400 arguments were defined. +* Fix for `set_mo_source()`, that previously would not remember the file location of the original file * Deprecated function `p_symbol()` that not really fits the scope of this package. It will be removed in a future version. See [here](https://github.com/msberends/AMR/blob/v1.4.0/R/p_symbol.R) for the source code to preserve it. * Better determination of disk zones and MIC values when running `as.rsi()` on a data.frame * Updated coagulase-negative staphylococci determination with Becker *et al.* 2020 (PMID 32056452), meaning that the species *S. argensis*, *S. caeli*, *S. debuckii*, *S. edaphicus* and *S. pseudoxylosus* are now all considered CoNS @@ -40,14 +43,16 @@ * Fix for plotting MIC values with `plot()` * Added `plot()` generic to class `` * LA-MRSA and CA-MRSA are now recognised as an abbreviation for *Staphylococcus aureus*, meaning that e.g. `mo_genus("LA-MRSA")` will return `"Staphylococcus"` and `mo_is_gram_positive("LA-MRSA")` will return `TRUE`. +* Fix for using `as.rsi()` on a `data.frame` that only contains one column for antibiotic interpretations ### Other * All messages and warnings thrown by this package now break sentences on whole words * More extensive unit tests +* Internal calls to `options()` were all removed in favour of a new internal environment `mo_env` # AMR 1.4.0 -Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscipt about this package to. We are those reviewers very grateful for going through our code so thoroughly! +Note: some changes in this version were suggested by anonymous reviewers from the journal we submitted our manuscript about this package to. We are those reviewers very grateful for going through our code so thoroughly! ### New * Support for 'EUCAST Expert Rules' / 'EUCAST Intrinsic Resistance and Unusual Phenotypes' version 3.2 of May 2020. With this addition to the previously implemented version 3.1 of 2016, the `eucast_rules()` function can now correct for more than 180 different antibiotics and the `mdro()` function can determine multidrug resistance based on more than 150 different antibiotics. All previously implemented versions of the EUCAST rules are now maintained and kept available in this package. The `eucast_rules()` function consequently gained the parameters `version_breakpoints` (at the moment defaults to v10.0, 2020) and `version_expertrules` (at the moment defaults to v3.2, 2020). The `example_isolates` data set now also reflects the change from v3.1 to v3.2. The `mdro()` function now accepts `guideline == "EUCAST3.1"` and `guideline == "EUCAST3.2"`. diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index 18b55a4c..ddc601c3 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -101,6 +101,8 @@ check_dataset_integrity <- function() { # package not yet loaded require("AMR") }) + stop_if(!check_microorganisms | !check_antibiotics, + "the data set `microorganisms` or `antibiotics` was overwritten in your environment because another package with the same object names was loaded _after_ the AMR package, preventing the AMR package from working correctly. Please load the AMR package last.") invisible(TRUE) } @@ -224,10 +226,11 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { stop_ifnot_installed(pkg) } tryCatch( - get(name, envir = asNamespace(pkg)), + # don't use get() to avoid fetching non-API functions + getExportedValue(name = name, ns = asNamespace(pkg)), error = function(e) { if (isTRUE(error_on_fail)) { - stop_("function ", name, "() not found in package '", pkg, + stop_("function ", name, "() is not an exported object from package '", pkg, "'. Please create an issue at https://github.com/msberends/AMR/issues. Many thanks!", call = FALSE) } else { @@ -239,7 +242,7 @@ import_fn <- function(name, pkg, error_on_fail = TRUE) { # this alternative wrapper to the message(), warning() and stop() functions: # - wraps text to never break lines within words # - ignores formatted text while wrapping -# - adds indentation dependent on the type of message (like NOTE) +# - adds indentation dependent on the type of message (such as NOTE) # - can add additional formatting functions like blue or bold text word_wrap <- function(..., add_fn = list(), @@ -690,6 +693,17 @@ set_clean_class <- function(x, new_class) { x } +formatted_filesize <- function(...) { + size_kb <- file.size(...) / 1024 + if (size_kb < 1) { + paste(round(size_kb, 1), "kB") + } else if (size_kb < 100) { + paste(round(size_kb, 0), "kB") + } else { + paste(round(size_kb / 1024, 1), "MB") + } +} + create_pillar_column <- function(x, ...) { new_pillar_shaft_simple <- import_fn("new_pillar_shaft_simple", "pillar", error_on_fail = FALSE) if (!is.null(new_pillar_shaft_simple)) { @@ -817,7 +831,7 @@ percentage <- function(x, digits = NULL, ...) { } # prevent dependency on package 'backports' -# these functions were not available in previous versions of R (last checked: R 4.0.2) +# these functions were not available in previous versions of R (last checked: R 4.0.3) # see here for the full list: https://github.com/r-lib/backports strrep <- function(x, times) { x <- as.character(x) @@ -861,3 +875,6 @@ str2lang <- function(s) { isNamespaceLoaded <- function(pkg) { pkg %in% loadedNamespaces() } +lengths = function(x, use.names = TRUE) { + vapply(x, length, FUN.VALUE = NA_integer_, USE.NAMES = use.names) +} diff --git a/R/aa_helper_pm_functions.R b/R/aa_helper_pm_functions.R index aa5ff7b5..52dd75a1 100644 --- a/R/aa_helper_pm_functions.R +++ b/R/aa_helper_pm_functions.R @@ -388,29 +388,29 @@ pm_group_size <- function(x) { pm_n_groups <- function(x) { nrow(pm_group_data(x)) } -pm_group_split <- function(.data, ..., .keep = TRUE) { - dots_len <- ...length() > 0L - if (pm_has_groups(.data) && isTRUE(dots_len)) { - warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") - } - if (!pm_has_groups(.data) && isTRUE(dots_len)) { - .data <- pm_group_by(.data, ...) - } - if (!pm_has_groups(.data) && isFALSE(dots_len)) { - return(list(.data)) - } - pm_context$setup(.data) - on.exit(pm_context$clean(), add = TRUE) - pm_groups <- pm_get_groups(.data) - attr(pm_context$.data, "pm_groups") <- NULL - res <- pm_split_into_groups(pm_context$.data, pm_groups) - names(res) <- NULL - if (isFALSE(.keep)) { - res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups]) - } - any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) - res[any_empty] -} +# pm_group_split <- function(.data, ..., .keep = TRUE) { +# dots_len <- ...length() > 0L +# if (pm_has_groups(.data) && isTRUE(dots_len)) { +# warning("... is ignored in pm_group_split(), please use pm_group_by(..., .add = TRUE) %pm>% pm_group_split()") +# } +# if (!pm_has_groups(.data) && isTRUE(dots_len)) { +# .data <- pm_group_by(.data, ...) +# } +# if (!pm_has_groups(.data) && isFALSE(dots_len)) { +# return(list(.data)) +# } +# pm_context$setup(.data) +# on.exit(pm_context$clean(), add = TRUE) +# pm_groups <- pm_get_groups(.data) +# attr(pm_context$.data, "pm_groups") <- NULL +# res <- pm_split_into_groups(pm_context$.data, pm_groups) +# names(res) <- NULL +# if (isFALSE(.keep)) { +# res <- lapply(res, function(x) x[, !colnames(x) %in% pm_groups]) +# } +# any_empty <- unlist(lapply(res, function(x) !(nrow(x) == 0L))) +# res[any_empty] +# } pm_group_keys <- function(.data) { pm_groups <- pm_get_groups(.data) diff --git a/R/ab.R b/R/ab.R index a9e2d4e8..3d67ccdf 100755 --- a/R/ab.R +++ b/R/ab.R @@ -37,13 +37,14 @@ #' #' All these properties will be searched for the user input. The [as.ab()] can correct for different forms of misspelling: #' -#' * Wrong spelling of drug names (like "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc. +#' * Wrong spelling of drug names (such as "tobramicin" or "gentamycin"), which corrects for most audible similarities such as f/ph, x/ks, c/z/s, t/th, etc. #' * Too few or too many vowels or consonants -#' * Switching two characters (like "mreopenem", often the case in clinical data, when doctors typed too fast) +#' * Switching two characters (such as "mreopenem", often the case in clinical data, when doctors typed too fast) #' * Digitalised paper records, leaving artefacts like 0/o/O (zero and O's), B/8, n/r, etc. #' -#' Use the [ab_property()] functions to get properties based on the returned antibiotic ID, see Examples. +#' Use the [`ab_*`][ab_property()] functions to get properties based on the returned antibiotic ID, see Examples. #' +#' Note: the [as.ab()] and [`ab_*`][ab_property()] functions may use very long regular expression to match brand names of antimicrobial agents. This may fail on some systems. #' @section Source: #' World Health Organization (WHO) Collaborating Centre for Drug Statistics Methodology: \url{https://www.whocc.no/atc_ddd_index/} #' diff --git a/R/ab_from_text.R b/R/ab_from_text.R index c3149c98..cf78d35c 100644 --- a/R/ab_from_text.R +++ b/R/ab_from_text.R @@ -46,7 +46,7 @@ #' Without using `collapse`, this function will return a [list]. This can be convenient to use e.g. inside a `mutate()`):\cr #' `df %>% mutate(abx = ab_from_text(clinical_text))` #' -#' The returned AB codes can be transformed to official names, groups, etc. with all [ab_property()] functions like [ab_name()] and [ab_group()], or by using the `translate_ab` parameter. +#' The returned AB codes can be transformed to official names, groups, etc. with all [`ab_*`][ab_property()] functions such as [ab_name()] and [ab_group()], or by using the `translate_ab` parameter. #' #' With using `collapse`, this function will return a [character]:\cr #' `df %>% mutate(abx = ab_from_text(clinical_text, collapse = "|"))` diff --git a/R/amr.R b/R/amr.R index 0b9910eb..34f35018 100644 --- a/R/amr.R +++ b/R/amr.R @@ -42,8 +42,8 @@ #' - Determining multi-drug resistance (MDR) / multi-drug resistant organisms (MDRO) #' - Calculating (empirical) susceptibility of both mono therapy and combination therapies #' - Predicting future antimicrobial resistance using regression models -#' - Getting properties for any microorganism (like Gram stain, species, genus or family) -#' - Getting properties for any antibiotic (like name, code of EARS-Net/ATC/LOINC/PubChem, defined daily dose or trade name) +#' - Getting properties for any microorganism (such as Gram stain, species, genus or family) +#' - Getting properties for any antibiotic (such as name, code of EARS-Net/ATC/LOINC/PubChem, defined daily dose or trade name) #' - Plotting antimicrobial resistance #' - Applying EUCAST expert rules #' - Getting SNOMED codes of a microorganism, or getting properties of a microorganism based on a SNOMED code diff --git a/R/catalogue_of_life.R b/R/catalogue_of_life.R index 1d9f302e..239e6b38 100755 --- a/R/catalogue_of_life.R +++ b/R/catalogue_of_life.R @@ -50,8 +50,8 @@ format_included_data_number <- function(data) { #' @section Included taxa: #' Included are: #' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom %in% c("Archeae", "Bacteria", "Chromista", "Protozoa")), ])` (sub)species from the kingdoms of Archaea, Bacteria, Chromista and Protozoa -#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi" & microorganisms$order %in% c("Eurotiales", "Microascales", "Mucorales", "Onygenales", "Pneumocystales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales")), ])` (sub)species from these orders of the kingdom of Fungi: Eurotiales, Microascales, Mucorales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales, as well as `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi" & !microorganisms$order %in% c("Eurotiales", "Microascales", "Mucorales", "Onygenales", "Pneumocystales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales")), ])` other fungal (sub)species. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (like all species of *Aspergillus*, *Candida*, *Cryptococcus*, *Histplasma*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*). -#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), ])` (sub)species from `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), "genus"])` other relevant genera from the kingdom of Animalia (like *Strongyloides* and *Taenia*) +#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi" & microorganisms$order %in% c("Eurotiales", "Microascales", "Mucorales", "Onygenales", "Pneumocystales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales")), ])` (sub)species from these orders of the kingdom of Fungi: Eurotiales, Microascales, Mucorales, Onygenales, Pneumocystales, Saccharomycetales, Schizosaccharomycetales and Tremellales, as well as `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Fungi" & !microorganisms$order %in% c("Eurotiales", "Microascales", "Mucorales", "Onygenales", "Pneumocystales", "Saccharomycetales", "Schizosaccharomycetales", "Tremellales")), ])` other fungal (sub)species. The kingdom of Fungi is a very large taxon with almost 300,000 different (sub)species, of which most are not microbial (but rather macroscopic, like mushrooms). Because of this, not all fungi fit the scope of this package and including everything would tremendously slow down our algorithms too. By only including the aforementioned taxonomic orders, the most relevant fungi are covered (such as all species of *Aspergillus*, *Candida*, *Cryptococcus*, *Histplasma*, *Pneumocystis*, *Saccharomyces* and *Trichophyton*). +#' - All `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), ])` (sub)species from `r format_included_data_number(microorganisms[which(microorganisms$kingdom == "Animalia"), "genus"])` other relevant genera from the kingdom of Animalia (such as *Strongyloides* and *Taenia*) #' - All `r format_included_data_number(microorganisms.old)` previously accepted names of all included (sub)species (these were taxonomically renamed) #' - The complete taxonomic tree of all included (sub)species: from kingdom to subspecies #' - The responsible author(s) and year of scientific publication diff --git a/R/data.R b/R/data.R index f0ca3735..2db75973 100755 --- a/R/data.R +++ b/R/data.R @@ -25,10 +25,10 @@ #' Data sets with `r format(nrow(antibiotics) + nrow(antivirals), big.mark = ",")` antimicrobials #' -#' Two data sets containing all antibiotics/antimycotics and antivirals. Use [as.ab()] or one of the [ab_property()] functions to retrieve values from the [antibiotics] data set. Three identifiers are included in this data set: an antibiotic ID (`ab`, primarily used in this package) as defined by WHONET/EARS-Net, an ATC code (`atc`) as defined by the WHO, and a Compound ID (`cid`) as found in PubChem. Other properties in this data set are derived from one or more of these codes. +#' Two data sets containing all antibiotics/antimycotics and antivirals. Use [as.ab()] or one of the [`ab_*`][ab_property()] functions to retrieve values from the [antibiotics] data set. Three identifiers are included in this data set: an antibiotic ID (`ab`, primarily used in this package) as defined by WHONET/EARS-Net, an ATC code (`atc`) as defined by the WHO, and a Compound ID (`cid`) as found in PubChem. Other properties in this data set are derived from one or more of these codes. #' @format #' ### For the [antibiotics] data set: a [data.frame] with `r nrow(antibiotics)` observations and `r ncol(antibiotics)` variables: -#' - `ab`\cr Antibiotic ID as used in this package (like `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available +#' - `ab`\cr Antibiotic ID as used in this package (such as `AMC`), using the official EARS-Net (European Antimicrobial Resistance Surveillance Network) codes where available #' - `atc`\cr ATC code (Anatomical Therapeutic Chemical) as defined by the WHOCC, like `J01CR02` #' - `cid`\cr Compound ID as found in PubChem #' - `name`\cr Official name as used by WHONET/EARS-Net or the WHO diff --git a/R/disk.R b/R/disk.R index 824052ca..627f5473 100644 --- a/R/disk.R +++ b/R/disk.R @@ -114,8 +114,9 @@ all_valid_disks <- function(x) { if (!inherits(x, c("disk", "character", "numeric", "integer"))) { return(FALSE) } - x_disk <- suppressWarnings(as.disk(x[!is.na(x)])) - !any(is.na(x_disk)) & !all(is.na(x)) + x_disk <- tryCatch(suppressWarnings(as.disk(x[!is.na(x)])), + error = function(e) NA) + !any(is.na(x_disk)) && !all(is.na(x)) } #' @rdname as.disk @@ -223,14 +224,12 @@ unique.disk <- function(x, incomparables = FALSE, ...) { # will be exported using s3_register() in R/zzz.R get_skimmers.disk <- function(column) { - sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE) - inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE) - sfl( + skimr::sfl( skim_type = "disk", min = ~min(as.double(.), na.rm = TRUE), max = ~max(as.double(.), na.rm = TRUE), median = ~stats::median(as.double(.), na.rm = TRUE), n_unique = ~pm_n_distinct(., na.rm = TRUE), - hist = ~inline_hist(stats::na.omit(as.double(.))) + hist = ~skimr::inline_hist(stats::na.omit(as.double(.))) ) } diff --git a/R/first_isolate.R b/R/first_isolate.R index ec5de8af..6fd2e193 100755 --- a/R/first_isolate.R +++ b/R/first_isolate.R @@ -31,7 +31,7 @@ #' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column with a date class #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive) #' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()]. -#' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (like test codes for screening). In that case `testcodes_exclude` will be ignored. +#' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (such as test codes for screening). In that case `testcodes_exclude` will be ignored. #' @param col_specimen column name of the specimen type or group #' @param col_icu column name of the logicals (`TRUE`/`FALSE`) whether a ward or department is an Intensive Care Unit (ICU) #' @param col_keyantibiotics column name of the key antibiotics to determine first *weighted* isolates, see [key_antibiotics()]. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use `col_keyantibiotics = FALSE` to prevent this. diff --git a/R/g.test.R b/R/g.test.R index db5bf294..06263763 100755 --- a/R/g.test.R +++ b/R/g.test.R @@ -34,7 +34,7 @@ #' #' The p-value is computed from the asymptotic chi-squared distribution of the test statistic. #' -#' In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (like the *G*-test) but rather that for Fisher's exact test. +#' In the contingency table case simulation is done by random sampling from the set of all contingency tables with given marginals, and works only if the marginals are strictly positive. Note that this is not the usual sampling situation assumed for a chi-squared test (such as the *G*-test) but rather that for Fisher's exact test. #' #' In the goodness-of-fit case simulation is done by random sampling from the discrete distribution specified by `p`, each sample being of size `n = sum(x)`. This simulation is done in \R and may be slow. #' @@ -144,7 +144,7 @@ g.test <- function(x, DNAME <- paste(paste(DNAME, collapse = "\n"), "and", paste(DNAME2, collapse = "\n")) } - if (any(x < 0) || anyNA(x)) + if (any(x < 0) || any(is.na((x)))) # this last one was anyNA, but only introduced in R 3.1.0 stop("all entries of 'x' must be nonnegative and finite") if ((n <- sum(x)) == 0) stop("at least one entry of 'x' must be positive") diff --git a/R/globals.R b/R/globals.R index bc1d87f7..0829d9ee 100755 --- a/R/globals.R +++ b/R/globals.R @@ -23,8 +23,7 @@ # how to conduct AMR analysis: https://msberends.github.io/AMR/ # # ==================================================================== # -globalVariables(c("...length", # for pm_group_split() on R 3.3 - ".rowid", +globalVariables(c(".rowid", "ab", "ab_txt", "angle", diff --git a/R/join_microorganisms.R b/R/join_microorganisms.R index 42eb0083..fa0ce027 100755 --- a/R/join_microorganisms.R +++ b/R/join_microorganisms.R @@ -31,7 +31,7 @@ #' @name join #' @aliases join inner_join #' @param x existing table to join, or character vector -#' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (like `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("bacteria_id" = "fullname")`) +#' @param by a variable to join by - if left empty will search for a column with class [`mo`] (created with [as.mo()]) or will be `"mo"` if that column name exists in `x`, could otherwise be a column name of `x` with values that exist in `microorganisms$mo` (such as `by = "bacteria_id"`), or another column in [microorganisms] (but then it should be named, like `by = c("bacteria_id" = "fullname")`) #' @param suffix if there are non-joined duplicate variables in `x` and `y`, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. #' @param ... ignored #' @details **Note:** As opposed to the `join()` functions of `dplyr`, [character] vectors are supported and at default existing columns will get a suffix `"2"` and the newly joined columns will not get a suffix. diff --git a/R/mic.R b/R/mic.R index bb192e8f..0750f8bc 100755 --- a/R/mic.R +++ b/R/mic.R @@ -142,7 +142,7 @@ all_valid_mics <- function(x) { } x_mic <- tryCatch(suppressWarnings(as.mic(x[!is.na(x)])), error = function(e) NA) - !any(is.na(x_mic)) & !all(is.na(x)) + !any(is.na(x_mic)) && !all(is.na(x)) } #' @rdname as.mic @@ -175,7 +175,7 @@ as.numeric.mic <- function(x, ...) { #' @method droplevels mic #' @export #' @noRd -droplevels.mic <- function(x, exclude = ifelse(anyNA(levels(x)), NULL, NA), ...) { +droplevels.mic <- function(x, exclude = if (any(is.na(levels(x)))) NULL else NA, ...) { x <- droplevels.factor(x, exclude = exclude, ...) class(x) <- c("mic", "ordered", "factor") x @@ -323,14 +323,12 @@ unique.mic <- function(x, incomparables = FALSE, ...) { # will be exported using s3_register() in R/zzz.R get_skimmers.mic <- function(column) { - sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE) - inline_hist <- import_fn("inline_hist", "skimr", error_on_fail = FALSE) - sfl( + skimr::sfl( skim_type = "mic", min = ~as.character(sort(stats::na.omit(.))[1]), max = ~as.character(sort(stats::na.omit(.))[length(stats::na.omit(.))]), median = ~as.character(stats::na.omit(.)[as.double(stats::na.omit(.)) == median(as.double(stats::na.omit(.)))])[1], n_unique = ~pm_n_distinct(., na.rm = TRUE), - hist_log2 = ~inline_hist(log2(as.double(stats::na.omit(.)))) + hist_log2 = ~skimr::inline_hist(log2(as.double(stats::na.omit(.)))) ) } diff --git a/R/mo.R b/R/mo.R index de212b9f..0df4163d 100755 --- a/R/mo.R +++ b/R/mo.R @@ -25,7 +25,7 @@ #' Transform input to a microorganism ID #' -#' Use this function to determine a valid microorganism ID ([`mo`]). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (like `"S. aureus"`), an abbreviation known in the field (like `"MRSA"`), or just a genus. Please see *Examples*. +#' Use this function to determine a valid microorganism ID ([`mo`]). Determination is done using intelligent rules and the complete taxonomic kingdoms Bacteria, Chromista, Protozoa, Archaea and most microbial species from the kingdom Fungi (see Source). The input can be almost anything: a full name (like `"Staphylococcus aureus"`), an abbreviated name (such as `"S. aureus"`), an abbreviation known in the field (such as `"MRSA"`), or just a genus. Please see *Examples*. #' @inheritSection lifecycle Stable lifecycle #' @param x a character vector or a [data.frame] with one or two columns #' @param Becker a logical to indicate whether staphylococci should be categorised into coagulase-negative staphylococci ("CoNS") and coagulase-positive staphylococci ("CoPS") instead of their own species, according to Karsten Becker *et al.* (1,2,3). @@ -111,7 +111,7 @@ #' @return A [character] [vector] with additional class [`mo`] #' @seealso [microorganisms] for the [data.frame] that is being used to determine ID's. #' -#' The [mo_property()] functions (like [mo_genus()], [mo_gramstain()]) to get properties based on the returned code. +#' The [`mo_*`][mo_property()] functions (such as [mo_genus()], [mo_gramstain()]) to get properties based on the returned code. #' @inheritSection AMR Reference data publicly available #' @inheritSection AMR Read more on our website! #' @examples @@ -199,10 +199,10 @@ as.mo <- function(x, x[trimws2(x) %like% paste0("^(", translate_AMR("no|not", language = language), ") [a-z]+")] <- "UNKNOWN" uncertainty_level <- translate_allow_uncertain(allow_uncertain) - if (mo_source_isvalid(reference_df) + if (!is.null(reference_df) + && mo_source_isvalid(reference_df) && isFALSE(Becker) && isFALSE(Lancefield) - && !is.null(reference_df) && all(x %in% unlist(reference_df), na.rm = TRUE)) { reference_df <- repair_reference_df(reference_df) @@ -358,11 +358,11 @@ exec_as.mo <- function(x, x[trimws2(x) %like% paste0("^(", translate_AMR("no|not", language = language), ") [a-z]+")] <- "UNKNOWN" if (initial_search == TRUE) { - options(mo_failures = NULL) - options(mo_uncertainties = NULL) - options(mo_renamed = NULL) + mo_env$mo_failures <- NULL + mo_env$mo_uncertainties <- NULL + mo_env$mo_renamed <- NULL } - options(mo_renamed_last_run = NULL) + mo_env$mo_renamed_last_run <- NULL failures <- character(0) uncertainty_level <- translate_allow_uncertain(allow_uncertain) @@ -595,7 +595,7 @@ exec_as.mo <- function(x, } else { x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) } - options(mo_renamed_last_run = found["fullname"]) + mo_env$mo_renamed_last_run <- found["fullname"] was_renamed(name_old = found["fullname"], name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), ref_old = found["ref"], @@ -970,7 +970,7 @@ exec_as.mo <- function(x, } else { x[i] <- lookup(fullname == found["fullname_new"], haystack = MO_lookup) } - options(mo_renamed_last_run = found["fullname"]) + mo_env$mo_renamed_last_run <- found["fullname"] was_renamed(name_old = found["fullname"], name_new = lookup(fullname == found["fullname_new"], "fullname", haystack = MO_lookup), ref_old = found["ref"], @@ -1022,7 +1022,7 @@ exec_as.mo <- function(x, ref_old = found["ref"], ref_new = lookup(fullname == found["fullname_new"], "ref", haystack = MO_lookup), mo = lookup(fullname == found["fullname_new"], "mo", haystack = MO_lookup)) - options(mo_renamed_last_run = found["fullname"]) + mo_env$mo_renamed_last_run <- found["fullname"] uncertainties <<- rbind(uncertainties, format_uncertainty_as_df(uncertainty_level = now_checks_for_uncertainty_level, input = a.x_backup, @@ -1393,7 +1393,7 @@ exec_as.mo <- function(x, # handling failures ---- failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0 & initial_search == TRUE) { - options(mo_failures = sort(unique(failures))) + mo_env$mo_failures <- sort(unique(failures)) plural <- c("value", "it", "was") if (pm_n_distinct(failures) > 1) { plural <- c("values", "them", "were") @@ -1420,7 +1420,7 @@ exec_as.mo <- function(x, # handling uncertainties ---- if (NROW(uncertainties) > 0 & initial_search == TRUE) { uncertainties <- as.list(pm_distinct(uncertainties, input, .keep_all = TRUE)) - options(mo_uncertainties = uncertainties) + mo_env$mo_uncertainties <- uncertainties plural <- c("", "it", "was") if (length(uncertainties$input) > 1) { @@ -1540,13 +1540,13 @@ was_renamed <- function(name_old, name_new, ref_old = "", ref_new = "", mo = "") new_ref = ref_new, mo = mo, stringsAsFactors = FALSE) - already_set <- getOption("mo_renamed") + already_set <- mo_env$mo_renamed if (!is.null(already_set)) { - options(mo_renamed = rbind(already_set, + mo_env$mo_renamed = rbind(already_set, newly_set, - stringsAsFactors = FALSE)) + stringsAsFactors = FALSE) } else { - options(mo_renamed = newly_set) + mo_env$mo_renamed <- newly_set } } @@ -1554,9 +1554,9 @@ format_uncertainty_as_df <- function(uncertainty_level, input, result_mo, candidates = NULL) { - if (!is.null(getOption("mo_renamed_last_run", default = NULL))) { - fullname <- getOption("mo_renamed_last_run") - options(mo_renamed_last_run = NULL) + if (!is.null(mo_env$mo_renamed_last_run)) { + fullname <- mo_env$mo_renamed_last_run + mo_env$mo_renamed_last_run <- NULL renamed_to <- MO_lookup[match(result_mo, MO_lookup$mo), "fullname", drop = TRUE][1] } else { fullname <- MO_lookup[match(result_mo, MO_lookup$mo), "fullname", drop = TRUE][1] @@ -1603,27 +1603,32 @@ freq.mo <- function(x, ...) { if (is.null(digits)) { digits <- 2 } - freq.default <- import_fn("freq.default", "cleaner", error_on_fail = FALSE) - freq.default(x = x, ..., - .add_header = list(`Gram-negative` = paste0(format(sum(grams == "Gram-negative", na.rm = TRUE), - big.mark = ",", - decimal.mark = "."), - " (", percentage(sum(grams == "Gram-negative", na.rm = TRUE) / length(grams), digits = digits), - ")"), - `Gram-positive` = paste0(format(sum(grams == "Gram-positive", na.rm = TRUE), - big.mark = ",", - decimal.mark = "."), - " (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams), digits = digits), - ")"), - `Nr. of genera` = pm_n_distinct(mo_genus(x_noNA, language = NULL)), - `Nr. of species` = pm_n_distinct(paste(mo_genus(x_noNA, language = NULL), - mo_species(x_noNA, language = NULL))))) + cleaner::freq.default( + x = x, + ..., + .add_header = list( + `Gram-negative` = paste0( + format(sum(grams == "Gram-negative", na.rm = TRUE), + big.mark = ",", + decimal.mark = "."), + " (", percentage(sum(grams == "Gram-negative", na.rm = TRUE) / length(grams), + digits = digits), + ")"), + `Gram-positive` = paste0( + format(sum(grams == "Gram-positive", na.rm = TRUE), + big.mark = ",", + decimal.mark = "."), + " (", percentage(sum(grams == "Gram-positive", na.rm = TRUE) / length(grams), + digits = digits), + ")"), + `Nr. of genera` = pm_n_distinct(mo_genus(x_noNA, language = NULL)), + `Nr. of species` = pm_n_distinct(paste(mo_genus(x_noNA, language = NULL), + mo_species(x_noNA, language = NULL))))) } # will be exported using s3_register() in R/zzz.R get_skimmers.mo <- function(column) { - sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE) - sfl( + skimr::sfl( skim_type = "mo", unique_total = ~pm_n_distinct(., na.rm = TRUE), gram_negative = ~sum(mo_is_gram_negative(stats::na.omit(.))), @@ -1736,16 +1741,16 @@ unique.mo <- function(x, incomparables = FALSE, ...) { #' @rdname as.mo #' @export mo_failures <- function() { - getOption("mo_failures") + mo_env$mo_failures } #' @rdname as.mo #' @export mo_uncertainties <- function() { - if (is.null(getOption("mo_uncertainties"))) { + if (is.null(mo_env$mo_uncertainties)) { return(NULL) } - set_clean_class(as.data.frame(getOption("mo_uncertainties"), + set_clean_class(as.data.frame(mo_env$mo_uncertainties, stringsAsFactors = FALSE), new_class = c("mo_uncertainties", "data.frame")) } @@ -1814,7 +1819,7 @@ print.mo_uncertainties <- function(x, ...) { #' @rdname as.mo #' @export mo_renamed <- function() { - items <- getOption("mo_renamed", default = NULL) + items <- mo_env$mo_renamed if (is.null(items)) { items <- data.frame(stringsAsFactors = FALSE) } else { @@ -1878,20 +1883,20 @@ translate_allow_uncertain <- function(allow_uncertain) { } get_mo_failures_uncertainties_renamed <- function() { - remember <- list(failures = getOption("mo_failures"), - uncertainties = getOption("mo_uncertainties"), - renamed = getOption("mo_renamed")) + remember <- list(failures = mo_env$mo_failures, + uncertainties = mo_env$mo_uncertainties, + renamed = mo_env$mo_renamed) # empty them, otherwise mo_shortname("Chlamydophila psittaci") will give 3 notes - options("mo_failures" = NULL) - options("mo_uncertainties" = NULL) - options("mo_renamed" = NULL) + mo_env$mo_failures <- NULL + mo_env$mo_uncertainties <- NULL + mo_env$mo_renamed <- NULL remember } load_mo_failures_uncertainties_renamed <- function(metadata) { - options("mo_failures" = metadata$failures) - options("mo_uncertainties" = metadata$uncertainties) - options("mo_renamed" = metadata$renamed) + mo_env$mo_failures <- metadata$failures + mo_env$mo_uncertainties <- metadata$uncertainties + mo_env$mo_renamed <- metadata$renamed } trimws2 <- function(x) { @@ -1978,3 +1983,5 @@ repair_reference_df <- function(reference_df) { reference_df[, "mo"] <- as.mo(reference_df[, "mo", drop = TRUE]) reference_df } + +mo_env <- new.env(hash = FALSE) diff --git a/R/mo_property.R b/R/mo_property.R index a96263b0..239a5c85 100755 --- a/R/mo_property.R +++ b/R/mo_property.R @@ -38,7 +38,7 @@ #' - `mo_ref("Escherichia blattae")` will return `"Burgess et al., 1973"` (with a message about the renaming) #' - `mo_ref("Shimwellia blattae")` will return `"Priest et al., 2010"` (without a message) #' -#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (like *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (like *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. +#' The short name - [mo_shortname()] - almost always returns the first character of the genus and the full species, like `"E. coli"`. Exceptions are abbreviations of staphylococci (such as *"CoNS"*, Coagulase-Negative Staphylococci) and beta-haemolytic streptococci (such as *"GBS"*, Group B Streptococci). Please bear in mind that e.g. *E. coli* could mean *Escherichia coli* (kingdom of Bacteria) as well as *Entamoeba coli* (kingdom of Protozoa). Returning to the full name will be done using [as.mo()] internally, giving priority to bacteria and human pathogens, i.e. `"E. coli"` will be considered *Escherichia coli*. In other words, `mo_fullname(mo_shortname("Entamoeba coli"))` returns `"Escherichia coli"`. #' #' Since the top-level of the taxonomy is sometimes referred to as 'kingdom' and sometimes as 'domain', the functions [mo_kingdom()] and [mo_domain()] return the exact same results. #' diff --git a/R/mo_source.R b/R/mo_source.R index 10493ab7..db55a1dc 100644 --- a/R/mo_source.R +++ b/R/mo_source.R @@ -30,16 +30,17 @@ #' This is **the fastest way** to have your organisation (or analysis) specific codes picked up and translated by this package. #' @inheritSection lifecycle Stable lifecycle #' @param path location of your reference file, see Details. Can be `""`, `NULL` or `FALSE` to delete the reference file. +#' @param destination destination of the compressed data file, default to the user's home directory. #' @rdname mo_source #' @name mo_source #' @aliases set_mo_source get_mo_source #' @details The reference file can be a text file separated with commas (CSV) or tabs or pipes, an Excel file (either 'xls' or 'xlsx' format) or an R object file (extension '.rds'). To use an Excel file, you will need to have the `readxl` package installed. #' -#' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and will ask to export it to `"~/.mo_source.rds"`. The CRAN policy disallows packages to write to the file system, although '*exceptions may be allowed in interactive sessions if the package obtains confirmation from the user*'. For this reason, this function only works in interactive sessions so that the user can **specifically confirm and allow** that this file will be created. +#' [set_mo_source()] will check the file for validity: it must be a [data.frame], must have a column named `"mo"` which contains values from [`microorganisms$mo`][microorganisms] and must have a reference column with your own defined values. If all tests pass, [set_mo_source()] will read the file into R and will ask to export it to `"~/mo_source.rds"`. The CRAN policy disallows packages to write to the file system, although '*exceptions may be allowed in interactive sessions if the package obtains confirmation from the user*'. For this reason, this function only works in interactive sessions so that the user can **specifically confirm and allow** that this file will be created. The destination of this file can be set with the `destination` parameter and defaults to the user's home directory. It can also be set as an \R option, using `options(AMR_mo_source = "my/location/file.rds)`. #' -#' The created compressed data file `"~/.mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location of the original file will be saved as an R option with `options(mo_source = path)`. Its timestamp will be saved with `options(mo_source_datetime = ...)`. +#' The created compressed data file `"mo_source.rds"` will be used at default for MO determination (function [as.mo()] and consequently all `mo_*` functions like [mo_genus()] and [mo_gramstain()]). The location and timestamp of the original file will be saved as an attribute to the compressed data file. #' -#' The function [get_mo_source()] will return the data set by reading `"~/.mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the aforementioned options `mo_source` and `mo_source_datetime`), it will call [set_mo_source()] to update the data file automatically if used in an interactive session. +#' The function [get_mo_source()] will return the data set by reading `"mo_source.rds"` with [readRDS()]. If the original file has changed (by checking the location and timestamp of the original file), it will call [set_mo_source()] to update the data file automatically if used in an interactive session. #' #' Reading an Excel file (`.xlsx`) with only one row has a size of 8-9 kB. The compressed file created with [set_mo_source()] will then have a size of 0.1 kB and can be read by [get_mo_source()] in only a couple of microseconds (millionths of a second). #' @@ -60,16 +61,18 @@ #' #' ``` #' set_mo_source("home/me/ourcodes.xlsx") -#' #> NOTE: Created mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx' -#' #> (columns "Organisation XYZ" and "mo") +#' #> NOTE: Created mo_source file '/Users/me/mo_source.rds' (0.3 kB) from +#' #> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns +#' #> "Organisation XYZ" and "mo" #' ``` #' -#' It has now created a file `"~/.mo_source.rds"` with the contents of our Excel file. Only the first column with foreign values and the 'mo' column will be kept when creating the RDS file. +#' It has now created a file `"~/mo_source.rds"` with the contents of our Excel file. Only the first column with foreign values and the 'mo' column will be kept when creating the RDS file. #' #' And now we can use it in our functions: #' #' ``` #' as.mo("lab_mo_ecoli") +#' #> Class #' #> [1] B_ESCHR_COLI #' #' mo_genus("lab_mo_kpneumoniae") @@ -77,6 +80,9 @@ #' #' # other input values still work too #' as.mo(c("Escherichia coli", "E. coli", "lab_mo_ecoli")) +#' #> NOTE: Translation to one microorganism was guessed with uncertainty. +#' #> Use mo_uncertainties() to review it. +#' #> Class #' #> [1] B_ESCHR_COLI B_ESCHR_COLI B_ESCHR_COLI #' ``` #' @@ -96,8 +102,10 @@ #' #' ``` #' as.mo("lab_mo_ecoli") -#' #> NOTE: Updated mo_source file '~/.mo_source.rds' from 'home/me/ourcodes.xlsx' -#' #> (columns "Organisation XYZ" and "mo") +#' #> NOTE: Updated mo_source file '/Users/me/mo_source.rds' (0.3 kB) from +#' #> '/Users/me/Documents/ourcodes.xlsx' (9 kB), columns +#' #> "Organisation XYZ" and "mo" +#' #> Class #' #> [1] B_ESCHR_COLI #' #' mo_genus("lab_Staph_aureus") @@ -108,25 +116,26 @@ #' #' ``` #' set_mo_source(NULL) -#' # Removed mo_source file '~/.mo_source.rds'. +#' #> Removed mo_source file '/Users/me/mo_source.rds' #' ``` #' -#' If the original Excel file is moved or deleted, the mo_source file will be removed upon the next use of [as.mo()]. If the mo_source file is manually deleted (i.e. without using [set_mo_source()]), the references to the mo_source file will be removed upon the next use of [as.mo()]. +#' If the original Excel file is moved or deleted, the mo_source file will be removed upon the next use of [as.mo()]. #' @export #' @inheritSection AMR Read more on our website! -set_mo_source <- function(path) { - meet_criteria(path, allow_class = "character", has_length = 1) +set_mo_source <- function(path, destination = getOption("AMR_mo_source", "~/mo_source.rds")) { + meet_criteria(path, allow_class = "character", has_length = 1, allow_NULL = TRUE) + meet_criteria(destination, allow_class = "character", has_length = 1) + stop_ifnot(destination %like% "[.]rds$", "the `destination` must be a file location with file extension .rds") - file_location <- path.expand("~/mo_source.rds") + mo_source_destination <- path.expand(destination) stop_ifnot(interactive(), "This function can only be used in interactive mode, since it must ask for the user's permission to write a file to their home folder.") if (is.null(path) || path %in% c(FALSE, "")) { - options(mo_source = NULL) - options(mo_source_timestamp = NULL) - if (file.exists(file_location)) { - unlink(file_location) - message_("Removed mo_source file '", font_bold(file_location), "'", + mo_env$mo_source <- NULL + if (file.exists(mo_source_destination)) { + unlink(mo_source_destination) + message_("Removed mo_source file '", font_bold(mo_source_destination), "'", add_fn = font_red, as_note = FALSE) } @@ -178,16 +187,19 @@ set_mo_source <- function(path) { } df <- as.data.frame(df, stringAsFactors = FALSE) + df[, "mo"] <- set_clean_class(df[, "mo", drop = TRUE], c("mo", "character")) # success - if (file.exists(file_location)) { + if (file.exists(mo_source_destination)) { action <- "Updated" } else { action <- "Created" # only ask when file is created, not when it is updated - txt <- paste0("This will write create the new file '", - file_location, - "', for which your permission is needed.\n\nDo you agree that this file will be created? ") + txt <- paste0(word_wrap(paste0("This will write create the new file '", + mo_source_destination, + "', for which your permission is needed.")), + "\n\n", + word_wrap("Do you agree that this file will be created?")) if ("rsasdtudioapi" %in% rownames(utils::installed.packages())) { showQuestion <- import_fn("showQuestion", "rstudioapi") q_continue <- showQuestion("Create new file in home directory", txt) @@ -198,42 +210,38 @@ set_mo_source <- function(path) { return(invisible()) } } - saveRDS(df, file_location) - options(mo_source = path) - options(mo_source_timestamp = as.character(file.info(path)$mtime)) - message_(action, " mo_source file '", font_bold(file_location), "'", - " from '", font_bold(path), "'", - '(columns "', colnames(df)[1], '" and "', colnames(df)[2], '")') + attr(df, "mo_source_location") <- path + attr(df, "mo_source_timestamp") <- file.mtime(path) + saveRDS(df, mo_source_destination) + mo_env$mo_source <- df + message_(action, " mo_source file '", font_bold(mo_source_destination), + "' (", formatted_filesize(mo_source_destination), + ") from '", font_bold(path), + "' (", formatted_filesize(path), + '), columns "', colnames(df)[1], '" and "', colnames(df)[2], '"') } #' @rdname mo_source #' @export -get_mo_source <- function() { - if (is.null(getOption("mo_source", NULL))) { +get_mo_source <- function(destination = getOption("AMR_mo_source", "~/mo_source.rds")) { + if (!file.exists(path.expand(destination))) { + if (interactive()) { + # source file might have been deleted, update reference + set_mo_source("") + } return(NULL) } - - if (!file.exists(path.expand("~/mo_source.rds"))) { - options(mo_source = NULL) - options(mo_source_timestamp = NULL) - message_("Removed references to deleted mo_source file (see ?mo_source)") - return(NULL) + if (is.null(mo_env$mo_source)) { + mo_env$mo_source <- readRDS(path.expand(destination)) } - old_time <- as.POSIXct(getOption("mo_source_timestamp")) - new_time <- as.POSIXct(as.character(file.info(getOption("mo_source", ""))$mtime)) - - if (is.na(new_time)) { - # source file was deleted, remove reference too - set_mo_source("") - return(NULL) + old_time <- attributes(mo_env$mo_source)$mo_source_timestamp + new_time <- file.mtime(attributes(mo_env$mo_source)$mo_source_location) + if (interactive() && !identical(old_time, new_time)) { + # source file was updated, also update reference + set_mo_source(attributes(mo_env$mo_source)$mo_source_location) } - if (interactive() && new_time != old_time) { - # set updated source - set_mo_source(getOption("mo_source")) - } - file_location <- path.expand("~/mo_source.rds") - readRDS(file_location) + mo_env$mo_source } mo_source_isvalid <- function(x, refer_to_name = "`reference_df`", stop_on_error = TRUE) { @@ -242,7 +250,7 @@ mo_source_isvalid <- function(x, refer_to_name = "`reference_df`", stop_on_error if (paste(deparse(substitute(x)), collapse = "") == "get_mo_source()") { return(TRUE) } - if (identical(x, get_mo_source())) { + if (is.null(mo_env$mo_source) && (identical(x, get_mo_source()))) { return(TRUE) } if (is.null(x)) { diff --git a/R/proportion.R b/R/proportion.R index bd130438..d982f0f6 100755 --- a/R/proportion.R +++ b/R/proportion.R @@ -34,9 +34,9 @@ #' @param as_percent a logical to indicate whether the output must be returned as a hundred fold with % sign (a character). A value of `0.123456` will then be returned as `"12.3%"`. #' @param only_all_tested (for combination therapies, i.e. using more than one variable for `...`): a logical to indicate that isolates must be tested for all antibiotics, see section *Combination therapy* below #' @param data a [data.frame] containing columns with class [`rsi`] (see [as.rsi()]) -#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()]. Use a value +#' @param translate_ab a column name of the [antibiotics] data set to translate the antibiotic abbreviations to, using [ab_property()] #' @inheritParams ab_property -#' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter `combine_IR`, but this now follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is `TRUE`. +#' @param combine_SI a logical to indicate whether all values of S and I must be merged into one, so the output only consists of S+I vs. R (susceptible vs. resistant). This used to be the parameter `combine_IR`, but this now follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section 'Interpretation of S, I and R' below. Default is `TRUE`. #' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. I+R (susceptible vs. non-susceptible). This is outdated, see parameter `combine_SI`. #' @inheritSection as.rsi Interpretation of R and S/I #' @details diff --git a/R/resistance_predict.R b/R/resistance_predict.R index 3d33eb00..4620fb5d 100755 --- a/R/resistance_predict.R +++ b/R/resistance_predict.R @@ -34,7 +34,7 @@ #' @param year_every unit of sequence between lowest year found in the data and `year_max` #' @param minimum minimal amount of available isolates per year to include. Years containing less observations will be estimated by the model. #' @param model the statistical model of choice. This could be a generalised linear regression model with binomial distribution (i.e. using `glm(..., family = binomial)``, assuming that a period of zero resistance was followed by a period of increasing resistance leading slowly to more and more resistance. See Details for all valid options. -#' @param I_as_S a logical to indicate whether values `I` should be treated as `S` (will otherwise be treated as `R`). The default, `TRUE`, follows the redefinition by EUCAST about the interpretion of I (increased exposure) in 2019, see section *Interpretation of S, I and R* below. +#' @param I_as_S a logical to indicate whether values `"I"` should be treated as `"S"` (will otherwise be treated as `"R"`). The default, `TRUE`, follows the redefinition by EUCAST about the interpretation of I (increased exposure) in 2019, see section *Interpretation of S, I and R* below. #' @param preserve_measurements a logical to indicate whether predictions of years that are actually available in the data should be overwritten by the original data. The standard errors of those years will be `NA`. #' @param info a logical to indicate whether textual analysis should be printed with the name and [summary()] of the statistical model. #' @param main title of the plot diff --git a/R/rsi.R b/R/rsi.R index f0cd96e1..4ccc35c4 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -481,7 +481,7 @@ as.rsi.data.frame <- function(x, meet_criteria(conserve_capped_values, allow_class = "logical", has_length = 1) meet_criteria(add_intrinsic_resistance, allow_class = "logical", has_length = 1) meet_criteria(reference_data, allow_class = "data.frame") - + for (i in seq_len(ncol(x))) { # don't keep factors if (is.factor(x[, i, drop = TRUE])) { @@ -494,7 +494,7 @@ as.rsi.data.frame <- function(x, if (is.null(col_mo)) { col_mo <- search_type_in_df(x = x, type = "mo", info = FALSE) } - + # -- UTIs col_uti <- uti if (is.null(col_uti)) { @@ -535,12 +535,13 @@ as.rsi.data.frame <- function(x, uti <- FALSE } } - + i <- 0 sel <- colnames(pm_select(x, ...)) if (!is.null(col_mo)) { sel <- sel[sel != col_mo] } + ab_cols <- colnames(x)[sapply(x, function(y) { i <<- i + 1 check <- is.mic(y) | is.disk(y) @@ -563,17 +564,16 @@ as.rsi.data.frame <- function(x, return(FALSE) } })] - + stop_if(length(ab_cols) == 0, "no columns with MIC values, disk zones or antibiotic column names found in this data set. Use as.mic() or as.disk() to transform antimicrobial columns.") # set type per column types <- character(length(ab_cols)) - types[sapply(x[, ab_cols], is.disk)] <- "disk" - types[types == "" & sapply(x[, ab_cols], all_valid_disks)] <- "disk" - types[sapply(x[, ab_cols], is.mic)] <- "mic" - types[types == "" & sapply(x[, ab_cols], all_valid_mics)] <- "mic" - types[types == "" & !sapply(x[, ab_cols], is.rsi)] <- "rsi" - + types[sapply(x[, ab_cols, drop = FALSE], is.disk)] <- "disk" + types[types == "" & sapply(x[, ab_cols, drop = FALSE], all_valid_disks)] <- "disk" + types[sapply(x[, ab_cols, drop = FALSE], is.mic)] <- "mic" + types[types == "" & sapply(x[, ab_cols, drop = FALSE], all_valid_mics)] <- "mic" + types[types == "" & !sapply(x[, ab_cols, drop = FALSE], is.rsi)] <- "rsi" if (any(types %in% c("mic", "disk"), na.rm = TRUE)) { # now we need an mo column stop_if(is.null(col_mo), "`col_mo` must be set") @@ -582,9 +582,9 @@ as.rsi.data.frame <- function(x, col_mo <- search_type_in_df(x = x, type = "mo") } } - + x_mo <- as.mo(x %pm>% pm_pull(col_mo)) - + for (i in seq_len(length(ab_cols))) { if (types[i] == "mic") { x[, ab_cols[i]] <- as.rsi(x = x %pm>% @@ -845,19 +845,22 @@ freq.rsi <- function(x, ...) { }))[1L] } ab <- suppressMessages(suppressWarnings(as.ab(x_name))) - freq.default <- import_fn("freq.default", "cleaner", error_on_fail = FALSE) digits <- list(...)$digits if (is.null(digits)) { digits <- 2 } if (!is.na(ab)) { - freq.default(x = x, ..., - .add_header = list(Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", ab_atc(ab), ")"), - `Drug group` = ab_group(ab, language = NULL), - `%SI` = percentage(susceptibility(x, minimum = 0, as_percent = FALSE), digits = digits))) + cleaner::freq.default(x = x, ..., + .add_header = list( + Drug = paste0(ab_name(ab, language = NULL), " (", ab, ", ", ab_atc(ab), ")"), + `Drug group` = ab_group(ab, language = NULL), + `%SI` = percentage(susceptibility(x, minimum = 0, as_percent = FALSE), + digits = digits))) } else { - freq.default(x = x, ..., - .add_header = list(`%SI` = percentage(susceptibility(x, minimum = 0, as_percent = FALSE), digits = digits))) + cleaner::freq.default(x = x, ..., + .add_header = list( + `%SI` = percentage(susceptibility(x, minimum = 0, as_percent = FALSE), + digits = digits))) } } @@ -892,8 +895,7 @@ get_skimmers.rsi <- function(column) { } } - sfl <- import_fn("sfl", "skimr", error_on_fail = FALSE) - sfl( + skimr::sfl( skim_type = "rsi", ab_name = name_call, count_R = count_R, @@ -916,7 +918,7 @@ print.rsi <- function(x, ...) { #' @method droplevels rsi #' @export #' @noRd -droplevels.rsi <- function(x, exclude = if (anyNA(levels(x))) NULL else NA, ...) { +droplevels.rsi <- function(x, exclude = if (any(is.na(levels(x)))) NULL else NA, ...) { x <- droplevels.factor(x, exclude = exclude, ...) class(x) <- c("rsi", "ordered", "factor") x diff --git a/R/rsi_calc.R b/R/rsi_calc.R index e2dc5ba2..8e7e4c06 100755 --- a/R/rsi_calc.R +++ b/R/rsi_calc.R @@ -96,7 +96,11 @@ rsi_calc <- function(..., if (is.null(x)) { warning_("argument is NULL (check if columns exist): returning NA", call = FALSE) - return(NA) + if (as_percent == TRUE) { + return(NA_character_) + } else { + return(NA_real_) + } } print_warning <- FALSE diff --git a/R/translate.R b/R/translate.R index bb4f1e5d..6d72f882 100755 --- a/R/translate.R +++ b/R/translate.R @@ -27,7 +27,7 @@ #' #' For language-dependent output of AMR functions, like [mo_name()], [mo_gramstain()], [mo_type()] and [ab_name()]. #' @inheritSection lifecycle Stable lifecycle -#' @details Strings will be translated to foreign languages if they are defined in a local translation file. Additions to this file can be suggested at our repository. The file can be found here: . This file will be read by all functions where a translated output can be desired, like all [mo_property()] functions ([mo_name()], [mo_gramstain()], [mo_type()], etc.) and [ab_property()] functions ([ab_name()], [ab_group()] etc.). +#' @details Strings will be translated to foreign languages if they are defined in a local translation file. Additions to this file can be suggested at our repository. The file can be found here: . This file will be read by all functions where a translated output can be desired, like all [`mo_*`][mo_property()] functions (such as [mo_name()], [mo_gramstain()], [mo_type()], etc.) and [`ab_*`][ab_property()] functions (such as [ab_name()], [ab_group()], etc.). #' #' Currently supported languages are: `r paste(sort(gsub(";.*", "", ISOcodes::ISO_639_2[which(ISOcodes::ISO_639_2$Alpha_2 %in% LANGUAGES_SUPPORTED), "Name"])), collapse = ", ")`. Please note that currently not all these languages have translations available for all antimicrobial agents and colloquial microorganism names. #' @@ -96,7 +96,7 @@ get_locale <- function() { } } - coerce_language_setting(Sys.getlocale()) + coerce_language_setting(Sys.getlocale("LC_COLLATE")) } coerce_language_setting <- function(lang) { diff --git a/R/zzz.R b/R/zzz.R index 3e5f1c79..e5171e8c 100755 --- a/R/zzz.R +++ b/R/zzz.R @@ -24,6 +24,7 @@ # ==================================================================== # .onLoad <- function(libname, pkgname) { + assign(x = "AB_lookup", value = create_AB_lookup(), envir = asNamespace("AMR")) diff --git a/data-raw/reproduction_of_poorman.R b/data-raw/reproduction_of_poorman.R index b5ec9fc0..13d49b3e 100644 --- a/data-raw/reproduction_of_poorman.R +++ b/data-raw/reproduction_of_poorman.R @@ -75,3 +75,7 @@ contents <- gsub("pm_distinct <- function(.data, ..., .keep_all = FALSE)", "pm_d contents <- contents[!grepl("summarize", contents)] writeLines(contents, "R/aa_helper_pm_functions.R") + +# after this, comment out: +# pm_left_join() since we use a faster version +# pm_group_split() since we don't use it and it relies on R 3.5.0 for the use of ...length(), which is hard to support with C++ code diff --git a/docs/404.html b/docs/404.html index 4ebcba69..85039b20 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9040 + 1.4.0.9041 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 7cb2de3e..29eccb72 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ AMR (for R) - 1.4.0.9040 + 1.4.0.9041 diff --git a/docs/articles/datasets.html b/docs/articles/datasets.html index ffe3cc27..e6cdae33 100644 --- a/docs/articles/datasets.html +++ b/docs/articles/datasets.html @@ -39,7 +39,7 @@ AMR (for R) - 1.4.0.9032 + 1.4.0.9041 @@ -47,14 +47,14 @@