From f6336fdd8972f6c0bd5c96bfc092a8d7e0fbcedf Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Mon, 28 Jan 2019 11:20:32 +0100 Subject: [PATCH] quasiquotation for freq() --- DESCRIPTION | 4 +- NAMESPACE | 191 +++++++++++++++++++++++++++++ NEWS.md | 18 ++- R/freq.R | 147 +++++++++++++--------- docs/LICENSE-text.html | 2 +- docs/articles/index.html | 2 +- docs/authors.html | 2 +- docs/index.html | 28 +++-- docs/news/index.html | 242 +++++++++++++++++++++++++++++++------ docs/pkgdown.yml | 2 +- docs/reference/freq.html | 13 +- docs/reference/index.html | 2 +- man/freq.Rd | 9 +- tests/testthat/test-atc.R | 18 +-- tests/testthat/test-freq.R | 6 + 15 files changed, 550 insertions(+), 136 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f4e2b336..5ef11236 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9013 -Date: 2019-01-27 +Version: 0.5.0.9014 +Date: 2019-01-28 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 57c08ae1..51b0f4ac 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,192 @@ # Generated by roxygen2: do not edit by hand +S3method(as.data.frame,atc) +S3method(as.data.frame,frequency_tbl) +S3method(as.data.frame,mo) +S3method(as.double,mic) +S3method(as.integer,mic) +S3method(as.numeric,mic) +S3method(as.vector,frequency_tbl) +S3method(as_tibble,frequency_tbl) +S3method(barplot,mic) +S3method(barplot,rsi) +S3method(diff,frequency_tbl) +S3method(droplevels,mic) +S3method(droplevels,rsi) +S3method(format,frequency_tbl) +S3method(hist,frequency_tbl) +S3method(kurtosis,data.frame) +S3method(kurtosis,default) +S3method(kurtosis,matrix) +S3method(plot,frequency_tbl) +S3method(plot,mic) +S3method(plot,resistance_predict) +S3method(plot,rsi) +S3method(print,atc) +S3method(print,frequency_tbl) +S3method(print,mic) +S3method(print,mo) +S3method(print,rsi) +S3method(pull,atc) +S3method(pull,mo) +S3method(select,frequency_tbl) +S3method(skewness,data.frame) 
+S3method(skewness,default) +S3method(skewness,matrix) +S3method(summary,mic) +S3method(summary,mo) +S3method(summary,rsi) +export("%like%") +export(EUCAST_rules) +export(ab_atc) +export(ab_certe) +export(ab_name) +export(ab_official) +export(ab_property) +export(ab_tradenames) +export(ab_trivial_nl) +export(ab_umcg) +export(abname) +export(age) +export(age_groups) +export(anti_join_microorganisms) +export(as.atc) +export(as.mic) +export(as.mo) +export(as.rsi) +export(atc_certe) +export(atc_ddd) +export(atc_groups) +export(atc_name) +export(atc_official) +export(atc_online_ddd) +export(atc_online_groups) +export(atc_online_property) +export(atc_property) +export(atc_tradenames) +export(atc_trivial_nl) +export(atc_umcg) +export(brmo) +export(count_I) +export(count_IR) +export(count_R) +export(count_S) +export(count_SI) +export(count_all) +export(count_df) +export(eucast_exceptional_phenotypes) +export(eucast_rules) +export(facet_rsi) +export(filter_first_isolate) +export(filter_first_weighted_isolate) +export(first_isolate) +export(freq) +export(frequency_tbl) +export(full_join_microorganisms) +export(g.test) +export(geom_rsi) +export(get_locale) +export(get_mo_source) +export(ggplot_rsi) +export(ggplot_rsi_predict) +export(guess_ab_col) +export(guess_atc) +export(guess_mo) +export(header) +export(inner_join_microorganisms) +export(interpretive_reading) +export(is.atc) +export(is.mic) +export(is.mo) +export(is.rsi) +export(is.rsi.eligible) +export(key_antibiotics) +export(key_antibiotics_equal) +export(kurtosis) +export(labels_rsi_count) +export(left_join_microorganisms) +export(like) +export(mdro) +export(mo_TSN) +export(mo_authors) +export(mo_class) +export(mo_failures) +export(mo_family) +export(mo_fullname) +export(mo_genus) +export(mo_gramstain) +export(mo_kingdom) +export(mo_order) +export(mo_phylum) +export(mo_property) +export(mo_ref) +export(mo_renamed) +export(mo_shortname) +export(mo_species) +export(mo_subkingdom) +export(mo_subspecies) 
+export(mo_taxonomy) +export(mo_type) +export(mo_year) +export(mrgn) +export(n_rsi) +export(p.symbol) +export(portion_I) +export(portion_IR) +export(portion_R) +export(portion_S) +export(portion_SI) +export(portion_df) +export(ratio) +export(read.4D) +export(resistance_predict) +export(right_join_microorganisms) +export(rsi) +export(rsi_predict) +export(scale_rsi_colours) +export(scale_y_percent) +export(semi_join_microorganisms) +export(set_mo_source) +export(skewness) +export(theme_rsi) +export(top_freq) +exportMethods(as.data.frame.atc) +exportMethods(as.data.frame.frequency_tbl) +exportMethods(as.data.frame.mo) +exportMethods(as.double.mic) +exportMethods(as.integer.mic) +exportMethods(as.numeric.mic) +exportMethods(as.vector.frequency_tbl) +exportMethods(as_tibble.frequency_tbl) +exportMethods(barplot.mic) +exportMethods(barplot.rsi) +exportMethods(diff.frequency_tbl) +exportMethods(droplevels.mic) +exportMethods(droplevels.rsi) +exportMethods(format.frequency_tbl) +exportMethods(hist.frequency_tbl) +exportMethods(kurtosis) +exportMethods(kurtosis.data.frame) +exportMethods(kurtosis.default) +exportMethods(kurtosis.matrix) +exportMethods(plot.frequency_tbl) +exportMethods(plot.mic) +exportMethods(plot.rsi) +exportMethods(print.atc) +exportMethods(print.frequency_tbl) +exportMethods(print.mic) +exportMethods(print.mo) +exportMethods(print.rsi) +exportMethods(pull.atc) +exportMethods(pull.mo) +exportMethods(select.frequency_tbl) +exportMethods(skewness) +exportMethods(skewness.data.frame) +exportMethods(skewness.default) +exportMethods(skewness.matrix) +exportMethods(summary.mic) +exportMethods(summary.mo) +exportMethods(summary.rsi) importFrom(crayon,bgGreen) importFrom(crayon,bgRed) importFrom(crayon,bgYellow) @@ -22,6 +209,7 @@ importFrom(dplyr,arrange) importFrom(dplyr,arrange_at) importFrom(dplyr,as_tibble) importFrom(dplyr,between) +importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) importFrom(dplyr,desc) @@ -65,6 +253,9 @@ 
importFrom(graphics,plot) importFrom(graphics,text) importFrom(hms,is.hms) importFrom(knitr,kable) +importFrom(rlang,as_name) +importFrom(rlang,enquos) +importFrom(rlang,eval_tidy) importFrom(stats,complete.cases) importFrom(stats,fivenum) importFrom(stats,glm) diff --git a/NEWS.md b/NEWS.md index 4590581d..75468aa8 100755 --- a/NEWS.md +++ b/NEWS.md @@ -48,7 +48,7 @@ #### Changed * Added 65 antibiotics to the `antibiotics` data set, from the [Pharmaceuticals Community Register](http://ec.europa.eu/health/documents/community-register/html/atc.htm) of the European Commission * Removed columns `atc_group1_nl` and `atc_group2_nl` from the `antibiotics` data set -* Functions `atc_ddd()` and `atc_groups()` have been renamed `atc_online_ddd()` and `atc_online_groups()`. The old function are deprecated and will be removed in a future version. +* Functions `atc_ddd()` and `atc_groups()` have been renamed `atc_online_ddd()` and `atc_online_groups()`. The old functions are deprecated and will be removed in a future version. * Function `guess_mo()` is now deprecated in favour of `as.mo()` and will be removed in future versions * Function `guess_atc()` is now deprecated in favour of `as.atc()` and will be removed in future versions * Function `eucast_rules()`: @@ -78,6 +78,22 @@ * Summaries of class `mo` will now return the top 3 and the unique count, e.g. using `summary(mo)` * Small text updates to summaries of class `rsi` and `mic` * Frequency tables (`freq()` function): + * Support for tidyverse quasiquotation! 
So now you can create frequency tables of function outcomes: + ```r + # Determine genus of microorganisms (mo) in `septic_patients` data set: + # OLD WAY + septic_patients %>% + mutate(genus = mo_genus(mo)) %>% + freq(genus) + # NEW WAY + septic_patients %>% + freq(mo_genus(mo)) + + # Even supports grouping variables: + septic_patients %>% + group_by(gender) %>% + freq(mo_genus(mo)) + ``` * Header info is now available as a list, with the `header` function * Added header info for class `mo` to show unique count of families, genera and species * Now honours the `decimal.mark` setting, which just like `format` defaults to `getOption("OutDec")` diff --git a/R/freq.R b/R/freq.R index 0d6ee9eb..37d23823 100755 --- a/R/freq.R +++ b/R/freq.R @@ -21,10 +21,10 @@ #' Frequency table #' -#' Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. The best practice is: \code{data \%>\% freq(var)}.\cr +#' Create a frequency table of a vector with items or a \code{data.frame}. Supports quasiquotation and markdown for reports. Best practice is: \code{data \%>\% freq(var)}.\cr #' \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names. #' @param x vector of any class or a \code{\link{data.frame}}, \code{\link{tibble}} (may contain a grouping variable) or \code{\link{table}} -#' @param ... up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples +#' @param ... up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples. Also supports quasiquotation. #' @param sort.count sort on count, i.e. frequencies. This will be \code{TRUE} at default for everything except when using grouping variables. #' @param nmax number of row to print. The default, \code{15}, uses \code{\link{getOption}("max.print.freq")}. 
Use \code{nmax = 0}, \code{nmax = Inf}, \code{nmax = NULL} or \code{nmax = NA} to print all rows. #' @param na.rm a logical value indicating whether \code{NA} values should be removed from the frequency table. The header (if set) will always print the amount of \code{NA}s. @@ -67,10 +67,11 @@ #' The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties. #' @importFrom stats fivenum sd mad #' @importFrom grDevices boxplot.stats -#' @importFrom dplyr %>% arrange arrange_at desc filter_at funs group_by mutate mutate_at n n_distinct pull select summarise tibble ungroup vars all_vars +#' @importFrom dplyr %>% arrange arrange_at bind_cols desc filter_at funs group_by mutate mutate_at n n_distinct pull select summarise tibble ungroup vars all_vars #' @importFrom utils browseVignettes #' @importFrom hms is.hms #' @importFrom crayon red green silver +#' @importFrom rlang enquos eval_tidy as_name #' @keywords summary summarise frequency freq #' @rdname freq #' @name freq @@ -99,9 +100,12 @@ #' # multiple selected variables will be pasted together #' septic_patients %>% #' left_join_microorganisms %>% -#' filter(hospital_id == "A") %>% #' freq(genus, species) #' +#' # functions as quasiquotation are also supported +#' septic_patients %>% +#' freq(mo_genus(mo), mo_species(mo)) +#' #' #' # group a variable and analyse another #' septic_patients %>% @@ -216,11 +220,6 @@ frequency_tbl <- function(x, } if (any(class(x) == "data.frame")) { - x.group <- group_vars(x) - if (length(x.group) > 1) { - x.group <- x.group[1L] - warning("freq supports one grouping variable, only `", x.group, "` will be kept.", call. 
= FALSE) - } if (is.null(x.name)) { x.name <- deparse(substitute(x)) @@ -228,56 +227,88 @@ frequency_tbl <- function(x, if (x.name == ".") { x.name <- NULL } - dots <- base::eval(base::substitute(base::alist(...))) - ndots <- length(dots) - if (ndots < 10) { - cols <- as.character(dots) - if (!all(cols %in% colnames(x))) { - stop("one or more columns not found: `", paste(cols, collapse = "`, `"), "`", call. = FALSE) - } - if (length(x.group) > 0) { - x.group_cols <- c(x.group, cols) - # if (droplevels == TRUE) { - # x <- x %>% mutate_at(vars(x.group_cols), droplevels) - # } - suppressWarnings( - df <- x %>% - group_by_at(vars(x.group_cols)) %>% - summarise(count = n()) - ) - if (na.rm == TRUE) { - df <- df %>% filter_at(vars(cols), all_vars(!is.na(.))) - } - if (!missing(sort.count)) { - if (sort.count == TRUE) { - df <- df %>% arrange_at(c(x.group, "count"), desc) - } - } - df <- df %>% - mutate(cum_count = cumsum(count)) - - df.topleft <- df[1, 1] - df <- df %>% - ungroup() %>% - # do not repeat group labels - mutate_at(vars(x.group), funs(ifelse(lag(.) == ., "", .))) - df[1, 1] <- df.topleft - colnames(df)[1:2] <- c("group", "item") - - if (!is.null(levels(df$item)) & droplevels == TRUE) { - # is factor - df <- df %>% filter(count != 0) - } - } - if (length(cols) > 0) { - x <- x[, cols] - } - } else if (ndots >= 10) { - stop("A maximum of 9 columns can be analysed at the same time.", call. = FALSE) - } else { - cols <- NULL + x.group <- group_vars(x) + if (length(x.group) > 1) { + x.group <- x.group[1L] + warning("freq supports one grouping variable, only `", x.group, "` will be kept.", call. = FALSE) } + + user_exprs <- enquos(...) 
+ + if (length(user_exprs) > 0) { + new_list <- list(0) + + for (i in 1:length(user_exprs)) { + new_list[[i]] <- eval_tidy(user_exprs[[i]], data = x) + this_name <- try( as_name(user_exprs[[i]]) , silent = TRUE) + if (class(this_name) == "try-error") { + this_name <- paste0("V", i) + } + cols <- c(cols, this_name) + } + + if (length(new_list) == 1 & length(x.group) == 0) { + # is now character + x <- new_list[[1]] + df <- NULL + cols <- NULL + } else { + # create data frame + df <- as.data.frame(new_list, col.names = paste0("V", 1:length(new_list)), stringsAsFactors = FALSE) + } + } else { + # complete data frame + df <- x + } + + # support grouping variables + if (length(x.group) > 0) { + x.group_cols <- c(x.group, cols) + x <- bind_cols(x, df) + # if (droplevels == TRUE) { + # x <- x %>% mutate_at(vars(x.group_cols), droplevels) + # } + suppressWarnings( + df <- x %>% + group_by_at(vars(x.group_cols)) %>% + summarise(count = n()) + ) + if (na.rm == TRUE) { + df <- df %>% filter_at(vars(x.group_cols), all_vars(!is.na(.))) + } + if (!missing(sort.count)) { + if (sort.count == TRUE) { + df <- df %>% arrange_at(c(x.group_cols, "count"), desc) + } + } + df <- df %>% + mutate(cum_count = cumsum(count)) + + df.topleft <- df[1, 1] + df <- df %>% + ungroup() %>% + # do not repeat group labels + mutate_at(vars(x.group), funs(ifelse(lag(.) 
== ., "", .))) + df[1, 1] <- df.topleft + colnames(df)[1:2] <- c("group", "item") + + if (!is.null(levels(df$item)) & droplevels == TRUE) { + # is factor + df <- df %>% filter(count != 0) + } + } else { + if (!is.null(df)) { + # no groups, multiple values like: septic_patients %>% freq(mo, mo_genus(mo)) + x <- df + df <- NULL + cols <- NULL + } + } + if (length(cols) > 0 & is.data.frame(x)) { + x <- x[, cols] + } + } else if (any(class(x) == "table")) { x <- as.data.frame(x, stringsAsFactors = FALSE) # now this DF contains 3 columns: the 2 vars and a Freq column @@ -286,7 +317,7 @@ frequency_tbl <- function(x, times = x$Freq) x.name <- "a `table` object" cols <- NULL - #mult.columns <- 2 + # mult.columns <- 2 } else { x.name <- NULL cols <- NULL diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 4c1b830e..79527f39 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9013 + 0.5.0.9014 diff --git a/docs/articles/index.html b/docs/articles/index.html index faf0b3b6..9c46efc2 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9013 + 0.5.0.9014 diff --git a/docs/authors.html b/docs/authors.html index db0706f0..6eee37d7 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9013 + 0.5.0.9014 diff --git a/docs/index.html b/docs/index.html index 54e9a49c..0cd2fc42 100644 --- a/docs/index.html +++ b/docs/index.html @@ -42,7 +42,7 @@ AMR (for R) - 0.5.0.9013 + 0.5.0.9014 @@ -226,7 +226,7 @@

Get this package

This package is available on the official R network (CRAN), which has a peer-reviewed submission process. Install this package in R with:

-
install.packages("AMR")
+
install.packages("AMR")

It will be downloaded and installed automatically. For RStudio, click on the menu Tools > Install Packages… and then type in “AMR” and press Install.

@@ -259,17 +259,17 @@ Overview of functions

The AMR package basically does four important things:

    -
  1. It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:
  2. -
+
  • +

    It cleanses existing data, by transforming it to reproducible and profound classes, making the most efficient use of R. These functions all use artificial intelligence to guess results that you would expect:

    • Use as.mo() to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of Klebsiella pneumoniae is “B_KLBSL_PNE” (B stands for Bacteria) and the ID of S. aureus is “B_STPHY_AUR”. The function takes almost any text as input that looks like the name or code of a microorganism like “E. coli”, “esco” or “esccol” and tries to find expected results using artificial intelligence (AI) on the included ITIS data set, consisting of almost 20,000 microorganisms. It is very fast, please see our benchmarks. Moreover, it can group Staphylococci into coagulase negative and positive (CoNS and CoPS, see source) and can categorise Streptococci into Lancefield groups (like beta-haemolytic Streptococcus Group B, source).
    • Use as.rsi() to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like “<=0.002; S” (combined MIC/RSI) will result in “S”.
    • Use as.mic() to cleanse your MIC values. It produces a so-called factor (called ordinal in SPSS) with valid MIC values as levels. A value like “<=0.002; S” (combined MIC/RSI) will result in “<=0.002”.
    • Use as.atc() to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values “Furabid”, “Furadantin”, “nitro” all return the ATC code of Nitrofurantoine.
    -
      -
    1. It enhances existing data and adds new data from data sets included in this package.
    2. -
    +
  • +
  • +

    It enhances existing data and adds new data from data sets included in this package.

    • Use eucast_rules() to apply EUCAST expert rules to isolates.
    • Use first_isolate() to identify the first isolates of every patient using guidelines from the CLSI (Clinical and Laboratory Standards Institute). @@ -281,9 +281,9 @@
    • The data set microorganisms contains the complete taxonomic tree of more than 18,000 microorganisms (bacteria, fungi/yeasts and protozoa). Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like mo_genus(), mo_family(), mo_gramstain() or even mo_phylum(). As they use as.mo() internally, they also use artificial intelligence. For example, mo_genus("MRSA") and mo_genus("S. aureus") will both return "Staphylococcus". They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.
    • The data set antibiotics contains the ATC code, LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains a total of 298 trade names. Use functions like ab_name() and ab_tradenames() to look up values. The ab_* functions use as.atc() internally so they support AI to guess your expected result. For example, ab_name("Fluclox"), ab_name("Floxapen") and ab_name("J01CF05") will all return "Flucloxacillin". These functions can again be used to add new variables to your data.
    -
      -
    1. It analyses the data with convenient functions that use well-known methods.
    2. -
    +
  • +
  • +

    It analyses the data with convenient functions that use well-known methods.

    -
      -
    1. It teaches the user how to use all the above actions.
    2. -
    +
  • +
  • +

    It teaches the user how to use all the above actions.

    • The package contains extensive help pages with many examples.
    • It also contains an example data set called septic_patients. This data set contains: @@ -304,6 +304,8 @@
  • + +

    diff --git a/docs/news/index.html b/docs/news/index.html index bdf51a36..c74c4e51 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9013 + 0.5.0.9014

    @@ -229,10 +229,22 @@
    @@ -253,15 +278,19 @@

    @@ -506,15 +638,21 @@ New

    + +
  • Determining bacterial ID: +
    • New functions as.bactid and is.bactid to transform/ look up microbial ID’s.
    • The existing function guess_bactid is now an alias of as.bactid
    • New Becker classification for Staphylococcus to categorise them into Coagulase Negative Staphylococci (CoNS) and Coagulase Positive Staphylococci (CoPS)
    • New Lancefield classification for Streptococcus to categorise them into Lancefield groups
    • +
    +
  • For convenience, new descriptive statistical functions kurtosis and skewness that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
  • Function g.test to perform the χ²-distributed G-test, whose use is the same as that of chisq.test
  • -
  • Function ratio to transform a vector of values to a preset ratio
  • +
  • +Function ratio to transform a vector of values to a preset ratio + +
  • Support for Addins menu in RStudio to quickly insert %in% or %like% (and give them keyboard shortcuts), or to view the datasets that come with this package
  • Function p.symbol to transform p values to their related symbols: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Functions clipboard_import and clipboard_export as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the clipr package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
  • -
  • New for frequency tables (function freq):
  • +
  • New for frequency tables (function freq): +
    • A vignette to explain its usage
    • Support for rsi (antimicrobial resistance) to use as input
    • Support for table to use as input: freq(table(x, y)) @@ -555,6 +703,8 @@
    • Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
    • Possibility to globally set the default for the amount of items to print, with options(max.print.freq = n) where n is your preset value
    +
  • +

    @@ -576,21 +726,27 @@
  • Small improvements to the microorganisms dataset (especially for Salmonella) and the column bactid now has the new class "bactid"
  • -
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions:
  • +
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions: + +
  • Now possible to coerce MIC values with a space between operator and value, i.e. as.mic("<= 0.002") now works
  • Classes rsi and mic do not add the attribute package.version anymore
  • Added "groups" option for atc_property(..., property). It will return a vector of the ATC hierarchy as defined by the WHO. The new function atc_groups is a convenient wrapper around this.
  • Built-in host check for atc_property as it requires the host set by url to be responsive
  • Improved first_isolate algorithm to exclude isolates where bacteria ID or genus is unavailable
  • Fix for warning hybrid evaluation forced for row_number (924b62) from the dplyr package v0.7.5 and above
  • -
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid)
  • +
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid) +
    • So yourdata %>% select(genus, species) %>% as.bactid() now also works
    • +
    +
  • Other small fixes
  • @@ -598,11 +754,14 @@

    Other

    @@ -621,10 +780,13 @@
  • Function guess_bactid to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
  • Function guess_atc to determine the ATC of an antibiotic based on name, trade name, or known abbreviations
  • Function freq to create frequency tables, with additional info in a header
  • -
  • Function MDRO to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines.
  • +
  • Function MDRO to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines. + +
  • New algorithm to determine weighted isolates, can now be "points" or "keyantibiotics", see ?first_isolate
  • New print format for tibbles and data.tables
  • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 3c396dbb..31227eab 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 1.17.2 +pandoc: 2.3.1 pkgdown: 1.3.0 pkgdown_sha: ~ articles: diff --git a/docs/reference/freq.html b/docs/reference/freq.html index d8db6949..58b3ed92 100644 --- a/docs/reference/freq.html +++ b/docs/reference/freq.html @@ -47,7 +47,7 @@ - @@ -81,7 +81,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co AMR (for R) - 0.5.0.9012 + 0.5.0.9014 @@ -224,7 +224,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co
    -

    Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. The best practice is: data %>% freq(var).
    +

    Create a frequency table of a vector with items or a data.frame. Supports quasiquotation and markdown for reports. Best practice is: data %>% freq(var).
    top_freq can be used to get the top/bottom n items of a frequency table, with counts as names.

    @@ -262,7 +262,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co ... -

    up to nine different columns of x when x is a data.frame or tibble, to calculate frequencies from - see Examples

    +

    up to nine different columns of x when x is a data.frame or tibble, to calculate frequencies from - see Examples. Also supports quasiquotation.

    sort.count @@ -393,9 +393,12 @@ On our website https://msberends.gitla # multiple selected variables will be pasted together septic_patients %>% left_join_microorganisms %>% - filter(hospital_id == "A") %>% freq(genus, species) +# functions as quasiquotation are also supported +septic_patients %>% + freq(mo_genus(mo), mo_species(mo)) + # group a variable and analyse another septic_patients %>% diff --git a/docs/reference/index.html b/docs/reference/index.html index d79cba58..6c03ab79 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -78,7 +78,7 @@ AMR (for R) - 0.5.0.9013 + 0.5.0.9014 diff --git a/man/freq.Rd b/man/freq.Rd index 77d5ee8b..da2873e1 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -34,7 +34,7 @@ header(f, property = NULL) \arguments{ \item{x}{vector of any class or a \code{\link{data.frame}}, \code{\link{tibble}} (may contain a grouping variable) or \code{\link{table}}} -\item{...}{up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples} +\item{...}{up to nine different columns of \code{x} when \code{x} is a \code{data.frame} or \code{tibble}, to calculate frequencies from - see Examples. Also supports quasiquotation.} \item{sort.count}{sort on count, i.e. frequencies. This will be \code{TRUE} at default for everything except when using grouping variables.} @@ -78,7 +78,7 @@ header(f, property = NULL) A \code{data.frame} (with an additional class \code{"frequency_tbl"}) with five columns: \code{item}, \code{count}, \code{percent}, \code{cum_count} and \code{cum_percent}. } \description{ -Create a frequency table of a vector with items or a data frame. Supports quasiquotation and markdown for reports. The best practice is: \code{data \%>\% freq(var)}.\cr +Create a frequency table of a vector with items or a \code{data.frame}. Supports quasiquotation and markdown for reports. 
Best practice is: \code{data \%>\% freq(var)}.\cr \code{top_freq} can be used to get the top/bottom \emph{n} items of a frequency table, with counts as names. } \details{ @@ -135,9 +135,12 @@ septic_patients \%>\% # multiple selected variables will be pasted together septic_patients \%>\% left_join_microorganisms \%>\% - filter(hospital_id == "A") \%>\% freq(genus, species) +# functions as quasiquotation are also supported +septic_patients \%>\% + freq(mo_genus(mo), mo_species(mo)) + # group a variable and analyse another septic_patients \%>\% diff --git a/tests/testthat/test-atc.R b/tests/testthat/test-atc.R index 467604a1..b616e8b8 100755 --- a/tests/testthat/test-atc.R +++ b/tests/testthat/test-atc.R @@ -21,15 +21,15 @@ context("atc.R") -test_that("guess_atc works", { - expect_equal(as.character(guess_atc(c("J01FA01", - "Erythromycin", - "eryt", - "ERYT", - "ERY", - "Erythrocin", - "Eryzole", - "Pediamycin"))), +test_that("as.atc works", { + expect_equal(suppressWarnings(as.character(guess_atc(c("J01FA01", + "Erythromycin", + "eryt", + "ERYT", + "ERY", + "Erythrocin", + "Eryzole", + "Pediamycin")))), rep("J01FA01", 8)) expect_identical(class(as.atc("amox")), "atc") diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 96fc2e77..ae45f75e 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -92,6 +92,12 @@ test_that("frequency table works", { expect_output(print(septic_patients %>% group_by(gender) %>% freq(amox, quote = TRUE))) expect_output(print(septic_patients %>% group_by(gender) %>% freq(amox, markdown = TRUE))) + # quasiquotation + expect_output(print(septic_patients %>% freq(mo_genus(mo)))) + expect_output(print(septic_patients %>% freq(mo, mo_genus(mo)))) + expect_output(print(septic_patients %>% group_by(gender) %>% freq(mo_genus(mo)))) + expect_output(print(septic_patients %>% group_by(gender) %>% freq(mo, mo_genus(mo)))) + # top 5 expect_equal( septic_patients %>%