From 0b3dc0231f9ee8f7917d17e79e10785e2cb489c1 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Wed, 30 Jan 2019 16:00:55 +0100 Subject: [PATCH] freq fix --- DESCRIPTION | 2 +- NEWS.md | 1 + R/freq.R | 28 +++-- docs/articles/WHONET.html | 51 +++++--- docs/index.html | 30 ++--- docs/news/index.html | 258 ++++++++++++++++++++++++++++++-------- docs/pkgdown.yml | 2 +- docs/reference/freq.html | 22 ++-- man/freq.Rd | 6 +- 9 files changed, 284 insertions(+), 116 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 60dfbaf1..67a8e1d2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.5.0.9016 -Date: 2019-01-29 +Date: 2019-01-30 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NEWS.md b/NEWS.md index e424788b..c04ebab5 100755 --- a/NEWS.md +++ b/NEWS.md @@ -99,6 +99,7 @@ freq(mo_genus(mo)) ``` * Header info is now available as a list, with the `header` function + * The parameter `header` is now set to `TRUE` at default, even for markdown * Added header info for class `mo` to show unique count of families, genera and species * Now honours the `decimal.mark` setting, which just like `format` defaults to `getOption("OutDec")` * The new `big.mark` parameter will at default be `","` when `decimal.mark = "."` and `"."` otherwise diff --git a/R/freq.R b/R/freq.R index 0bf89807..36730c16 100755 --- a/R/freq.R +++ b/R/freq.R @@ -192,7 +192,7 @@ frequency_tbl <- function(x, markdown = !interactive(), digits = 2, quote = FALSE, - header = !markdown, + header = TRUE, title = NULL, na = "", droplevels = TRUE, @@ -203,7 +203,6 @@ frequency_tbl <- function(x, mult.columns <- 0 x.group = character(0) df <- NULL - # x_haslevels <- !is.null(levels(x)) x.name <- NULL cols <- NULL cols.names <- NULL @@ -229,9 +228,10 @@ frequency_tbl <- function(x, x.name <- x.name %>% strsplit("%>%", fixed = TRUE) %>% unlist() %>% .[1] %>% trimws() } if (x.name == ".") { - x.name <- "a data.frame" + x.name <- "a `data.frame`" + } else 
{ + x.name <- paste0("`", x.name, "`") } - x.name <- paste0("`", x.name, "`") x.name.dims <- x %>% dim() %>% format(decimal.mark = decimal.mark, big.mark = big.mark) %>% @@ -545,6 +545,10 @@ format_header <- function(x, markdown = FALSE, decimal.mark = ".", big.mark = ", newline <-"\n" if (markdown == TRUE) { newline <- " \n" + # no colours in markdown + silver <- function(x) x + green <- function(x) x + red <- function(x) x } header <- header(x) @@ -682,10 +686,10 @@ format_header <- function(x, markdown = FALSE, decimal.mark = ".", big.mark = ", #' @importFrom dplyr top_n pull top_freq <- function(f, n) { if (!"frequency_tbl" %in% class(f)) { - stop("top_freq can only be applied to frequency tables", call. = FALSE) + stop("`top_freq` can only be applied to frequency tables", call. = FALSE) } if (!is.numeric(n) | length(n) != 1L) { - stop("For top_freq, `nmax` must be a number of length 1", call. = FALSE) + stop("For `top_freq`, 'n' must be a number of length 1", call. = FALSE) } top <- f %>% top_n(n, count) vect <- top %>% pull(item) @@ -699,6 +703,9 @@ top_freq <- function(f, n) { #' @rdname freq #' @export header <- function(f, property = NULL) { + if (!"frequency_tbl" %in% class(f)) { + stop("`header` can only be applied to frequency tables", call. = FALSE) + } if (is.null(property)) { attributes(f)$header } else { @@ -770,7 +777,7 @@ diff.frequency_tbl <- function(x, y, ...) { print.frequency_tbl <- function(x, nmax = getOption("max.print.freq", default = 15), markdown = !interactive(), - header = !markdown, + header = TRUE, decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != ",", ",", "."), ...) 
{ @@ -837,10 +844,6 @@ print.frequency_tbl <- function(x, if (!missing(markdown)) { if (markdown == TRUE) { opt$tbl_format <- "markdown" - if (missing(header)) { - # default header off for markdown - header <- FALSE - } } else { opt$tbl_format <- "pandoc" } @@ -862,7 +865,8 @@ print.frequency_tbl <- function(x, if (is.null(opt$digits)) { opt$digits <- 2 } - cat(format_header(x, digits = opt$digits, markdown = markdown, decimal.mark = decimal.mark, big.mark = big.mark)) + cat(format_header(x, digits = opt$digits, markdown = (opt$tbl_format == "markdown"), + decimal.mark = decimal.mark, big.mark = big.mark)) } } else if (opt$tbl_format == "markdown") { # do print title as caption in markdown diff --git a/docs/articles/WHONET.html b/docs/articles/WHONET.html index 32cddd86..d8ed4117 100644 --- a/docs/articles/WHONET.html +++ b/docs/articles/WHONET.html @@ -185,7 +185,7 @@

How to work with WHONET data

Matthijs S. Berends

-

29 January 2019

+

30 January 2019

@@ -199,32 +199,38 @@ Import of data

This tutorial assumes you already imported the WHONET data with e.g. the readxl package. In RStudio, this can be done using the menu button ‘Import Dataset’ in the tab ‘Environment’. Choose the option ‘From Excel’ and select your exported file. Make sure date fields are imported correctly.

An example syntax could look like this:

-
library(readxl)
-data <- read_excel(path = "path/to/your/file.xlsx")
+
library(readxl)
+data <- read_excel(path = "path/to/your/file.xlsx")

This package comes with an example data set WHONET. We will use it for this analysis.

Preparation

First, load the relevant packages if you have not done so yet. I use the tidyverse for all of my analyses. All of them. If you don’t know it yet, I suggest you read about it on their website: https://www.tidyverse.org/.

-
library(dplyr)   # part of tidyverse
-library(ggplot2) # part of tidyverse
-library(AMR)     # this package
+
library(dplyr)   # part of tidyverse
+library(ggplot2) # part of tidyverse
+library(AMR)     # this package

We will have to transform some variables to simplify and automate the analysis:

  • Microorganisms should be transformed to our own microorganism IDs (called an mo) using the ITIS reference data set, which contains all ~20,000 microorganisms from the taxonomic kingdoms Bacteria, Fungi and Protozoa. We do the transformation with as.mo().
  • Antimicrobial results or interpretations have to be clean and valid. In other words, they should only contain values "S", "I" or "R". That is exactly what the as.rsi() function is for.
-
# transform variables
-data <- WHONET %>%
-  # get microbial ID based on given organism
-  mutate(mo = as.mo(Organism)) %>% 
-  # transform everything from "AMP_ND10" to "CIP_EE" to the new `rsi` class
-  mutate_at(vars(AMP_ND10:CIP_EE), as.rsi)
+

No errors or warnings, so all values are transformed successfully. Let’s check it though, with a couple of frequency tables:

-
# our newly created `mo` variable
-data %>% freq(mo, nmax = 10)
-

Frequency table of mo from a data.frame (500 x 54)

+ +

Frequency table of mo from a data.frame (500 x 54)
+Class: mo (character)
+Length: 500 (of which NA: 0 = 0.00%)
+Unique: 56

+

Families: 14
+Genera: 23
+Species: 51

@@ -318,11 +324,16 @@ data %>%

-# our transformed antibiotic columns
-# amoxicillin/clavulanic acid (J01CR02) as an example
-data %>% freq(AMC_ND2)
-

Frequency table of AMC_ND2 from a data.frame (500 x 54)

+ +

Frequency table of AMC_ND2 from a data.frame (500 x 54)
+Class: factor > ordered > rsi (numeric)
+Levels: S < I < R
+Length: 500 (of which NA: 41 = 8.20%)
+Unique: 3

+

%IR: 28.98% (ratio S : IR = 1.0 : 0.4)

diff --git a/docs/index.html b/docs/index.html index dcfa7119..8e236023 100644 --- a/docs/index.html +++ b/docs/index.html @@ -233,11 +233,11 @@

Get this package

This package is available on the official R network (CRAN), which has a peer-reviewed submission process. Install this package in R with:

- +

It will be downloaded and installed automatically. For RStudio, click on the menu Tools > Install Packages… and then type in “AMR” and press Install.

The latest and unpublished development version can be installed with (precaution: may be unstable):

-
install.packages("devtools")
-devtools::install_gitlab("msberends/AMR")
+
install.packages("devtools")
+devtools::install_gitlab("msberends/AMR")

@@ -276,17 +276,17 @@ devtools::install_gitlab("msberends/AMR Overview of functions

The AMR package basically does four important things:

    -
  1. It cleanses existing data by providing new classes for microoganisms, antibiotics and antimicrobial results (both S/I/R and MIC). With this package, you learn R everything about microbiology that is needed for analysis. These functions all use artificial intelligence to guess results that you would expect:
  2. -
+
  • +

    It cleanses existing data by providing new classes for microorganisms, antibiotics and antimicrobial results (both S/I/R and MIC). With this package, you teach R everything about microbiology that is needed for analysis. These functions all use artificial intelligence to guess results that you would expect:

    • Use as.mo() to get an ID of a microorganism. The IDs are human readable for the trained eye - the ID of Klebsiella pneumoniae is “B_KLBSL_PNE” (B stands for Bacteria) and the ID of S. aureus is “B_STPHY_AUR”. The function takes almost any text as input that looks like the name or code of a microorganism like “E. coli”, “esco” or “esccol” and tries to find expected results using artificial intelligence (AI) on the included ITIS data set, consisting of almost 20,000 microorganisms. It is very fast, please see our benchmarks. Moreover, it can group Staphylococci into coagulase negative and positive (CoNS and CoPS, see source) and can categorise Streptococci into Lancefield groups (like beta-haemolytic Streptococcus Group B, source).
    • Use as.rsi() to transform values to valid antimicrobial results. It produces just S, I or R based on your input and warns about invalid values. Even values like “<=0.002; S” (combined MIC/RSI) will result in “S”.
    • Use as.mic() to cleanse your MIC values. It produces a so-called factor (called ordinal in SPSS) with valid MIC values as levels. A value like “<=0.002; S” (combined MIC/RSI) will result in “<=0.002”.
    • Use as.atc() to get the ATC code of an antibiotic as defined by the WHO. This package contains a database with most LIS codes, official names, DDDs and even trade names of antibiotics. For example, the values “Furabid”, “Furadantin”, “nitro” all return the ATC code of Nitrofurantoine.
    -
      -
    1. It enhances existing data and adds new data from data sets included in this package.
    2. -
    +
  • +
  • +

    It enhances existing data and adds new data from data sets included in this package.

    • Use eucast_rules() to apply EUCAST expert rules to isolates.
    • Use first_isolate() to identify the first isolates of every patient using guidelines from the CLSI (Clinical and Laboratory Standards Institute). @@ -298,9 +298,9 @@ devtools::install_gitlab("msberends/AMR
    • The data set microorganisms contains the complete taxonomic tree of almost 20,000 microorganisms (bacteria, fungi/yeasts and protozoa). Furthermore, the colloquial name and Gram stain are available, which enables resistance analysis of e.g. different antibiotics per Gram stain. The package also contains functions to look up values in this data set like mo_genus(), mo_family(), mo_gramstain() or even mo_phylum(). As they use as.mo() internally, they also use artificial intelligence. For example, mo_genus("MRSA") and mo_genus("S. aureus") will both return "Staphylococcus". They also come with support for German, Dutch, Spanish, Italian, French and Portuguese. These functions can be used to add new variables to your data.
    • The data set antibiotics contains almost 500 antimicrobial drugs with their ATC code, EARS-Net code, common LIS codes, official name, trivial name and DDD of both oral and parenteral administration. It also contains hundreds of trade names. Use functions like atc_name() and atc_tradenames() to look up values. The atc_* functions use as.atc() internally so they support AI to guess your expected result. For example, atc_name("Fluclox"), atc_name("Floxapen") and atc_name("J01CF05") will all return "Flucloxacillin". These functions can again be used to add new variables to your data.
    -
      -
    1. It analyses the data with convenient functions that use well-known methods.
    2. -
    +
  • +
  • +

    It analyses the data with convenient functions that use well-known methods.

    -
      -
    1. It teaches the user how to use all the above actions.
    2. -
    +
  • +
  • +

    It teaches the user how to use all the above actions.

    • Aside from this website with many tutorials, the package itself contains extensive help pages with many examples for all functions.
    • It also contains an example data set called septic_patients. This data set contains: @@ -321,6 +321,8 @@ devtools::install_gitlab("msberends/AMR
  • + +

    diff --git a/docs/news/index.html b/docs/news/index.html index f0ba4d3f..b23ee952 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -236,13 +236,28 @@
    • BREAKING: removed deprecated functions, parameters and references to ‘bactid’. Use as.mo() to identify an MO code.
    • -
    • Support for data from WHONET and EARS-Net (European Antimicrobial Resistance Surveillance Network):
    • +
    • Support for data from WHONET and EARS-Net (European Antimicrobial Resistance Surveillance Network): +
      • Exported files from WHONET can be read and used in this package. For functions like first_isolate() and eucast_rules(), all parameters will be filled in automatically.
      • This package now knows all antibiotic abbreviations by EARS-Net (which are also being used by WHONET) - the antibiotics data set now contains a column ears_net.
      • -
      • All ab_* functions are deprecated and replaced by atc_* functions: r ab_property -> atc_property() ab_name -> atc_name() ab_official -> atc_official() ab_trivial_nl -> atc_trivial_nl() ab_certe -> atc_certe() ab_umcg -> atc_umcg() ab_tradenames -> atc_tradenames() These functions use as.atc() internally. The old atc_property has been renamed atc_online_property(). This is done for two reasons: firstly, not all ATC codes are of antibiotics (ab) but can also be of antivirals or antifungals. Secondly, the input must have class atc or must be coerable to this class. Properties of these classes should start with the same class name, analogous to as.mo() and e.g. mo_genus.
      • -
      • New website: https://msberends.gitlab.io/AMR (built with the great pkgdown)
      • +
      +
    • +
    • +

      All ab_* functions are deprecated and replaced by atc_* functions:

      + +These functions use as.atc() internally. The old atc_property has been renamed atc_online_property(). This is done for two reasons: firstly, not all ATC codes are of antibiotics (ab) but can also be of antivirals or antifungals. Secondly, the input must have class atc or must be coercible to this class. Properties of these classes should start with the same class name, analogous to as.mo() and e.g. mo_genus.
    • +
    • New website: https://msberends.gitlab.io/AMR (built with the great pkgdown) +
      • Contains the complete manual of this package and all of its functions with an explanation of their parameters
      • Contains a comprehensive tutorial about how to conduct antimicrobial resistance analysis
      • +
      +
    • New functions set_mo_source() and get_mo_source() to use your own predefined MO codes as input for as.mo() and consequently all mo_* functions
    • Support for the upcoming dplyr version 0.8.0
    • New function guess_ab_col() to find an antibiotic column in a table
    • @@ -250,11 +265,24 @@
    • New function mo_renamed() to get a list of all returned values from as.mo() that have had taxonomic renaming
    • New function age() to calculate the (patients) age in years
    • New function age_groups() to split ages into custom or predefined groups (like children or elderly). This allows for easier demographic antimicrobial resistance analysis per age group.
    • -
    • New function ggplot_rsi_predict() as well as the base R plot() function can now be used for resistance prediction calculated with resistance_predict(): r x <- resistance_predict(septic_patients, col_ab = "amox") plot(x) ggplot_rsi_predict(x) +
    • +

      New function ggplot_rsi_predict() as well as the base R plot() function can now be used for resistance prediction calculated with resistance_predict():

      +
      x <- resistance_predict(septic_patients, col_ab = "amox")
      +plot(x)
      +ggplot_rsi_predict(x)
    • -
    • Functions filter_first_isolate() and filter_first_weighted_isolate() to shorten and fasten filtering on data sets with antimicrobial results, e.g.: r septic_patients %>% filter_first_isolate(...) # or filter_first_isolate(septic_patients, ...) is equal to: r septic_patients %>% mutate(only_firsts = first_isolate(septic_patients, ...)) %>% filter(only_firsts == TRUE) %>% select(-only_firsts) +
    • +

      Functions filter_first_isolate() and filter_first_weighted_isolate() to shorten and speed up filtering on data sets with antimicrobial results, e.g.:

      + +

      is equal to:

      +
      septic_patients %>%
      +  mutate(only_firsts = first_isolate(septic_patients, ...)) %>%
      +  filter(only_firsts == TRUE) %>%
      +  select(-only_firsts)
    • -
    • New vignettes about how to conduct AMR analysis, predict antimicrobial resistance, use the G-test and more. These are also available (and even easier readable) on our website: https://msberends.gitlab.io/AMR.
    • +
    • New vignettes about how to conduct AMR analysis, predict antimicrobial resistance, use the G-test and more. These are also available (and even easier to read) on our website: https://msberends.gitlab.io/AMR.

    @@ -266,12 +294,16 @@
  • Functions atc_ddd() and atc_groups() have been renamed atc_online_ddd() and atc_online_groups(). The old functions are deprecated and will be removed in a future version.
  • Function guess_mo() is now deprecated in favour of as.mo() and will be removed in future versions
  • Function guess_atc() is now deprecated in favour of as.atc() and will be removed in future versions
  • -
  • Function eucast_rules():
  • +
  • Function eucast_rules(): +
    • Updated EUCAST Clinical breakpoints to version 9.0 of 1 January 2019
    • Fixed a critical bug where some rules that depend on previous applied rules would not be applied adequately
    • Emphasised in manual that penicillin is meant as benzylpenicillin (ATC J01CE01)
    • -
    • Improvements for as.mo():
    • +
    +
  • +
  • Improvements for as.mo(): +
    • Fix for vector containing only empty values
    • Finds better results when input is in other languages
    • Better handling for subspecies
    • @@ -282,12 +314,17 @@
    • Progress bar will be shown when it takes more than 3 seconds to get results
    • Support for formatted console text
    • Console will return the percentage of uncoercible input
    • -
    • Function first_isolate():
    • +
    +
  • +
  • Function first_isolate(): +
    • Fixed a bug where distances between dates would not be calculated right - in the septic_patients data set this yielded a difference of 0.15% more isolates
    • Will now use a column named like “patid” for the patient ID (parameter col_patientid), when this parameter was left blank
    • Will now use a column named like “key(…)ab” or “key(…)antibiotics” for the key antibiotics (parameter col_keyantibiotics()), when this parameter was left blank
    • Removed parameter output_logical, the function will now always return a logical value
    • Renamed parameter filter_specimen to specimen_group, although using filter_specimen will still work
    • +
    +
  • A note to the manual pages of the portion functions, that low counts can influence the outcome and that the portion functions may camouflage this, since they only return the portion (albeit being dependent on the minimum parameter)
  • Merged data sets microorganisms.certe and microorganisms.umcg into microorganisms.codes
  • @@ -300,24 +337,26 @@
  • Small text updates to summaries of class rsi and mic
  • -
  • Frequency tables (freq() function):
  • +
  • Frequency tables (freq() function): + +
  • Function scale_y_percent() now contains the limits parameter
  • Automatic parameter filling for mdro(), key_antibiotics() and eucast_rules()
  • Updated examples for resistance prediction (resistance_predict() function)
  • -
  • Fix for as.mic() to support more values ending in (several) zeroes

  • +
  • Fix for as.mic() to support more values ending in (several) zeroes
  • @@ -368,7 +409,8 @@ septic_patients %>%
  • EUCAST_rules was renamed to eucast_rules, the old function still exists as a deprecated function
  • -
  • Big changes to the eucast_rules function:
  • +
  • Big changes to the eucast_rules function: +
    • Now also applies rules from the EUCAST ‘Breakpoint tables for bacteria’, version 8.1, 2018, http://www.eucast.org/clinical_breakpoints/ (see Source of the function)
    • New parameter rules to specify which rules should be applied (expert rules, breakpoints, others or all)
    • New parameter verbose which can be set to TRUE to get very specific messages about which columns and rows were affected
    • @@ -377,11 +419,18 @@ septic_patients %>%
    • Data set septic_patients now reflects these changes
    • Added parameter pipe for piperacillin (J01CA12), also to the mdro function
    • Small fixes to EUCAST clinical breakpoint rules
    • +
    +
  • Added column kingdom to the microorganisms data set, and function mo_kingdom to look up values
  • Tremendous speed improvement for as.mo (and subsequently all mo_* functions), as empty values will be ignored a priori
  • Fewer than 3 characters as input for as.mo will return NA
  • -
  • Function as.mo (and all mo_* wrappers) now supports genus abbreviations with “species” attached r as.mo("E. species") # B_ESCHR mo_fullname("E. spp.") # "Escherichia species" as.mo("S. spp") # B_STPHY mo_fullname("S. species") # "Staphylococcus species" +
  • +

    Function as.mo (and all mo_* wrappers) now supports genus abbreviations with “species” attached

    +
    as.mo("E. species")        # B_ESCHR
    +mo_fullname("E. spp.")     # "Escherichia species"
    +as.mo("S. spp")            # B_STPHY
    +mo_fullname("S. species")  # "Staphylococcus species"
  • Added parameter combine_IR (TRUE/FALSE) to functions portion_df and count_df, to indicate that all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
  • Fix for portion_*(..., as_percent = TRUE) when minimal number of isolates would not be met
  • @@ -390,18 +439,19 @@ septic_patients %>%
  • Using portion_* functions now throws a warning when total available isolate is below parameter minimum
  • Functions as.mo, as.rsi, as.mic, as.atc and freq will not set package name as attribute anymore
  • -
  • Frequency tables - freq():
  • +
  • Frequency tables - freq(): + +
  • first_isolate now tries to find columns to use as input when parameters are left blank
  • Improvements for MDRO algorithm (function mdro)
  • @@ -423,7 +475,8 @@ septic_patients %>%
  • ggplot_rsi and scale_y_percent have breaks parameter
  • -
  • AI improvements for as.mo:
  • +
  • AI improvements for as.mo: +
    • "CRS" -> Stenotrophomonas maltophilia
    • @@ -436,6 +489,8 @@ septic_patients %>%
    • "MSSE" -> Staphylococcus epidermidis
    • +
    +
  • Fix for join functions
  • Speed improvement for is.rsi.eligible, now 15-20 times faster
  • In g.test, when sum(x) is below 1000 or any of the expected values is below 5, Fisher’s Exact Test will be suggested
  • @@ -464,7 +519,8 @@ septic_patients %>% New
    • The data set microorganisms now contains all microbial taxonomic data from ITIS (kingdoms Bacteria, Fungi and Protozoa), the Integrated Taxonomy Information System, available via https://itis.gov. The data set now contains more than 18,000 microorganisms with all known bacteria, fungi and protozoa according to ITIS with genus, species, subspecies, family, order, class, phylum and subkingdom. The new data set microorganisms.old contains all previously known taxonomic names from those kingdoms.
    • -
    • New functions based on the existing function mo_property:
    • +
    • New functions based on the existing function mo_property: +
      • Taxonomic names: mo_phylum, mo_class, mo_order, mo_family, mo_genus, mo_species, mo_subspecies
      • Semantic names: mo_fullname, mo_shortname @@ -474,22 +530,52 @@ septic_patients %>%
      • Author and year: mo_ref
      -

      They also come with support for German, Dutch, French, Italian, Spanish and Portuguese: r mo_gramstain("E. coli") # [1] "Gram negative" mo_gramstain("E. coli", language = "de") # German # [1] "Gramnegativ" mo_gramstain("E. coli", language = "es") # Spanish # [1] "Gram negativo" mo_fullname("S. group A", language = "pt") # Portuguese # [1] "Streptococcus grupo A"

      -

      Furthermore, former taxonomic names will give a note about the current taxonomic name: r mo_gramstain("Esc blattae") # Note: 'Escherichia blattae' (Burgess et al., 1973) was renamed 'Shimwellia blattae' (Priest and Barker, 2010) # [1] "Gram negative"

      +

      They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:

      +
      mo_gramstain("E. coli")
      +# [1] "Gram negative"
      +mo_gramstain("E. coli", language = "de") # German
      +# [1] "Gramnegativ"
      +mo_gramstain("E. coli", language = "es") # Spanish
      +# [1] "Gram negativo"
      +mo_fullname("S. group A", language = "pt") # Portuguese
      +# [1] "Streptococcus grupo A"
      +

      Furthermore, former taxonomic names will give a note about the current taxonomic name:

      + +
    • +
    • Functions count_R, count_IR, count_I, count_SI and count_S to selectively count resistant or susceptible isolates
        -
      • Functions count_R, count_IR, count_I, count_SI and count_S to selectively count resistant or susceptible isolates
      • Extra function count_df (which works like portion_df) to get all counts of S, I and R of a data set with antibiotic columns, with support for grouped variables
      • +
      +
    • Function is.rsi.eligible to check for columns that have valid antimicrobial results, but do not have the rsi class yet. Transform the columns of your raw data with: data %>% mutate_if(is.rsi.eligible, as.rsi)
    • -
    • Functions as.mo and is.mo as replacements for as.bactid and is.bactid (since the microoganisms data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The as.mo function determines microbial IDs using Artificial Intelligence (AI): r as.mo("E. coli") # [1] B_ESCHR_COL as.mo("MRSA") # [1] B_STPHY_AUR as.mo("S group A") # [1] B_STRPTC_GRA And with great speed too - on a quite regular Linux server from 2007 it takes us less than 0.02 seconds to transform 25,000 items: r thousands_of_E_colis <- rep("E. coli", 25000) microbenchmark::microbenchmark(as.mo(thousands_of_E_colis), unit = "s") # Unit: seconds # min median max neval # 0.01817717 0.01843957 0.03878077 100 +
    • +

      Functions as.mo and is.mo as replacements for as.bactid and is.bactid (since the microoganisms data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The as.mo function determines microbial IDs using Artificial Intelligence (AI):

      + +

      And with great speed too - on a quite regular Linux server from 2007 it takes us less than 0.02 seconds to transform 25,000 items:

      +
    • Added parameter reference_df for as.mo, so users can supply their own microbial IDs, name or codes as a reference table
    • -
    • Renamed all previous references to bactid to mo, like:
    • +
    • Renamed all previous references to bactid to mo, like: +
      • Column names inputs of EUCAST_rules, first_isolate and key_antibiotics
      • Column names of datasets microorganisms and septic_patients
      • All old syntaxes will still work with this version, but will throw warnings
      • +
      +
    • Function labels_rsi_count to print datalabels on a RSI ggplot2 model
    • Functions as.atc and is.atc to transform/look up antibiotic ATC codes as defined by the WHO. The existing function guess_atc is now an alias of as.atc.

    • Function ab_property and its aliases: ab_name, ab_tradenames, ab_certe, ab_umcg and ab_trivial_nl @@ -504,7 +590,14 @@ septic_patients %>% Changed
      • Added three antimicrobial agents to the antibiotics data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)
      • -
      • Added 163 trade names to the antibiotics data set, it now contains 298 different trade names in total, e.g.: r ab_official("Bactroban") # [1] "Mupirocin" ab_name(c("Bactroban", "Amoxil", "Zithromax", "Floxapen")) # [1] "Mupirocin" "Amoxicillin" "Azithromycin" "Flucloxacillin" ab_atc(c("Bactroban", "Amoxil", "Zithromax", "Floxapen")) # [1] "R01AX06" "J01CA04" "J01FA10" "J01CF05" +
      • +

        Added 163 trade names to the antibiotics data set, it now contains 298 different trade names in total, e.g.:

        +
        ab_official("Bactroban")
        +# [1] "Mupirocin"
        +ab_name(c("Bactroban", "Amoxil", "Zithromax", "Floxapen"))
        +# [1] "Mupirocin" "Amoxicillin" "Azithromycin" "Flucloxacillin"
        +ab_atc(c("Bactroban", "Amoxil", "Zithromax", "Floxapen"))
        +# [1] "R01AX06" "J01CA04" "J01FA10" "J01CF05"
      • For first_isolate, rows will be ignored when there’s no species available
      • Function ratio is now deprecated and will be removed in a future release, as it is not really the scope of this package
      • @@ -513,9 +606,36 @@ septic_patients %>%
      • Added prevalence column to the microorganisms data set
      • Added parameters minimum and as_percent to portion_df
      • -
      • Support for quasiquotation in the functions series count_* and portions_*, and n_rsi. This allows to check for more than 2 vectors or columns. ```r septic_patients %>% select(amox, cipr) %>% count_IR() # which is the same as: septic_patients %>% count_IR(amox, cipr)
      • +
      • +

        Support for quasiquotation in the functions series count_* and portions_*, and n_rsi. This allows to check for more than 2 vectors or columns.

        + +
      • +
      • Edited ggplot_rsi and geom_rsi so they can cope with count_df. The new fun parameter has value portion_df at default, but can be set to count_df.
      • +
      • Fix for ggplot_rsi when the ggplot2 package was not loaded
      • +
      • Added datalabels function labels_rsi_count to ggplot_rsi +
      • +
      • Added possibility to set any parameter to geom_rsi (and ggplot_rsi) so you can set your own preferences
      • +
      • Fix for joins, where predefined suffixes would not be honoured
      • +
      • Added parameter quote to the freq function
      • +
      • Added generic function diff for frequency tables
      • +
      • Added longest and shortest character length in the frequency table (freq) header of class character +
      • +
      • +

        Support for types (classes) list and matrix for freq

        +
        my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
        +freq(my_matrix)
        +

        For lists, subsetting is possible:

        +
        my_list = list(age = septic_patients$age, gender = septic_patients$gender)
        +my_list %>% freq(age)
        +my_list %>% freq(gender)
        +
      -

      septic_patients %>% portion_S(amcl) septic_patients %>% portion_S(amcl, gent) septic_patients %>% portion_S(amcl, gent, pita) * Edited `ggplot_rsi` and `geom_rsi` so they can cope with `count_df`. The new `fun` parameter has value `portion_df` at default, but can be set to `count_df`. * Fix for `ggplot_rsi` when the `ggplot2` package was not loaded * Added datalabels function `labels_rsi_count` to `ggplot_rsi` * Added possibility to set any parameter to `geom_rsi` (and `ggplot_rsi`) so you can set your own preferences * Fix for joins, where predefined suffices would not be honoured * Added parameter `quote` to the `freq` function * Added generic function `diff` for frequency tables * Added longest en shortest character length in the frequency table (`freq`) header of class `character` * Support for types (classes) list and matrix for `freq`r my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2)) freq(my_matrix) For lists, subsetting is possible:r my_list = list(age = septic_patients$age, gender = septic_patients$gender) my_list %>% freq(age) my_list %>% freq(gender) ```

    @@ -534,15 +654,21 @@ septic_patients %>% New

    • -BREAKING: rsi_df was removed in favour of new functions portion_R, portion_IR, portion_I, portion_SI and portion_S to selectively calculate resistance or susceptibility. These functions are 20 to 30 times faster than the old rsi function. The old function still works, but is deprecated.
    • +BREAKING: rsi_df was removed in favour of new functions portion_R, portion_IR, portion_I, portion_SI and portion_S to selectively calculate resistance or susceptibility. These functions are 20 to 30 times faster than the old rsi function. The old function still works, but is deprecated. +
      • New function portion_df to get all portions of S, I and R of a data set with antibiotic columns, with support for grouped variables
      • +
      +
    • -BREAKING: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (called key antibiotics) to include more first isolates (afterwards called first weighted isolates) are now as follows:
    • +BREAKING: the methodology for determining first weighted isolates was changed. The antibiotics that are compared between isolates (called key antibiotics) to include more first isolates (afterwards called first weighted isolates) are now as follows: +
      • Universal: amoxicillin, amoxicillin/clavulanic acid, cefuroxime, piperacillin/tazobactam, ciprofloxacin, trimethoprim/sulfamethoxazole
      • Gram-positive: vancomycin, teicoplanin, tetracycline, erythromycin, oxacillin, rifampicin
      • Gram-negative: gentamicin, tobramycin, colistin, cefotaxime, ceftazidime, meropenem
      • -
      • Support for ggplot2 +
      +
    • Support for ggplot2 +
      • New functions geom_rsi, facet_rsi, scale_y_percent, scale_rsi_colours and theme_rsi
      • New wrapper function ggplot_rsi to apply all above functions on a data set: @@ -553,22 +679,32 @@ septic_patients %>%
    • -
    • Determining bacterial ID:
    • +
    + +
  • Determining bacterial ID: +
    • New functions as.bactid and is.bactid to transform/ look up microbial ID’s.
    • The existing function guess_bactid is now an alias of as.bactid
    • New Becker classification for Staphylococcus to categorise them into Coagulase Negative Staphylococci (CoNS) and Coagulase Positive Staphylococci (CoPS)
    • New Lancefield classification for Streptococcus to categorise them into Lancefield groups
    • +
    +
  • For convenience, new descriptive statistical functions kurtosis and skewness that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices
  • Function g.test to perform the Χ2 distributed G-test, whose use is the same as chisq.test
  • -
  • Function ratio to transform a vector of values to a preset ratio
  • +
  • +Function ratio to transform a vector of values to a preset ratio + +
  • Support for Addins menu in RStudio to quickly insert %in% or %like% (and give them keyboard shortcuts), or to view the datasets that come with this package
  • Function p.symbol to transform p values to their related symbols: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  • Functions clipboard_import and clipboard_export as helper functions to quickly copy and paste from/to software like Excel and SPSS. These functions use the clipr package, but are a little altered to also support headless Linux servers (so you can use it in RStudio Server)
  • -
  • New for frequency tables (function freq):
  • +
  • New for frequency tables (function freq): +
    • A vignette to explain its usage
    • Support for rsi (antimicrobial resistance) to use as input
    • Support for table to use as input: freq(table(x, y)) @@ -583,6 +719,8 @@ septic_patients %>%
    • Header of frequency tables now also shows Mean Absolute Deviation (MAD) and Interquartile Range (IQR)
    • Possibility to globally set the default for the amount of items to print, with options(max.print.freq = n) where n is your preset value
    +
  • +

    @@ -604,21 +742,27 @@ septic_patients %>%
  • Small improvements to the microorganisms dataset (especially for Salmonella) and the column bactid now has the new class "bactid"
  • -
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions:
  • +
  • Combined MIC/RSI values will now be coerced by the rsi and mic functions: + +
  • Now possible to coerce MIC values with a space between operator and value, i.e. as.mic("<= 0.002") now works
  • Classes rsi and mic do not add the attribute package.version anymore
  • Added "groups" option for atc_property(..., property). It will return a vector of the ATC hierarchy as defined by the WHO. The new function atc_groups is a convenient wrapper around this.
  • Built-in host check for atc_property as it requires the host set by url to be responsive
  • Improved first_isolate algorithm to exclude isolates where bacteria ID or genus is unavailable
  • Fix for warning hybrid evaluation forced for row_number (924b62) from the dplyr package v0.7.5 and above
  • -
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid)
  • +
  • Support for empty values and for 1 or 2 columns as input for guess_bactid (now called as.bactid) +
    • So yourdata %>% select(genus, species) %>% as.bactid() now also works
    • +
    +
  • Other small fixes
  • @@ -626,11 +770,14 @@ septic_patients %>%

    Other

    @@ -645,14 +792,17 @@ septic_patients %>%
    • Full support for Windows, Linux and macOS
    • Full support for old R versions, only R-3.0.0 (April 2013) or later is needed (needed packages may have other dependencies)
    • -
    • Function n_rsi to count cases where antibiotic test results were available, to be used in conjunction with dplyr::summarise, see ?rsi
    • +
    • Function n_rsi to count cases where antibiotic test results were available, to be used in conjunction with dplyr::summarise, see ?rsi
    • Function guess_bactid to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
    • Function guess_atc to determine the ATC of an antibiotic based on name, trade name, or known abbreviations
    • Function freq to create frequency tables, with additional info in a header
    • -
    • Function MDRO to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines.
    • +
    • Function MDRO to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines. + +
    • New algorithm to determine weighted isolates, can now be "points" or "keyantibiotics", see ?first_isolate
    • New print format for tibbles and data.tables
    • diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 6102d6c7..ac0898dd 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,4 +1,4 @@ -pandoc: 1.17.2 +pandoc: 2.3.1 pkgdown: 1.3.0 pkgdown_sha: ~ articles: diff --git a/docs/reference/freq.html b/docs/reference/freq.html index c876a6a2..e46d8360 100644 --- a/docs/reference/freq.html +++ b/docs/reference/freq.html @@ -239,13 +239,13 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co
      frequency_tbl(x, ..., sort.count = TRUE,
         nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE,
         markdown = !interactive(), digits = 2, quote = FALSE,
      -  header = !markdown, title = NULL, na = "<NA>", droplevels = TRUE,
      +  header = TRUE, title = NULL, na = "<NA>", droplevels = TRUE,
         sep = " ", decimal.mark = getOption("OutDec"),
         big.mark = ifelse(decimal.mark != ",", ",", "."))
       
       freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
         na.rm = TRUE, row.names = TRUE, markdown = !interactive(),
      -  digits = 2, quote = FALSE, header = !markdown, title = NULL,
      +  digits = 2, quote = FALSE, header = TRUE, title = NULL,
         na = "<NA>", droplevels = TRUE, sep = " ",
         decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark !=
         ",", ",", "."))
      @@ -256,7 +256,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co
       
       # S3 method for frequency_tbl
       print(x, nmax = getOption("max.print.freq",
      -  default = 15), markdown = !interactive(), header = !markdown,
      +  default = 15), markdown = !interactive(), header = TRUE,
         decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark !=
         ",", ",", "."), ...)
      @@ -368,7 +368,7 @@ top_freq can be used to get the top/bottom n items of a frequency table, with co
    • Median, using median, with percentage since oldest

    In factors, all factor levels that are not existing in the input data will be dropped.

    -

    The function top_freq uses top_n internally and will include more than n rows if there are ties.

    +

    The function top_freq uses top_n internally and will include more than n rows if there are ties.

    Read more on our website!

    @@ -392,8 +392,8 @@ On our website https://msberends.gitla # you could also use `select` or `pull` to get your variables septic_patients %>% - filter(hospital_id=="A") %>% - select(mo) %>% + filter(hospital_id=="A") %>% + select(mo) %>%freq() @@ -409,20 +409,20 @@ On our website https://msberends.gitla # group a variable and analyse another septic_patients %>% - group_by(hospital_id) %>% + group_by(hospital_id) %>%freq(gender) # get top 10 bugs of hospital A as a vectorseptic_patients%>% - filter(hospital_id=="A") %>% + filter(hospital_id=="A") %>%freq(mo) %>%top_freq(10) # save frequency table to an objectyears<-septic_patients%>% - mutate(year=format(date, "%Y")) %>% + mutate(year=format(date, "%Y")) %>%freq(year) @@ -473,11 +473,11 @@ On our website https://msberends.gitla # only get selected columns septic_patients %>% freq(hospital_id) %>% - select(item, percent) + select(item, percent) septic_patients%>%freq(hospital_id) %>% - select(-count, -cum_count) + select(-count, -cum_count) # check differences between frequency tables diff --git a/man/freq.Rd b/man/freq.Rd index a43f8ac7..1bb44253 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -11,13 +11,13 @@ frequency_tbl(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = !interactive(), digits = 2, quote = FALSE, - header = !markdown, title = NULL, na = "", droplevels = TRUE, + header = TRUE, title = NULL, na = "", droplevels = TRUE, sep = " ", decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != ",", ",", ".")) freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE, markdown = !interactive(), - digits = 2, quote = FALSE, header = !markdown, title = NULL, + digits = 2, quote = FALSE, header = TRUE, title = NULL, na = "", droplevels = TRUE, sep = " ", decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != ",", ",", ".")) @@ -27,7 +27,7 @@ top_freq(f, n) header(f, property = NULL) 
\method{print}{frequency_tbl}(x, nmax = getOption("max.print.freq", - default = 15), markdown = !interactive(), header = !markdown, + default = 15), markdown = !interactive(), header = TRUE, decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark != ",", ",", "."), ...) }