diff --git a/NAMESPACE b/NAMESPACE index 1a1b00cb..d50e9182 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,6 +11,7 @@ S3method(as.vector,frequency_tbl) S3method(as_tibble,frequency_tbl) S3method(barplot,mic) S3method(barplot,rsi) +S3method(diff,frequency_tbl) S3method(format,frequency_tbl) S3method(hist,frequency_tbl) S3method(kurtosis,data.frame) @@ -133,6 +134,7 @@ exportMethods(as.vector.frequency_tbl) exportMethods(as_tibble.frequency_tbl) exportMethods(barplot.mic) exportMethods(barplot.rsi) +exportMethods(diff.frequency_tbl) exportMethods(format.frequency_tbl) exportMethods(hist.frequency_tbl) exportMethods(kurtosis) @@ -171,6 +173,7 @@ importFrom(dplyr,case_when) importFrom(dplyr,desc) importFrom(dplyr,everything) importFrom(dplyr,filter) +importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,group_by_at) importFrom(dplyr,group_vars) diff --git a/NEWS.md b/NEWS.md index e1d30d34..f0559927 100755 --- a/NEWS.md +++ b/NEWS.md @@ -7,15 +7,15 @@ * Semantic names: `mo_fullname`, `mo_shortname` * Microbial properties: `mo_type`, `mo_gramstain`. - They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale: + They also come with support for German, Dutch, French, Italian, Spanish and Portuguese: ```r mo_gramstain("E. coli") # [1] "Gram negative" - mo_gramstain("E. coli", language = "de") # "de" = Deutsch / German + mo_gramstain("E. coli", language = "de") # "de" = German # [1] "Gramnegativ" - mo_gramstain("E. coli", language = "es") # "es" = EspaƱol / Spanish + mo_gramstain("E. coli", language = "es") # "es" = Spanish # [1] "Gram negativo" - mo_fullname("S. group A") # when run on a on a Portuguese system + mo_fullname("S. group A", language = "pt") # Portuguese # [1] "Streptococcus grupo A" ``` @@ -31,11 +31,11 @@ * Functions `as.mo` and `is.mo` as replacements for `as.bactid` and `is.bactid` (since the `microoganisms` data set not only contains bacteria). 
These last two functions are deprecated and will be removed in a future release. The `as.mo` function determines microbial IDs using Artificial Intelligence (AI): ```r as.mo("E. coli") - # [1] ESCCOL + # [1] B_ESCHR_COL as.mo("MRSA") - # [1] STAAUR + # [1] B_STPHY_AUR as.mo("S group A") - # [1] STCGRA + # [1] B_STRPTC_GRA ``` And with great speed too - on a quite regular Linux server from 2007 it takes us 0.009 seconds to transform 25,000 items: ```r @@ -54,7 +54,8 @@ * Function `ab_property` and its aliases: `ab_name`, `ab_tradenames`, `ab_certe`, `ab_umcg` and `ab_trivial_nl` * Introduction to AMR as a vignette -* Removed clipbaord functions as it violated the CRAN policy +* Removed clipboard functions as it violated the CRAN policy +* Renamed `septic_patients$sex` to `septic_patients$gender` #### Changed * Added three antimicrobial agents to the `antibiotics` data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05) @@ -92,14 +93,14 @@ * Added longest en shortest character length in the frequency table (`freq`) header of class `character` * Support for types (classes) list and matrix for `freq` ```r - my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2)) + my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2)) freq(my_matrix) ``` For lists, subsetting is possible: ```r - my_list = list(age = septic_patients$age, sex = septic_patients$sex) + my_list = list(age = septic_patients$age, gender = septic_patients$gender) my_list %>% freq(age) - my_list %>% freq(sex) + my_list %>% freq(gender) ``` #### Other diff --git a/R/freq.R b/R/freq.R index 28b3e1fe..a1f84a1c 100755 --- a/R/freq.R +++ b/R/freq.R @@ -130,7 +130,7 @@ #' sort(septic_patients$age)) # TRUE #' #' # it also supports `table` objects: -#' table(septic_patients$sex, +#' table(septic_patients$gender, #' septic_patients$age) %>% #' freq(sep = " **sep** ") #' @@ -502,6 +502,46 @@ top_freq <- function(f, n) { vect } +#' @rdname freq +#' @exportMethod 
diff.frequency_tbl
+#' @param y a second frequency table (class \code{frequency_tbl}) to compare \code{x} with
+#' @importFrom dplyr %>% case_when full_join mutate
+#' @export
+diff.frequency_tbl <- function(x, y, ...) {
+  # Compare two frequency tables item by item: the item and count columns of
+  # `x` and `y` are joined, the change in count from `x` to `y` is computed
+  # (absolute and relative to the count in `x`), and the result is printed
+  # as a knitr::kable() table.
+  # `...` is only there to match the diff() generic; it is not used.
+
+  # check classes; `||` keeps the condition a single TRUE/FALSE and
+  # missing(y) turns a call with only one table into the same clear error
+  if (missing(y)
+      || !inherits(x, "frequency_tbl")
+      || !inherits(y, "frequency_tbl")) {
+    stop("Both x and y must be a frequency table.")
+  }
+
+  # the `opt` attribute of the first table; its `tbl_format` entry is
+  # passed on to kable() below
+  x.attr <- attributes(x)$opt
+
+  # only keep item and count
+  x <- x[, 1:2]
+  y <- y[, 1:2]
+
+  # full join on the first (item) column, so items that occur in only one
+  # of the tables are kept; their count in the other table is NA
+  comparison <- x %>%
+    full_join(y,
+              by = colnames(x)[1],
+              suffix = c(".x", ".y")) %>%
+    mutate(
+      # an item missing from `y` is a full loss, one missing from `x` a
+      # full gain
+      diff = case_when(
+        is.na(count.y) ~ -count.x,
+        is.na(count.x) ~ count.y,
+        TRUE ~ count.y - count.x)) %>%
+    mutate(
+      # NOTE(review): for items absent from `x`, count.x is NA and the
+      # ratio is NA; how percent() renders NA is not visible here - confirm
+      diff.percent = percent(
+        diff / count.x,
+        force_zero = TRUE))
+
+  print(
+    knitr::kable(comparison,
+                 format = x.attr$tbl_format,
+                 col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"),
+                 align = "lrrrr",
+                 padding = 1)
+  )
+}
+
 #' @rdname freq
 #' @exportMethod print.frequency_tbl
 #' @importFrom knitr kable
diff --git a/R/mo_property.R b/R/mo_property.R
index e6b6270e..4a0bbed8 100644
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -128,10 +128,12 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
     result <- as.character(res1)
   } else {
     x <- AMR::as.mo(x)
-    result <- data.frame(mo = x) %>%
-      left_join(AMR::microorganisms, by = "mo") %>%
-      mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
-      pull(shortname)
+    suppressWarnings(
+      result <- data.frame(mo = x) %>%
+        left_join(AMR::microorganisms, by = "mo") %>%
+        mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". 
", species), NA_character_)) %>% + pull(shortname) + ) } mo_translate(result, language = language) } diff --git a/README.md b/README.md index 61fca18c..4308317d 100755 --- a/README.md +++ b/README.md @@ -120,9 +120,13 @@ help(package = "AMR") ``` ## ITIS - + -This `AMR` package contains the **complete microbial taxonomic data** (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens. +This `AMR` package contains the **complete microbial taxonomic data** (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). + +The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens. + +ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. ### New classes This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`). 
@@ -150,7 +154,7 @@ plot(septic_patients$cipr) ![example_1_rsi](man/figures/rsi_example1.png) - +< Or use the `ggplot2` and `dplyr` packages to create more appealing plots: ```r diff --git a/data/microorganisms.certe.rda b/data/microorganisms.certe.rda index 955cd948..15fe6e6c 100644 Binary files a/data/microorganisms.certe.rda and b/data/microorganisms.certe.rda differ diff --git a/data/microorganisms.rda b/data/microorganisms.rda index 3b1b2d33..20ee3215 100755 Binary files a/data/microorganisms.rda and b/data/microorganisms.rda differ diff --git a/data/septic_patients.rda b/data/septic_patients.rda index 7d66129f..8879b548 100755 Binary files a/data/septic_patients.rda and b/data/septic_patients.rda differ diff --git a/man/freq.Rd b/man/freq.Rd index 781ef25d..d27c0144 100755 --- a/man/freq.Rd +++ b/man/freq.Rd @@ -4,6 +4,7 @@ \alias{freq} \alias{frequency_tbl} \alias{top_freq} +\alias{diff.frequency_tbl} \alias{print.frequency_tbl} \title{Frequency table} \usage{ @@ -17,6 +18,8 @@ freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"), top_freq(f, n) +\method{diff}{frequency_tbl}(x, y, ...) + \method{print}{frequency_tbl}(x, nmax = getOption("max.print.freq", default = 15), ...) 
} @@ -143,7 +146,7 @@ identical(septic_patients \%>\% sort(septic_patients$age)) # TRUE # it also supports `table` objects: -table(septic_patients$sex, +table(septic_patients$gender, septic_patients$age) \%>\% freq(sep = " **sep** ") diff --git a/tests/testthat/test-first_isolate.R b/tests/testthat/test-first_isolate.R index 0cb920e4..c3a5cab6 100755 --- a/tests/testthat/test-first_isolate.R +++ b/tests/testthat/test-first_isolate.R @@ -114,7 +114,7 @@ test_that("first isolates work", { first_isolate(col_date = "date", col_mo = "mo", col_patient_id = "patient_id", - col_testcode = "sex", + col_testcode = "gender", testcodes_exclude = "M")) # errors diff --git a/tests/testthat/test-freq.R b/tests/testthat/test-freq.R index 2a17f2f9..8b528037 100755 --- a/tests/testthat/test-freq.R +++ b/tests/testthat/test-freq.R @@ -32,17 +32,17 @@ test_that("frequency table works", { # factor expect_output(print(freq(septic_patients$hospital_id))) # table - expect_output(print(freq(table(septic_patients$sex, septic_patients$age)))) + expect_output(print(freq(table(septic_patients$gender, septic_patients$age)))) # rsi expect_output(print(freq(septic_patients$amcl))) # hms expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50)))))) # matrix expect_output(print(freq(as.matrix(septic_patients$age)))) - expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")])))) + expect_output(print(freq(as.matrix(septic_patients[, c("age", "gender")])))) # list expect_output(print(freq(list(age = septic_patients$age)))) - expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex)))) + expect_output(print(freq(list(age = septic_patients$age, gender = septic_patients$gender)))) library(dplyr) expect_output(septic_patients %>% select(1:2) %>% freq() %>% print()) diff --git a/vignettes/freq.Rmd b/vignettes/freq.Rmd index 59ad453c..cb2347bf 100755 --- a/vignettes/freq.Rmd +++ b/vignettes/freq.Rmd @@ -25,9 +25,9 @@ Frequency tables (or frequency 
distributions) are summaries of the distribution ## Frequencies of one variable -To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset: +To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `gender` variable of the `septic_patients` dataset: ```{r, echo = TRUE} -septic_patients %>% freq(sex) +septic_patients %>% freq(gender) ``` This immediately shows the class of the variable, its length and availability (i.e. the amount of `NA`), the amount of unique values and (most importantly) that among septic patients men are more prevalent than women.