diff for freq, fix for mo_shortname

2026-07-27 09:10:57 +02:00 · 2018-09-29 21:54:32 +02:00
parent a6aa9e95fb
commit 92c9cc2608
12 changed files with 79 additions and 26 deletions
--- a/3
+++ b/3
@@ -11,6 +11,7 @@ S3method(as.vector,frequency_tbl)
 S3method(as_tibble,frequency_tbl)
 S3method(barplot,mic)
 S3method(barplot,rsi)
+S3method(diff,frequency_tbl)
 S3method(format,frequency_tbl)
 S3method(hist,frequency_tbl)
 S3method(kurtosis,data.frame)
@@ -133,6 +134,7 @@ exportMethods(as.vector.frequency_tbl)
 exportMethods(as_tibble.frequency_tbl)
 exportMethods(barplot.mic)
 exportMethods(barplot.rsi)
+exportMethods(diff.frequency_tbl)
 exportMethods(format.frequency_tbl)
 exportMethods(hist.frequency_tbl)
 exportMethods(kurtosis)
@@ -171,6 +173,7 @@ importFrom(dplyr,case_when)
 importFrom(dplyr,desc)
 importFrom(dplyr,everything)
 importFrom(dplyr,filter)
+importFrom(dplyr,full_join)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
 importFrom(dplyr,group_vars)
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,15 +7,15 @@
  * Semantic names: `mo_fullname`, `mo_shortname`
  * Microbial properties: `mo_type`, `mo_gramstain`.
  
-  They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale:
+  They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
  ```r
  mo_gramstain("E. coli")
  # [1] "Gram negative"
-  mo_gramstain("E. coli", language = "de") # "de" = Deutsch / German
+  mo_gramstain("E. coli", language = "de") # "de" = German
  # [1] "Gramnegativ"
-  mo_gramstain("E. coli", language = "es") # "es" = Español / Spanish
+  mo_gramstain("E. coli", language = "es") # "es" = Spanish
  # [1] "Gram negativo"
-  mo_fullname("S. group A") # when run on a on a Portuguese system
+  mo_fullname("S. group A", language = "pt") # Portuguese
  # [1] "Streptococcus grupo A"
  ```
  
@@ -31,11 +31,11 @@
 * Functions `as.mo` and `is.mo` as replacements for `as.bactid` and `is.bactid` (since the `microoganisms` data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The `as.mo` function determines microbial IDs using Artificial Intelligence (AI):
  ```r
  as.mo("E. coli")
-  # [1] ESCCOL
+  # [1] B_ESCHR_COL
  as.mo("MRSA")
-  # [1] STAAUR
+  # [1] B_STPHY_AUR
  as.mo("S group A")
-  # [1] STCGRA
+  # [1] B_STRPTC_GRA
  ```
  And with great speed too - on a quite regular Linux server from 2007 it takes us 0.009 seconds to transform 25,000 items:
  ```r
@@ -54,7 +54,8 @@

 * Function `ab_property` and its aliases: `ab_name`, `ab_tradenames`, `ab_certe`, `ab_umcg` and `ab_trivial_nl`
 * Introduction to AMR as a vignette
-* Removed clipbaord functions as it violated the CRAN policy
+* Removed clipboard functions as it violated the CRAN policy
+* Renamed `septic_patients$sex` to `septic_patients$gender`

 #### Changed
 * Added three antimicrobial agents to the `antibiotics` data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)
@@ -92,14 +93,14 @@
 * Added longest en shortest character length in the frequency table (`freq`) header of class `character`
 * Support for types (classes) list and matrix for `freq`
  ```r
-  my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
+  my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
  freq(my_matrix)
  ```
  For lists, subsetting is possible:
  ```r
-  my_list = list(age = septic_patients$age, sex = septic_patients$sex)
+  my_list = list(age = septic_patients$age, gender = septic_patients$gender)
  my_list %>% freq(age)
-  my_list %>% freq(sex)
+  my_list %>% freq(gender)
  ```

 #### Other
--- a/R/freq.R
+++ b/R/freq.R
@@ -130,7 +130,7 @@
 #'           sort(septic_patients$age)) # TRUE
 #'
 #' # it also supports `table` objects:
-#' table(septic_patients$sex,
+#' table(septic_patients$gender,
 #'       septic_patients$age) %>%
 #'   freq(sep = " **sep** ")
 #'
@@ -502,6 +502,46 @@ top_freq <- function(f, n) {
  vect
 }

+#' @rdname freq
+#' @exportMethod diff.frequency_tbl
+#' @importFrom dplyr %>% full_join mutate
+#' @export
+diff.frequency_tbl <- function(x, y, ...) {
+  # check classes
+  if (!"frequency_tbl" %in% class(x)
+      | !"frequency_tbl" %in% class(y)) {
+    stop("Both x and y must be a frequency table.")
+  }
+
+  x.attr <- attributes(x)$opt
+
+  # only keep item and count
+  x <- x[, 1:2]
+  y <- y[, 1:2]
+
+  x <- x %>%
+    full_join(y,
+              by = colnames(x)[1],
+              suffix = c(".x", ".y")) %>%
+    mutate(
+      diff = case_when(
+        is.na(count.y) ~ -count.x,
+        is.na(count.x) ~ count.y,
+        TRUE ~ count.y - count.x)) %>%
+    mutate(
+      diff.percent = percent(
+        diff / count.x,
+        force_zero = TRUE))
+
+  print(
+    knitr::kable(x,
+                 format = x.attr$tbl_format,
+                 col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"),
+                 align = "lrrrr",
+                 padding = 1)
+  )
+}
+
 #' @rdname freq
 #' @exportMethod print.frequency_tbl
 #' @importFrom knitr kable
--- a/R/mo_property.R
+++ b/R/mo_property.R
@@ -128,10 +128,12 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
    result <- as.character(res1)
  } else {
    x <- AMR::as.mo(x)
-    result <- data.frame(mo = x) %>%
-      left_join(AMR::microorganisms, by = "mo") %>%
-      mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
-      pull(shortname)
+    suppressWarnings(
+      result <- data.frame(mo = x) %>%
+        left_join(AMR::microorganisms, by = "mo") %>%
+        mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
+        pull(shortname)
+    )
  }
  mo_translate(result, language = language)
 }
--- a/README.md
+++ b/README.md
@@ -120,9 +120,13 @@ help(package = "AMR")
 ```

 ## ITIS
-<img src="man/figures/logo_amr.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="man/figures/itis_logo.jpg" height="50px">
+<img src="man/figures/itis_logo.jpg" height="100px">

-This `AMR` package contains the **complete microbial taxonomic data** (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
+This `AMR` package contains the **complete microbial taxonomic data** (with all  seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). 
+
+The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
+
+ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.

 ### New classes
 This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`). 
@@ -150,7 +154,7 @@ plot(septic_patients$cipr)

 ![example_1_rsi](man/figures/rsi_example1.png)

-<img src="man/figures/logo_amr.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">
+<<img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"> <img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">

 Or use the `ggplot2` and `dplyr` packages to create more appealing plots:
 ```r
--- a/data/microorganisms.certe.rda
+++ b/data/microorganisms.certe.rda
--- a/data/microorganisms.rda
+++ b/data/microorganisms.rda
--- a/data/septic_patients.rda
+++ b/data/septic_patients.rda
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -4,6 +4,7 @@
 \alias{freq}
 \alias{frequency_tbl}
 \alias{top_freq}
+\alias{diff.frequency_tbl}
 \alias{print.frequency_tbl}
 \title{Frequency table}
 \usage{
@@ -17,6 +18,8 @@ freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),

 top_freq(f, n)

+\method{diff}{frequency_tbl}(x, y, ...)
+
 \method{print}{frequency_tbl}(x, nmax = getOption("max.print.freq",
  default = 15), ...)
 }
@@ -143,7 +146,7 @@ identical(septic_patients \%>\%
          sort(septic_patients$age)) # TRUE

 # it also supports `table` objects:
-table(septic_patients$sex,
+table(septic_patients$gender,
      septic_patients$age) \%>\%
  freq(sep = " **sep** ")

--- a/tests/testthat/test-first_isolate.R
+++ b/tests/testthat/test-first_isolate.R
@@ -114,7 +114,7 @@ test_that("first isolates work", {
                            first_isolate(col_date = "date",
                                          col_mo = "mo",
                                          col_patient_id = "patient_id",
-                                          col_testcode = "sex",
+                                          col_testcode = "gender",
                                          testcodes_exclude = "M"))

  # errors
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@@ -32,17 +32,17 @@ test_that("frequency table works", {
  # factor
  expect_output(print(freq(septic_patients$hospital_id)))
  # table
-  expect_output(print(freq(table(septic_patients$sex, septic_patients$age))))
+  expect_output(print(freq(table(septic_patients$gender, septic_patients$age))))
  # rsi
  expect_output(print(freq(septic_patients$amcl)))
  # hms
  expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50))))))
  # matrix
  expect_output(print(freq(as.matrix(septic_patients$age))))
-  expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")]))))
+  expect_output(print(freq(as.matrix(septic_patients[, c("age", "gender")]))))
  # list
  expect_output(print(freq(list(age = septic_patients$age))))
-  expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex))))
+  expect_output(print(freq(list(age = septic_patients$age, gender = septic_patients$gender))))

  library(dplyr)
  expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())
--- a/vignettes/freq.Rmd
+++ b/vignettes/freq.Rmd
@@ -25,9 +25,9 @@ Frequency tables (or frequency distributions) are summaries of the distribution

 ## Frequencies of one variable

-To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset:
+To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `gender` variable of the `septic_patients` dataset:
 ```{r, echo = TRUE}
-septic_patients %>% freq(sex)
+septic_patients %>% freq(gender)
 ```
 This immediately shows the class of the variable, its length and availability (i.e. the amount of `NA`), the amount of unique values and (most importantly) that among septic patients men are more prevalent than women.