diff for freq, fix for mo_shortname

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-09-29 21:54:32 +02:00
parent a6aa9e95fb
commit 92c9cc2608
12 changed files with 79 additions and 26 deletions

View File

@ -11,6 +11,7 @@ S3method(as.vector,frequency_tbl)
S3method(as_tibble,frequency_tbl)
S3method(barplot,mic)
S3method(barplot,rsi)
S3method(diff,frequency_tbl)
S3method(format,frequency_tbl)
S3method(hist,frequency_tbl)
S3method(kurtosis,data.frame)
@ -133,6 +134,7 @@ exportMethods(as.vector.frequency_tbl)
exportMethods(as_tibble.frequency_tbl)
exportMethods(barplot.mic)
exportMethods(barplot.rsi)
exportMethods(diff.frequency_tbl)
exportMethods(format.frequency_tbl)
exportMethods(hist.frequency_tbl)
exportMethods(kurtosis)
@ -171,6 +173,7 @@ importFrom(dplyr,case_when)
importFrom(dplyr,desc)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_at)
importFrom(dplyr,group_vars)

23
NEWS.md
View File

@ -7,15 +7,15 @@
* Semantic names: `mo_fullname`, `mo_shortname`
* Microbial properties: `mo_type`, `mo_gramstain`.
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese, and it defaults to the systems locale:
They also come with support for German, Dutch, French, Italian, Spanish and Portuguese:
```r
mo_gramstain("E. coli")
# [1] "Gram negative"
mo_gramstain("E. coli", language = "de") # "de" = Deutsch / German
mo_gramstain("E. coli", language = "de") # "de" = German
# [1] "Gramnegativ"
mo_gramstain("E. coli", language = "es") # "es" = Español / Spanish
mo_gramstain("E. coli", language = "es") # "es" = Spanish
# [1] "Gram negativo"
mo_fullname("S. group A") # when run on a on a Portuguese system
mo_fullname("S. group A", language = "pt") # Portuguese
# [1] "Streptococcus grupo A"
```
@ -31,11 +31,11 @@
* Functions `as.mo` and `is.mo` as replacements for `as.bactid` and `is.bactid` (since the `microoganisms` data set not only contains bacteria). These last two functions are deprecated and will be removed in a future release. The `as.mo` function determines microbial IDs using Artificial Intelligence (AI):
```r
as.mo("E. coli")
# [1] ESCCOL
# [1] B_ESCHR_COL
as.mo("MRSA")
# [1] STAAUR
# [1] B_STPHY_AUR
as.mo("S group A")
# [1] STCGRA
# [1] B_STRPTC_GRA
```
And with great speed too - on a quite regular Linux server from 2007 it takes us 0.009 seconds to transform 25,000 items:
```r
@ -54,7 +54,8 @@
* Function `ab_property` and its aliases: `ab_name`, `ab_tradenames`, `ab_certe`, `ab_umcg` and `ab_trivial_nl`
* Introduction to AMR as a vignette
* Removed clipbaord functions as it violated the CRAN policy
* Removed clipboard functions as it violated the CRAN policy
* Renamed `septic_patients$sex` to `septic_patients$gender`
#### Changed
* Added three antimicrobial agents to the `antibiotics` data set: Terbinafine (D01BA02), Rifaximin (A07AA11) and Isoconazole (D01AC05)
@ -92,14 +93,14 @@
* Added longest en shortest character length in the frequency table (`freq`) header of class `character`
* Support for types (classes) list and matrix for `freq`
```r
my_matrix = with(septic_patients, matrix(c(age, sex), ncol = 2))
my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2))
freq(my_matrix)
```
For lists, subsetting is possible:
```r
my_list = list(age = septic_patients$age, sex = septic_patients$sex)
my_list = list(age = septic_patients$age, gender = septic_patients$gender)
my_list %>% freq(age)
my_list %>% freq(sex)
my_list %>% freq(gender)
```
#### Other

View File

@ -130,7 +130,7 @@
#' sort(septic_patients$age)) # TRUE
#'
#' # it also supports `table` objects:
#' table(septic_patients$sex,
#' table(septic_patients$gender,
#' septic_patients$age) %>%
#' freq(sep = " **sep** ")
#'
@ -502,6 +502,46 @@ top_freq <- function(f, n) {
vect
}
#' @rdname freq
#' @exportMethod diff.frequency_tbl
#' @importFrom dplyr %>% full_join mutate
#' @export
diff.frequency_tbl <- function(x, y, ...) {
# check classes
if (!"frequency_tbl" %in% class(x)
| !"frequency_tbl" %in% class(y)) {
stop("Both x and y must be a frequency table.")
}
x.attr <- attributes(x)$opt
# only keep item and count
x <- x[, 1:2]
y <- y[, 1:2]
x <- x %>%
full_join(y,
by = colnames(x)[1],
suffix = c(".x", ".y")) %>%
mutate(
diff = case_when(
is.na(count.y) ~ -count.x,
is.na(count.x) ~ count.y,
TRUE ~ count.y - count.x)) %>%
mutate(
diff.percent = percent(
diff / count.x,
force_zero = TRUE))
print(
knitr::kable(x,
format = x.attr$tbl_format,
col.names = c("Item", "Count #1", "Count #2", "Difference", "Diff. percent"),
align = "lrrrr",
padding = 1)
)
}
#' @rdname freq
#' @exportMethod print.frequency_tbl
#' @importFrom knitr kable

View File

@ -128,10 +128,12 @@ mo_shortname <- function(x, Becker = FALSE, Lancefield = FALSE, language = NULL)
result <- as.character(res1)
} else {
x <- AMR::as.mo(x)
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
pull(shortname)
suppressWarnings(
result <- data.frame(mo = x) %>%
left_join(AMR::microorganisms, by = "mo") %>%
mutate(shortname = ifelse(!is.na(genus) & !is.na(species), paste0(substr(genus, 1, 1), ". ", species), NA_character_)) %>%
pull(shortname)
)
}
mo_translate(result, language = language)
}

View File

@ -120,9 +120,13 @@ help(package = "AMR")
```
## ITIS
<img src="man/figures/logo_amr.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="man/figures/itis_logo.jpg" height="50px">
<img src="man/figures/itis_logo.jpg" height="100px">
This `AMR` package contains the **complete microbial taxonomic data** (with seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov). ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists. The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
This `AMR` package contains the **complete microbial taxonomic data** (with all seven taxonomic ranks - from subkingdom to subspecies) from the publicly available Integrated Taxonomic Information System (ITIS, https://www.itis.gov).
The complete taxonomic kingdoms Bacteria, Fungi and Protozoa are included in this package, as well as all previously accepted names known to ITIS. This allows users to use authoritative taxonomic information for their data analyses on any microorganisms, not only human pathogens.
ITIS is a partnership of U.S., Canadian, and Mexican agencies and taxonomic specialists.
### New classes
This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`).
@ -150,7 +154,7 @@ plot(septic_patients$cipr)
![example_1_rsi](man/figures/rsi_example1.png)
<img src="man/figures/logo_amr.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"><img src="man/figures/plus.png" height="50px"><img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">
<<img src="https://github.com/tidyverse/dplyr/blob/master/man/figures/logo.png" height="50px"> <img src="https://github.com/tidyverse/ggplot2/blob/master/man/figures/logo.png" height="50px">
Or use the `ggplot2` and `dplyr` packages to create more appealing plots:
```r

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -4,6 +4,7 @@
\alias{freq}
\alias{frequency_tbl}
\alias{top_freq}
\alias{diff.frequency_tbl}
\alias{print.frequency_tbl}
\title{Frequency table}
\usage{
@ -17,6 +18,8 @@ freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
top_freq(f, n)
\method{diff}{frequency_tbl}(x, y, ...)
\method{print}{frequency_tbl}(x, nmax = getOption("max.print.freq",
default = 15), ...)
}
@ -143,7 +146,7 @@ identical(septic_patients \%>\%
sort(septic_patients$age)) # TRUE
# it also supports `table` objects:
table(septic_patients$sex,
table(septic_patients$gender,
septic_patients$age) \%>\%
freq(sep = " **sep** ")

View File

@ -114,7 +114,7 @@ test_that("first isolates work", {
first_isolate(col_date = "date",
col_mo = "mo",
col_patient_id = "patient_id",
col_testcode = "sex",
col_testcode = "gender",
testcodes_exclude = "M"))
# errors

View File

@ -32,17 +32,17 @@ test_that("frequency table works", {
# factor
expect_output(print(freq(septic_patients$hospital_id)))
# table
expect_output(print(freq(table(septic_patients$sex, septic_patients$age))))
expect_output(print(freq(table(septic_patients$gender, septic_patients$age))))
# rsi
expect_output(print(freq(septic_patients$amcl)))
# hms
expect_output(suppressWarnings(print(freq(hms::as.hms(sample(c(0:86399), 50))))))
# matrix
expect_output(print(freq(as.matrix(septic_patients$age))))
expect_output(print(freq(as.matrix(septic_patients[, c("age", "sex")]))))
expect_output(print(freq(as.matrix(septic_patients[, c("age", "gender")]))))
# list
expect_output(print(freq(list(age = septic_patients$age))))
expect_output(print(freq(list(age = septic_patients$age, sex = septic_patients$sex))))
expect_output(print(freq(list(age = septic_patients$age, gender = septic_patients$gender))))
library(dplyr)
expect_output(septic_patients %>% select(1:2) %>% freq() %>% print())

View File

@ -25,9 +25,9 @@ Frequency tables (or frequency distributions) are summaries of the distribution
## Frequencies of one variable
To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `sex` variable of the `septic_patients` dataset:
To only show and quickly review the content of one variable, you can just select this variable in various ways. Let's say we want to get the frequencies of the `gender` variable of the `septic_patients` dataset:
```{r, echo = TRUE}
septic_patients %>% freq(sex)
septic_patients %>% freq(gender)
```
This immediately shows the class of the variable, its length and availability (i.e. the amount of `NA`), the amount of unique values and (most importantly) that among septic patients men are more prevalent than women.