diff --git a/DESCRIPTION b/DESCRIPTION index 69c0be44..1705c0fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 1.3.0.9002 -Date: 2020-08-14 +Version: 1.3.0.9003 +Date: 2020-08-15 Title: Antimicrobial Resistance Analysis Authors@R: c( person(role = c("aut", "cre"), diff --git a/NEWS.md b/NEWS.md index 039eac00..d1527534 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,5 @@ -# AMR 1.3.0.9002 -## Last updated: 14 August 2020 +# AMR 1.3.0.9003 +## Last updated: 15 August 2020 ### New * Data set `intrinsic_resistant`. This data set contains all bug-drug combinations where the 'bug' is intrinsic resistant to the 'drug' according to the latest EUCAST insights. It contains just two columns: `microorganism` and `antibiotic`. @@ -15,18 +15,19 @@ ``` ### Changed -* Support for using `dplyr`'s `across()` in `as.rsi()` to interpret MIC values or disk zone diameters, that now also automatically determines the column with microorganism names or codes. - ```r - # until dplyr 1.0.0 - your_data %>% mutate_if(is.mic, as.rsi) - your_data %>% mutate_if(is.disk, as.rsi) +* Improvements for `as.rsi()`: + * Support for using `dplyr`'s `across()` to interpret MIC values or disk zone diameters, which also automatically determines the column with microorganism names or codes. + ```r + # until dplyr 1.0.0 + your_data %>% mutate_if(is.mic, as.rsi) + your_data %>% mutate_if(is.disk, as.rsi) - # since dplyr 1.0.0 - your_data %>% mutate(across(where(is.mic), as.rsi)) - your_data %>% mutate(across(where(is.disk), as.rsi)) - ``` -* Improved overall speed by tweaking joining functions - + # since dplyr 1.0.0 + your_data %>% mutate(across(where(is.mic), as.rsi)) + your_data %>% mutate(across(where(is.disk), as.rsi)) + ``` + * Big speed improvement for interpreting MIC values and disk zone diameters. When interpreting 5,000 MIC values of two antibiotics (10,000 values in total), our benchmarks showed a total run time going from 80.7-85.1 seconds to 1.8-2.0 seconds. 
+* Overall speed improvement by tweaking joining functions # AMR 1.3.0 diff --git a/R/aa_helper_functions.R b/R/aa_helper_functions.R index b72563b4..3e11363d 100755 --- a/R/aa_helper_functions.R +++ b/R/aa_helper_functions.R @@ -62,14 +62,12 @@ left_join <- function(x, y, by = NULL, suffix = c(".x", ".y")) { if (length(by) == 1) { by <- rep(by, 2) } - requires_suffix <- any(colnames(x) %in% colnames(y)) - if (requires_suffix == TRUE) { - int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1] - int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2] - - colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L]) - colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L]) - } + + int_x <- colnames(x) %in% colnames(y) & colnames(x) != by[1] + int_y <- colnames(y) %in% colnames(x) & colnames(y) != by[2] + colnames(x)[int_x] <- paste0(colnames(x)[int_x], suffix[1L]) + colnames(y)[int_y] <- paste0(colnames(y)[int_y], suffix[2L]) + merged <- cbind(x, y[match(x[, by[1], drop = TRUE], y[, by[2], drop = TRUE]), diff --git a/R/data.R b/R/data.R index 4ffa5a76..6f914640 100755 --- a/R/data.R +++ b/R/data.R @@ -255,5 +255,4 @@ catalogue_of_life <- list( #' pull(microorganism) #' # [1] "Enterococcus casseliflavus" "Enterococcus gallinarum" #' } -#' @seealso [intrinsic_resistant] "intrinsic_resistant" diff --git a/R/rsi.R b/R/rsi.R index 371b4baa..81add8b8 100755 --- a/R/rsi.R +++ b/R/rsi.R @@ -311,7 +311,7 @@ as.rsi.mic <- function(x, stop_('No information was supplied about the microorganisms (missing parameter "mo"). See ?as.rsi.\n\n', "To transform certain columns with e.g. 
mutate_at(), use\n", "`data %>% mutate_at(vars(...), as.rsi, mo = .$x)`, where x is your column with microorganisms.\n\n", - "To tranform all MIC variables in a data set, use `as.rsi(data)` or `data %>% as.rsi()`.", call = FALSE) + "To tranform all MIC values in a data set, use `data %>% as.rsi()` or data %>% mutate_if(is.mic, as.rsi).", call = FALSE) } ab_coerced <- suppressWarnings(as.ab(ab)) @@ -379,7 +379,7 @@ as.rsi.disk <- function(x, stop_('No information was supplied about the microorganisms (missing parameter "mo"). See ?as.rsi.\n\n', "To transform certain columns with e.g. mutate_at(), use\n", "`data %>% mutate_at(vars(...), as.rsi, mo = .$x)`, where x is your column with microorganisms.\n\n", - "To tranform all disk diffusion zones in a data set, use `as.rsi(data)` or `data %>% as.rsi()`.", call = FALSE) + "To tranform all disk diffusion zones in a data set, use `data %>% as.rsi()` or data %>% mutate_if(is.disk, as.rsi).", call = FALSE) } ab_coerced <- suppressWarnings(as.ab(ab)) @@ -535,6 +535,11 @@ get_guideline <- function(guideline) { } exec_as.rsi <- function(method, x, mo, ab, guideline, uti, conserve_capped_values) { + x_bak <- data.frame(x_mo = paste0(x, mo)) + df <- unique(data.frame(x, mo), stringsAsFactors = FALSE) + x <- df$x + mo <- df$mo + if (method == "mic") { x <- as.mic(x) # when as.rsi.mic is called directly } else if (method == "disk") { @@ -575,10 +580,10 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti, conserve_capped_value warning("Interpretation of ", font_bold(ab_name(ab, tolower = TRUE)), " for some microorganisms is only available for (uncomplicated) urinary tract infections (UTI).\n Use parameter 'uti' to set which isolates are from urine. See ?as.rsi.", call. 
= FALSE) warned <- TRUE } - + for (i in seq_len(length(x))) { get_record <- trans %>% - # no UTI for now + # no sebsetting to UTI for now subset(lookup %in% c(lookup_mo[i], lookup_genus[i], lookup_family[i], @@ -591,7 +596,7 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti, conserve_capped_value get_record <- get_record %>% # be as specific as possible (i.e. prefer species over genus): # desc(uti) = TRUE on top and FALSE on bottom - arrange(desc(uti), desc(nchar(mo))) # 'uti' is a column in rsi_translation + arrange(desc(uti), desc(nchar(mo))) # 'uti' is a column in data set 'rsi_translation' } else { get_record <- get_record %>% filter(uti == FALSE) %>% # 'uti' is a column in rsi_translation @@ -620,9 +625,15 @@ exec_as.rsi <- function(method, x, mo, ab, guideline, uti, conserve_capped_value } } } + + new_rsi <- x_bak %>% + left_join(data.frame(x_mo = paste0(df$x, df$mo), new_rsi), by = "x_mo") %>% + pull(new_rsi) + if (warned == FALSE) { message(font_green("OK.")) } + structure(.Data = factor(new_rsi, levels = c("S", "I", "R"), ordered = TRUE), class = c("rsi", "ordered", "factor")) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 81d368a5..1320a95a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -139,7 +139,7 @@ reference: contents: - "`microorganisms`" - "`antibiotics`" - - "`antivirals`" + - "`intrinsic_resistant`" - "`example_isolates`" - "`example_isolates_unclean`" - "`rsi_translation`" diff --git a/docs/404.html b/docs/404.html index a1a6f673..3469455d 100644 --- a/docs/404.html +++ b/docs/404.html @@ -81,7 +81,7 @@
diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index c0a2449d..16496ec7 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -81,7 +81,7 @@ diff --git a/docs/articles/index.html b/docs/articles/index.html index cb9deb89..6821137a 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -81,7 +81,7 @@ diff --git a/docs/articles/welcome_to_AMR.html b/docs/articles/welcome_to_AMR.html index 2a42dbbc..3323932e 100644 --- a/docs/articles/welcome_to_AMR.html +++ b/docs/articles/welcome_to_AMR.html @@ -39,7 +39,7 @@ @@ -186,7 +186,7 @@vignettes/welcome_to_AMR.Rmd
welcome_to_AMR.Rmd
NEWS.md
- as.rsi()
:
+Support for using dplyr
’s across()
in as.rsi()
to interpret MIC values or disk zone diameters, that now also automatically determines the column with microorganism names or codes.
Support for using dplyr
’s across()
to interpret MIC values or disk zone diameters, which also automatically determines the column with microorganism names or codes.
Improved overall speed by tweaking joining functions
Big speed improvement for interpreting MIC values and disk zone diameters. When interpreting 5,000 MIC values of two antibiotics (10,000 values in total), our benchmarks showed a total run time going from 80.7-85.1 seconds to 1.8-2.0 seconds.
Making this package independent of especially the tidyverse (e.g. packages dplyr
and tidyr
) tremendously increases sustainability in the long term, since tidyverse functions change quite often. Good for users, but hard for package maintainers. Most of our functions are replaced with versions that only rely on base R, which keeps this package fully functional for many years to come, without requiring a lot of maintenance to keep up with other packages anymore. Another upside is that this package can now be used with all versions of R since R-3.0.0 (April 2013). Our package is being used in settings where the resources are very limited. Fewer dependencies on newer software are helpful for such settings.
Negative effects of this change are:
freq()
that was borrowed from the cleaner
package was removed. Use cleaner::freq()
, or run library("cleaner")
before you use freq()
.freq()
that was borrowed from the cleaner
package was removed. Use cleaner::freq()
, or run library("cleaner")
before you use freq()
.mo
or rsi
in a tibble will no longer be in colour and printing rsi
in a tibble will show the class <ord>
, not <rsi>
anymore. This is purely a visual effect.mo_*
family (like mo_name()
and mo_gramstain()
) are noticeably slower when running on hundreds of thousands of rows.mo
and ab
now both also inherit class character
, to support any data transformation. This change invalidates code that checks for class length == 1. This is important, because a value like "testvalue"
could never be understood by e.g. mo_name()
, although the class would suggest a valid microbial code.
Function freq()
has moved to a new package, clean
(CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq()
function still works, since it is re-exported from the clean
package (which will be installed automatically upon updating this AMR
package).
Function freq()
has moved to a new package, clean
(CRAN link), since creating frequency tables actually does not fit the scope of this package. The freq()
function still works, since it is re-exported from the clean
package (which will be installed automatically upon updating this AMR
package).
Renamed data set septic_patients
to example_isolates
age()
function gained a new parameter exact
to determine ages with decimalsguess_mo()
, guess_atc()
, EUCAST_rules()
, interpretive_reading()
, rsi()
freq()
):
+freq()
):
speed improvement for microbial IDs
fixed factor level names for R Markdown
support for boxplots:
age_groups()
, to let groups of fives and tens end with 100+ instead of 120+freq()
for when all values are NA
+freq()
for when all values are NA
first_isolate()
for when dates are missingguess_ab_col()
@@ -1244,7 +1249,7 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/
freq()
function):
+freq()
function):
Support for tidyverse quasiquotation! Now you can create frequency tables of function outcomes:
@@ -1253,15 +1258,15 @@ This works for all drug combinations, such as ampicillin/sulbactam, ceftazidime/ # OLD WAY septic_patients %>% mutate(genus = mo_genus(mo)) %>% - freq(genus) + freq(genus) # NEW WAY septic_patients %>% - freq(mo_genus(mo)) + freq(mo_genus(mo)) # Even supports grouping variables: septic_patients %>% group_by(gender) %>% - freq(mo_genus(mo)) + freq(mo_genus(mo))Header info is now available as a list, with the header
function
Using portion_*
functions now throws a warning when the total number of available isolates is below parameter minimum
Functions as.mo
, as.rsi
, as.mic
, as.atc
and freq
will not set package name as attribute anymore
Frequency tables - freq()
:
Frequency tables - freq()
:
Support for grouping variables, test with:
Support for (un)selecting columns:
Removed diacritics from all authors (columns microorganisms$ref
and microorganisms.old$ref
) to comply with CRAN policy to only allow ASCII characters
Fix for mo_property
not working properly
Fix for eucast_rules
where some Streptococci would become ceftazidime R in EUCAST rule 4.5
Support for named vectors of class mo
, useful for top_freq()
Support for named vectors of class mo
, useful for top_freq()
ggplot_rsi
and scale_y_percent
have breaks
parameter
AI improvements for as.mo
:
Support for types (classes) list and matrix for freq
my_matrix = with(septic_patients, matrix(c(age, gender), ncol = 2)) -freq(my_matrix) +freq(my_matrix)
For lists, subsetting is possible:
rsi
(antimicrobial resistance) to use as inputtable
to use as input: freq(table(x, y))
+table
to use as input: freq(table(x, y))
hist
and plot
to use a frequency table as input: hist(freq(df$age))
as.vector
, as.data.frame
, as_tibble
and format
freq(mydata, mycolumn)
is the same as mydata %>% freq(mycolumn)
+freq(mydata, mycolumn)
is the same as mydata %>% freq(mycolumn)
top_freq
function to return the top/below n items as vectorThese functions can be used to count resistant/susceptible microbial isolates. All functions support quasiquotation with pipes, can be used in summarise()
from the dplyr
package and also support grouped variables, please see Examples.
These functions can be used to count resistant/susceptible microbial isolates. All functions support quasiquotation with pipes, can be used in summarise()
from the dplyr
package and also support grouped variables, please see Examples.
count_resistant()
should be used to count resistant isolates, count_susceptible()
should be used to count susceptible isolates.
Data sets with 558 antimicrobials
Data set with bacterial intrinsic resistance
On our website https://msberends.github.io/AMR you can find a comprehensive tutorial about how to conduct AMR analysis, the complete documentation of all functions (which reads a lot easier than here in R) and an example analysis using WHONET data. As we would like to better understand the backgrounds and needs of our users, please participate in our survey!
-intrinsic_resistant
if (require("dplyr")) { diff --git a/docs/reference/lifecycle.html b/docs/reference/lifecycle.html index 52391dc6..2b352ddf 100644 --- a/docs/reference/lifecycle.html +++ b/docs/reference/lifecycle.html @@ -84,7 +84,7 @@ This page contains a section for every lifecycle (with text borrowed from the af diff --git a/docs/reference/microorganisms.html b/docs/reference/microorganisms.html index 4baeab6d..bfc893e6 100644 --- a/docs/reference/microorganisms.html +++ b/docs/reference/microorganisms.html @@ -82,7 +82,7 @@ diff --git a/docs/reference/proportion.html b/docs/reference/proportion.html index ca66b324..f275a256 100644 --- a/docs/reference/proportion.html +++ b/docs/reference/proportion.html @@ -83,7 +83,7 @@ resistance() should be used to calculate resistance, susceptibility() should be diff --git a/docs/reference/rsi_translation.html b/docs/reference/rsi_translation.html index ed7c2793..10aae30c 100644 --- a/docs/reference/rsi_translation.html +++ b/docs/reference/rsi_translation.html @@ -82,7 +82,7 @@ diff --git a/docs/survey.html b/docs/survey.html index 9c8406c3..09b33c4a 100644 --- a/docs/survey.html +++ b/docs/survey.html @@ -81,7 +81,7 @@ diff --git a/man/intrinsic_resistant.Rd b/man/intrinsic_resistant.Rd index f62f7e71..1f2cefcc 100644 --- a/man/intrinsic_resistant.Rd +++ b/man/intrinsic_resistant.Rd @@ -35,7 +35,4 @@ if (require("dplyr")) { # [1] "Enterococcus casseliflavus" "Enterococcus gallinarum" } } -\seealso{ -\link{intrinsic_resistant} -} \keyword{datasets}