mirror of
				https://github.com/msberends/AMR.git
				synced 2025-10-25 15:56:20 +02:00 
			
		
		
		
	speed improvements
This commit is contained in:
		| @@ -1,6 +1,6 @@ | ||||
| Package: AMR | ||||
| Version: 0.2.0.9011 | ||||
| Date: 2018-07-13 | ||||
| Date: 2018-07-15 | ||||
| Title: Antimicrobial Resistance Analysis | ||||
| Authors@R: c( | ||||
|     person( | ||||
|   | ||||
							
								
								
									
										4
									
								
								NEWS.md
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								NEWS.md
									
									
									
									
									
								
							| @@ -1,6 +1,6 @@ | ||||
| # 0.2.0.90xx (development version) | ||||
| #### New | ||||
| * **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 60 to 65 times faster. The function `rsi` still works, but is deprecated. | ||||
| * **BREAKING**: `rsi_df` was removed in favour of new functions `resistance` and `susceptibility`. Now, all functions used to calculate resistance (`resistance` and `susceptibility`) or count isolates (`n_rsi`) use **hybrid evaluation**. This means calculations are not done in R directly but rather in C++ using the `Rcpp` package, making them 25 to 30 times faster. The function `rsi` still works, but is deprecated. | ||||
| * Support for Addins menu in RStudio to quickly insert `%in%` or `%like%` (and give them keyboard shortcuts), or to view the datasets that come with this package | ||||
| * For convience, new descriptive statistical functions `kurtosis` and `skewness` that are lacking in base R - they are generic functions and have support for vectors, data.frames and matrices | ||||
| * Function `g.test` as added to perform the Χ<sup>2</sup> distributed [*G*-test](https://en.wikipedia.org/wiki/G-test), which use is the same as `chisq.test` | ||||
| @@ -23,6 +23,8 @@ ratio(c(772, 1611, 737), ratio = "1:2:1") | ||||
|  | ||||
| #### Changed | ||||
| * Pretty printing for tibbles removed as it is not really the scope of this package | ||||
| * Improved speed of key antibiotics comparison for determining first isolates | ||||
| * Printing of class `mic` now shows all MIC values | ||||
| * `%like%` now supports multiple patterns | ||||
| * Frequency tables are now actual `data.frame`s with altered console printing to make it look like a frequency table. Because of this, the parameter `toConsole` is not longer needed. | ||||
| * Small translational improvements to the `septic_patients` dataset | ||||
|   | ||||
							
								
								
									
										17
									
								
								R/classes.R
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								R/classes.R
									
									
									
									
									
								
							| @@ -360,14 +360,15 @@ print.mic <- function(x, ...) { | ||||
|   n_total <- x %>% length() | ||||
|   x <- x[!is.na(x)] | ||||
|   n <- x %>% length() | ||||
|   cat("Class 'mic': ", n, " isolates\n", sep = '') | ||||
|   cat('\n') | ||||
|   cat('<NA> ', n_total - n, '\n') | ||||
|   cat('\n') | ||||
|   tbl <- tibble(x = x, y = 1) %>% group_by(x) %>% summarise(y = sum(y)) | ||||
|   cnt <- tbl %>% pull(y) | ||||
|   names(cnt) <- tbl %>% pull(x) | ||||
|   print(cnt) | ||||
|   cat("Class 'mic'\n") | ||||
|   cat(n, " results (missing: ", n_total - n, ' = ', percent((n_total - n) / n_total, force_zero = TRUE), ')\n', sep = "") | ||||
|   if (n > 0) { | ||||
|     cat('\n') | ||||
|     tibble(MIC = x, y = 1) %>% | ||||
|       group_by(MIC) %>% | ||||
|       summarise(n = sum(y)) %>% | ||||
|       base::print.data.frame(row.names = FALSE) | ||||
|   } | ||||
| } | ||||
|  | ||||
| #' @exportMethod summary.mic | ||||
|   | ||||
| @@ -314,11 +314,11 @@ first_isolate <- function(tbl, | ||||
|   if (col_keyantibiotics != '') { | ||||
|     if (info == TRUE) { | ||||
|       if (type == 'keyantibiotics') { | ||||
|         cat('Comparing key antibiotics for first weighted isolates (') | ||||
|         cat('Key antibiotics for first weighted isolates will be compared (') | ||||
|         if (ignore_I == FALSE) { | ||||
|           cat('NOT ') | ||||
|         } | ||||
|         cat('ignoring I)...\n') | ||||
|         cat('ignoring I).') | ||||
|       } | ||||
|       if (type == 'points') { | ||||
|         cat(paste0('Comparing antibiotics for first weighted isolates (using points threshold of ' | ||||
| @@ -523,7 +523,6 @@ key_antibiotics_equal <- function(x, | ||||
|                                   points_threshold = 2, | ||||
|                                   info = FALSE) { | ||||
|   # x is active row, y is lag | ||||
|  | ||||
|   type <- type[1] | ||||
|  | ||||
|   if (length(x) != length(y)) { | ||||
| @@ -532,73 +531,75 @@ key_antibiotics_equal <- function(x, | ||||
|  | ||||
|   result <- logical(length(x)) | ||||
|  | ||||
|   if (info == TRUE) { | ||||
|     p <- dplyr::progress_estimated(length(x)) | ||||
|   } | ||||
|  | ||||
|   for (i in 1:length(x)) { | ||||
|   if (type == "keyantibiotics") { | ||||
|     if (ignore_I == TRUE) { | ||||
|       # evaluation using regular expression will treat '?' as any character | ||||
|       # so I is actually ignored then | ||||
|       x <- gsub('I', '?', x, ignore.case = TRUE) | ||||
|       y <- gsub('I', '?', y, ignore.case = TRUE) | ||||
|     } | ||||
|     for (i in 1:length(x)) { | ||||
|       result[i] <- grepl(x = x[i], | ||||
|                          pattern = y[i], | ||||
|                          ignore.case = TRUE) | | ||||
|         grepl(x = y[i], | ||||
|               pattern = x[i], | ||||
|               ignore.case = TRUE) | ||||
|     } | ||||
|     return(result) | ||||
|   } else { | ||||
|  | ||||
|     if (info == TRUE) { | ||||
|       p$tick()$print() | ||||
|       p <- dplyr::progress_estimated(length(x)) | ||||
|     } | ||||
|  | ||||
|     if (is.na(x[i])) { | ||||
|       x[i] <- '' | ||||
|     } | ||||
|     if (is.na(y[i])) { | ||||
|       y[i] <- '' | ||||
|     } | ||||
|     for (i in 1:length(x)) { | ||||
|  | ||||
|     if (nchar(x[i]) != nchar(y[i])) { | ||||
|       if (info == TRUE) { | ||||
|         p$tick()$print() | ||||
|       } | ||||
|  | ||||
|       result[i] <- FALSE | ||||
|       if (is.na(x[i])) { | ||||
|         x[i] <- '' | ||||
|       } | ||||
|       if (is.na(y[i])) { | ||||
|         y[i] <- '' | ||||
|       } | ||||
|  | ||||
|     } else if (x[i] == '' & y[i] == '') { | ||||
|       if (nchar(x[i]) != nchar(y[i])) { | ||||
|  | ||||
|       result[i] <- TRUE | ||||
|         result[i] <- FALSE | ||||
|  | ||||
|     } else { | ||||
|       } else if (x[i] == '' & y[i] == '') { | ||||
|  | ||||
|       x2 <- strsplit(x[i], "")[[1]] | ||||
|       y2 <- strsplit(y[i], "")[[1]] | ||||
|  | ||||
|       if (type == 'points') { | ||||
|         # count points for every single character: | ||||
|         # - no change is 0 points | ||||
|         # - I <-> S|R is 0.5 point | ||||
|         # - S|R <-> R|S is 1 point | ||||
|         # use the levels of as.rsi (S = 1, I = 2, R = 3) | ||||
|  | ||||
|         suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double()) | ||||
|         suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double()) | ||||
|  | ||||
|         points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE) | ||||
|         result[i] <- ((points / 2) >= points_threshold) | ||||
|  | ||||
|       } else if (type == 'keyantibiotics') { | ||||
|         # check if key antibiotics are exactly the same | ||||
|         # also possible to ignore I, so only S <-> R and S <-> R are counted | ||||
|         if (ignore_I == TRUE) { | ||||
|           valid_chars <- c('S', 's', 'R', 'r') | ||||
|         } else { | ||||
|           valid_chars <- c('S', 's', 'I', 'i', 'R', 'r') | ||||
|         } | ||||
|  | ||||
|         # remove invalid values (like "-", NA) on both locations | ||||
|         x2[which(!x2 %in% valid_chars)] <- '?' | ||||
|         x2[which(!y2 %in% valid_chars)] <- '?' | ||||
|         y2[which(!x2 %in% valid_chars)] <- '?' | ||||
|         y2[which(!y2 %in% valid_chars)] <- '?' | ||||
|  | ||||
|         result[i] <- all(x2 == y2) | ||||
|         result[i] <- TRUE | ||||
|  | ||||
|       } else { | ||||
|         stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.') | ||||
|  | ||||
|         x2 <- strsplit(x[i], "")[[1]] | ||||
|         y2 <- strsplit(y[i], "")[[1]] | ||||
|  | ||||
|         if (type == 'points') { | ||||
|           # count points for every single character: | ||||
|           # - no change is 0 points | ||||
|           # - I <-> S|R is 0.5 point | ||||
|           # - S|R <-> R|S is 1 point | ||||
|           # use the levels of as.rsi (S = 1, I = 2, R = 3) | ||||
|  | ||||
|           suppressWarnings(x2 <- x2 %>% as.rsi() %>% as.double()) | ||||
|           suppressWarnings(y2 <- y2 %>% as.rsi() %>% as.double()) | ||||
|  | ||||
|           points <- (x2 - y2) %>% abs() %>% sum(na.rm = TRUE) | ||||
|           result[i] <- ((points / 2) >= points_threshold) | ||||
|  | ||||
|         } else { | ||||
|           stop('`', type, '` is not a valid value for type, must be "points" or "keyantibiotics". See ?first_isolate.') | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     if (info == TRUE) { | ||||
|       cat('\n') | ||||
|     } | ||||
|     result | ||||
|   } | ||||
|   if (info == TRUE) { | ||||
|     cat('\n') | ||||
|   } | ||||
|   result | ||||
| } | ||||
|   | ||||
							
								
								
									
										4
									
								
								R/freq.R
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								R/freq.R
									
									
									
									
									
								
							| @@ -151,7 +151,9 @@ frequency_tbl <- function(x, | ||||
|     dots <- base::eval(base::substitute(base::alist(...))) | ||||
|     ndots <- length(dots) | ||||
|  | ||||
|     if (ndots > 0 & ndots < 10) { | ||||
|     if (NROW(x) == 0) { | ||||
|       x <- NA | ||||
|     } else if (ndots > 0 & ndots < 10) { | ||||
|       cols <- as.character(dots) | ||||
|       if (!all(cols %in% colnames(x))) { | ||||
|         stop("one or more columns not found: `", paste(cols, collapse = "`, `"), '`', call. = FALSE) | ||||
|   | ||||
| @@ -41,6 +41,7 @@ globalVariables(c('abname', | ||||
|                   'labs', | ||||
|                   'median', | ||||
|                   'mic', | ||||
|                   'MIC', | ||||
|                   'microorganisms', | ||||
|                   'mocode', | ||||
|                   'molis', | ||||
|   | ||||
							
								
								
									
										258
									
								
								R/resistance.R
									
									
									
									
									
								
							
							
						
						
									
										258
									
								
								R/resistance.R
									
									
									
									
									
								
							| @@ -24,9 +24,11 @@ | ||||
| #' @param minimum minimal amount of available isolates. Any number lower than \code{minimum} will return \code{NA}. | ||||
| #' @param as_percent logical to indicate whether the output must be returned as percent (text), will else be a double | ||||
| #' @param interpretation antimicrobial interpretation | ||||
| #' @param info \emph{DEPRECATED} calculate the amount of available isolates and print it, like \code{n = 423} | ||||
| #' @param warning \emph{DEPRECATED} show a warning when the available amount of isolates is below \code{minimum} | ||||
| #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. | ||||
| #' | ||||
| #' All return values are calculated using hybrid evaluation (i.e. using C++), which makes these functions 60-65 times faster than in \code{AMR} v0.2.0 and below. The \code{rsi} function is available for backwards compatibility and deprecated. It now uses the \code{resistance} and \code{susceptibility} functions internally, based on the \code{interpretation} parameter. | ||||
| #' The functions \code{resistance}, \code{susceptibility} and \code{n_rsi} calculate using hybrid evaluation (i.e. using C++), which makes these functions 25-30 times faster than the old \code{rsi} function. This function is still available for backwards compatibility but is deprecated. | ||||
| #' \if{html}{ | ||||
| #'   \cr | ||||
| #'   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: | ||||
| @@ -90,6 +92,29 @@ | ||||
| #'          genus == "Helicobacter") %>% | ||||
| #'   summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole | ||||
| #'             n = n_rsi(amox, metr)) | ||||
| #' | ||||
| #' | ||||
| #' # How fast is this hybrid evaluation in C++ compared to R? | ||||
| #' # In other words: how is the speed improvement of the new `resistance` compared to old `rsi`? | ||||
| #' | ||||
| #' library(microbenchmark) | ||||
| #' df <- septic_patients %>% group_by(hospital_id, bactid) # 317 groups with sizes 1 to 167 | ||||
| #' | ||||
| #' microbenchmark(old_IR = df %>% summarise(p = rsi(amox, minimum = 0, interpretation = "IR")), | ||||
| #'                new_IR = df %>% summarise(p = resistance(amox, minimum = 0)), | ||||
| #'                old_S = df %>% summarise(p = rsi(amox, minimum = 0, interpretation = "S")), | ||||
| #'                new_S = df %>% summarise(p = susceptibility(amox, minimum = 0)), | ||||
| #'                times = 5, | ||||
| #'                unit = "s") | ||||
| #' | ||||
| #' # Unit: seconds | ||||
| #' #    expr          min         lq       mean     median         uq        max neval | ||||
| #' #  old_IR   1.95600230 1.96096857 1.97981537 1.96823318 2.00645711 2.00741568     5 | ||||
| #' #  new_IR   0.06872808 0.06984932 0.07162866 0.06987306 0.07050094 0.07919192     5 | ||||
| #' #   old_S   1.68893579 1.69024888 1.72461867 1.69785934 1.70428796 1.84176137     5 | ||||
| #' #   new_S   0.06737037 0.06838167 0.07431906 0.07745364 0.07827224 0.08011738     5 | ||||
| #' | ||||
| #' # The old function took roughly 2 seconds, the new ones take 0.07 seconds. | ||||
| #' } | ||||
| resistance <- function(ab, | ||||
|                        include_I = TRUE, | ||||
| @@ -109,7 +134,11 @@ resistance <- function(ab, | ||||
|     stop('`as_percent` must be logical', call. = FALSE) | ||||
|   } | ||||
|  | ||||
|   x <- as.integer(as.rsi(ab)) | ||||
|   if (!is.rsi(ab)) { | ||||
|     x <- as.rsi(ab) | ||||
|   } else { | ||||
|     x <- ab | ||||
|   } | ||||
|   total <-  .Call(`_AMR_rsi_calc_total`, x) | ||||
|   if (total < minimum) { | ||||
|     return(NA) | ||||
| @@ -144,16 +173,22 @@ susceptibility <- function(ab1, | ||||
|     stop('`as_percent` must be logical', call. = FALSE) | ||||
|   } | ||||
|  | ||||
|   if (!is.rsi(ab1)) { | ||||
|     ab1 <- as.rsi(ab1) | ||||
|   } | ||||
|   if (!is.null(ab2)) { | ||||
|     if (NCOL(ab2) > 1) { | ||||
|       stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE) | ||||
|     } | ||||
|     x <- apply(X = data.frame(ab1 = as.integer(as.rsi(ab1)), | ||||
|                               ab2 = as.integer(as.rsi(ab2))), | ||||
|     if (!is.rsi(ab2)) { | ||||
|       ab2 <- as.rsi(ab2) | ||||
|     } | ||||
|     x <- apply(X = data.frame(ab1 = as.integer(ab1), | ||||
|                               ab2 = as.integer(ab2)), | ||||
|                MARGIN = 1, | ||||
|                FUN = min) | ||||
|   } else { | ||||
|     x <- as.integer(as.rsi(ab1)) | ||||
|     x <- ab1 | ||||
|   } | ||||
|   total <-  .Call(`_AMR_rsi_calc_total`, x) | ||||
|   if (total < minimum) { | ||||
| @@ -174,42 +209,221 @@ n_rsi <- function(ab1, ab2 = NULL) { | ||||
|   if (NCOL(ab1) > 1) { | ||||
|     stop('`ab1` must be a vector of antimicrobial interpretations', call. = FALSE) | ||||
|   } | ||||
|   if (!is.rsi(ab1)) { | ||||
|     ab1 <- as.rsi(ab1) | ||||
|   } | ||||
|   if (!is.null(ab2)) { | ||||
|     if (NCOL(ab2) > 1) { | ||||
|       stop('`ab2` must be a vector of antimicrobial interpretations', call. = FALSE) | ||||
|     } | ||||
|     x <- apply(X = data.frame(ab1 = as.integer(as.rsi(ab1)), | ||||
|                               ab2 = as.integer(as.rsi(ab2))), | ||||
|     if (!is.rsi(ab2)) { | ||||
|       ab2 <- as.rsi(ab2) | ||||
|     } | ||||
|     x <- apply(X = data.frame(ab1 = as.integer(ab1), | ||||
|                               ab2 = as.integer(ab2)), | ||||
|                MARGIN = 1, | ||||
|                FUN = min) | ||||
|   } else { | ||||
|     x <- as.integer(as.rsi(ab1)) | ||||
|     x <- ab1 | ||||
|   } | ||||
|   .Call(`_AMR_rsi_calc_total`, x) | ||||
| } | ||||
|  | ||||
|  | ||||
| #' @rdname resistance | ||||
| #' @export | ||||
| rsi <- function(ab1, | ||||
|                 ab2 = NULL, | ||||
|                 interpretation = "IR", | ||||
|                 ab2 = NA, | ||||
|                 interpretation = 'IR', | ||||
|                 minimum = 30, | ||||
|                 as_percent = FALSE) { | ||||
|   warning("'rsi' is deprecated. Use 'resistance' or 'susceptibility' instead.", call. = FALSE) | ||||
|   if (interpretation %in% c('IR', 'RI')) { | ||||
|     resistance(ab = ab1, include_I = TRUE, minimum = minimum, as_percent = as_percent) | ||||
|   } else if (interpretation == 'R') { | ||||
|     resistance(ab = ab1, include_I = FALSE, minimum = minimum, as_percent = as_percent) | ||||
|   } else  if (interpretation %in% c('IS', 'SI')) { | ||||
|     susceptibility(ab1 = ab1, ab2 = ab2, include_I = TRUE, minimum = minimum, as_percent = as_percent) | ||||
|   } else if (interpretation == 'S') { | ||||
|     susceptibility(ab1 = ab1, ab2 = ab2, include_I = FALSE, minimum = minimum, as_percent = as_percent) | ||||
|                 as_percent = FALSE, | ||||
|                 info = FALSE, | ||||
|                 warning = TRUE) { | ||||
|   ab1.name <- deparse(substitute(ab1)) | ||||
|   if (ab1.name %like% '.[$].') { | ||||
|     ab1.name <- unlist(strsplit(ab1.name, "$", fixed = TRUE)) | ||||
|     ab1.name <- ab1.name[length(ab1.name)] | ||||
|   } | ||||
|   if (!ab1.name %like% '^[a-z]{3,4}$') { | ||||
|     ab1.name <- 'rsi1' | ||||
|   } | ||||
|   if (length(ab1) == 1 & is.character(ab1)) { | ||||
|     stop('`ab1` must be a vector of antibiotic interpretations.', | ||||
|          '\n  Try rsi(', ab1, ', ...) instead of rsi("', ab1, '", ...)', call. = FALSE) | ||||
|   } | ||||
|   ab2.name <- deparse(substitute(ab2)) | ||||
|   if (ab2.name %like% '.[$].') { | ||||
|     ab2.name <- unlist(strsplit(ab2.name, "$", fixed = TRUE)) | ||||
|     ab2.name <- ab2.name[length(ab2.name)] | ||||
|   } | ||||
|   if (!ab2.name %like% '^[a-z]{3,4}$') { | ||||
|     ab2.name <- 'rsi2' | ||||
|   } | ||||
|   if (length(ab2) == 1 & is.character(ab2)) { | ||||
|     stop('`ab2` must be a vector of antibiotic interpretations.', | ||||
|          '\n  Try rsi(', ab2, ', ...) instead of rsi("', ab2, '", ...)', call. = FALSE) | ||||
|   } | ||||
|  | ||||
|   interpretation <- paste(interpretation, collapse = "") | ||||
|  | ||||
|   ab1 <- as.rsi(ab1) | ||||
|   ab2 <- as.rsi(ab2) | ||||
|  | ||||
|   tbl <- tibble(rsi1 = ab1, rsi2 = ab2) | ||||
|   colnames(tbl) <- c(ab1.name, ab2.name) | ||||
|  | ||||
|   if (length(ab2) == 1) { | ||||
|     r <- rsi_df(tbl = tbl, | ||||
|                 ab = ab1.name, | ||||
|                 interpretation = interpretation, | ||||
|                 minimum = minimum, | ||||
|                 as_percent = FALSE, | ||||
|                 info = info, | ||||
|                 warning = warning) | ||||
|   } else { | ||||
|     stop('invalid `interpretation`') | ||||
|     if (length(ab1) != length(ab2)) { | ||||
|       stop('`ab1` (n = ', length(ab1), ') and `ab2` (n = ', length(ab2), ') must be of same length.', call. = FALSE) | ||||
|     } | ||||
|     if (!interpretation %in% c('S', 'IS', 'SI')) { | ||||
|       warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE) | ||||
|     } | ||||
|     r <- rsi_df(tbl = tbl, | ||||
|                 ab = c(ab1.name, ab2.name), | ||||
|                 interpretation = interpretation, | ||||
|                 minimum = minimum, | ||||
|                 as_percent = FALSE, | ||||
|                 info = info, | ||||
|                 warning = warning) | ||||
|   } | ||||
|   if (as_percent == TRUE) { | ||||
|     percent(r, force_zero = TRUE) | ||||
|   } else { | ||||
|     r | ||||
|   } | ||||
| } | ||||
|  | ||||
| #' @importFrom dplyr %>% filter_at vars any_vars all_vars | ||||
| #' @noRd | ||||
| rsi_df <- function(tbl, | ||||
|                    ab, | ||||
|                    interpretation = 'IR', | ||||
|                    minimum = 30, | ||||
|                    as_percent = FALSE, | ||||
|                    info = TRUE, | ||||
|                    warning = TRUE) { | ||||
|  | ||||
|   # in case tbl$interpretation already exists: | ||||
|   interpretations_to_check <- paste(interpretation, collapse = "") | ||||
|  | ||||
|   # validate: | ||||
|   if (min(grepl('^[a-z]{3,4}$', ab)) == 0 & | ||||
|       min(grepl('^rsi[1-2]$', ab)) == 0) { | ||||
|     for (i in 1:length(ab)) { | ||||
|       ab[i] <- paste0('rsi', i) | ||||
|     } | ||||
|   } | ||||
|   if (!grepl('^(S|SI|IS|I|IR|RI|R){1}$', interpretations_to_check)) { | ||||
|     stop('Invalid `interpretation`; must be "S", "SI", "I", "IR", or "R".') | ||||
|   } | ||||
|   if ('is_ic' %in% colnames(tbl)) { | ||||
|     if (n_distinct(tbl$is_ic) > 1 & warning == TRUE) { | ||||
|       warning('Dataset contains isolates from the Intensive Care. Exclude them from proper epidemiological analysis.') | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   # transform when checking for different results | ||||
|   if (interpretations_to_check %in% c('SI', 'IS')) { | ||||
|     for (i in 1:length(ab)) { | ||||
|       tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'S' | ||||
|     } | ||||
|     interpretations_to_check <- 'S' | ||||
|   } | ||||
|   if (interpretations_to_check %in% c('RI', 'IR')) { | ||||
|     for (i in 1:length(ab)) { | ||||
|       tbl[which(tbl[, ab[i]] == 'I'), ab[i]] <- 'R' | ||||
|     } | ||||
|     interpretations_to_check <- 'R' | ||||
|   } | ||||
|  | ||||
|   # get fraction | ||||
|   if (length(ab) == 1) { | ||||
|     numerator <- tbl %>% | ||||
|       filter(pull(., ab[1]) == interpretations_to_check) %>% | ||||
|       nrow() | ||||
|  | ||||
|     denominator <- tbl %>% | ||||
|       filter(pull(., ab[1]) %in% c("S", "I", "R")) %>% | ||||
|       nrow() | ||||
|  | ||||
|   } else if (length(ab) == 2) { | ||||
|     if (interpretations_to_check != 'S') { | ||||
|       warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE) | ||||
|     } | ||||
|     numerator <- tbl %>% | ||||
|       filter_at(vars(ab[1], ab[2]), | ||||
|                 any_vars(. == interpretations_to_check)) %>% | ||||
|       filter_at(vars(ab[1], ab[2]), | ||||
|                 all_vars(. %in% c("S", "R", "I"))) %>% | ||||
|       nrow() | ||||
|  | ||||
|     denominator <- tbl %>% | ||||
|       filter_at(vars(ab[1], ab[2]), | ||||
|                 all_vars(. %in% c("S", "R", "I"))) %>% | ||||
|       nrow() | ||||
|  | ||||
|   } else if (length(ab) == 3) { | ||||
|     if (interpretations_to_check != 'S') { | ||||
|       warning('`interpretation` not set to S or I/S, albeit analysing a combination therapy.', call. = FALSE) | ||||
|     } | ||||
|     numerator <- tbl %>% | ||||
|       filter_at(vars(ab[1], ab[2], ab[3]), | ||||
|                 any_vars(. == interpretations_to_check)) %>% | ||||
|       filter_at(vars(ab[1], ab[2], ab[3]), | ||||
|                 all_vars(. %in% c("S", "R", "I"))) %>% | ||||
|       nrow() | ||||
|  | ||||
|     denominator <- tbl %>% | ||||
|       filter_at(vars(ab[1], ab[2], ab[3]), | ||||
|                 all_vars(. %in% c("S", "R", "I"))) %>% | ||||
|       nrow() | ||||
|  | ||||
|   } else { | ||||
|     stop('Maximum of 3 drugs allowed.') | ||||
|   } | ||||
|  | ||||
|   # build text part | ||||
|   if (info == TRUE) { | ||||
|     cat('n =', denominator) | ||||
|     info.txt1 <- percent(denominator / nrow(tbl)) | ||||
|     if (denominator == 0) { | ||||
|       info.txt1 <- 'none' | ||||
|     } | ||||
|     info.txt2 <- gsub(',', ' and', | ||||
|                       ab %>% | ||||
|                         abname(tolower = TRUE) %>% | ||||
|                         toString(), fixed = TRUE) | ||||
|     info.txt2 <- gsub('rsi1 and rsi2', 'these two drugs', info.txt2, fixed = TRUE) | ||||
|     info.txt2 <- gsub('rsi1', 'this drug', info.txt2, fixed = TRUE) | ||||
|     cat(paste0(' (of ', nrow(tbl), ' in total; ', info.txt1, ' tested on ', info.txt2, ')\n')) | ||||
|   } | ||||
|  | ||||
|   # calculate and format | ||||
|   y <- numerator / denominator | ||||
|   if (as_percent == TRUE) { | ||||
|     y <- percent(y, force_zero = TRUE) | ||||
|   } | ||||
|  | ||||
|   if (denominator < minimum) { | ||||
|     if (warning == TRUE) { | ||||
|       warning(paste0('TOO FEW ISOLATES OF ', toString(ab), ' (n = ', denominator, ', n < ', minimum, '); NO RESULT.')) | ||||
|     } | ||||
|     y <- NA | ||||
|   } | ||||
|  | ||||
|   # output | ||||
|   y | ||||
| } | ||||
|  | ||||
|  | ||||
| #' Predict antimicrobial resistance | ||||
| #' | ||||
| #' Create a prediction model to predict antimicrobial resistance for the next years on statistical solid ground. Standard errors (SE) will be returned as columns \code{se_min} and \code{se_max}. See Examples for a real live example. | ||||
|   | ||||
| @@ -25,7 +25,7 @@ This R package was created for academic research by PhD students of the Faculty | ||||
| This R package contains functions to make **microbiological, epidemiological data analysis easier**. It allows the use of some new classes to work with MIC values and antimicrobial interpretations (i.e. values S, I and R). | ||||
|  | ||||
| With `AMR` you can: | ||||
| * Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`). Our functions use expressions that are not evaluated by R, but by alternative C++ code that is dramatically faster and uses less memory. This is called *hybrid evaluation*. | ||||
| * Calculate the resistance (and even co-resistance) of microbial isolates with the `resistance` and `susceptibility` functions, that can also be used with the `dplyr` package (e.g. in conjunction with `summarise`) | ||||
| * Predict antimicrobial resistance for the nextcoming years with the `rsi_predict` function | ||||
| * Apply [EUCAST rules to isolates](http://www.eucast.org/expert_rules_and_intrinsic_resistance/) with the `EUCAST_rules` function | ||||
| * Identify first isolates of every patient [using guidelines from the CLSI](https://clsi.org/standards/products/microbiology/documents/m39/) (Clinical and Laboratory Standards Institute) with the `first_isolate` function | ||||
| @@ -41,6 +41,8 @@ And it contains: | ||||
|  | ||||
| With the `MDRO` function (abbreviation of Multi Drug Resistant Organisms), you can check your isolates for exceptional resistance with country-specific guidelines or EUCAST rules. Currently guidelines for Germany and the Netherlands are supported. Please suggest addition of your own country here: [https://github.com/msberends/AMR/issues/new](https://github.com/msberends/AMR/issues/new?title=New%20guideline%20for%20MDRO&body=%3C--%20Please%20add%20your%20country%20code,%20guideline%20name,%20version%20and%20source%20below%20and%20remove%20this%20line--%3E). | ||||
|  | ||||
| The functions to calculate microbial resistance use expressions that are not evaluated by R itself, but by alternative C++ code that is 25 to 30 times faster and uses less memory. This is called *hybrid evaluation*. | ||||
|  | ||||
| #### Read all changes and new functions in [NEWS.md](NEWS.md). | ||||
|  | ||||
| ## How to get it? | ||||
|   | ||||
| @@ -14,8 +14,8 @@ susceptibility(ab1, ab2 = NULL, include_I = FALSE, minimum = 30, | ||||
|  | ||||
| n_rsi(ab1, ab2 = NULL) | ||||
|  | ||||
| rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, | ||||
|   as_percent = FALSE) | ||||
| rsi(ab1, ab2 = NA, interpretation = "IR", minimum = 30, | ||||
|   as_percent = FALSE, info = FALSE, warning = TRUE) | ||||
| } | ||||
| \arguments{ | ||||
| \item{ab, ab1, ab2}{vector of antibiotic interpretations, they will be transformed internally with \code{\link{as.rsi}}} | ||||
| @@ -27,6 +27,10 @@ rsi(ab1, ab2 = NULL, interpretation = "IR", minimum = 30, | ||||
| \item{as_percent}{logical to indicate whether the output must be returned as percent (text), will else be a double} | ||||
|  | ||||
| \item{interpretation}{antimicrobial interpretation} | ||||
|  | ||||
| \item{info}{\emph{DEPRECATED} calculate the amount of available isolates and print it, like \code{n = 423}} | ||||
|  | ||||
| \item{warning}{\emph{DEPRECATED} show a warning when the available amount of isolates is below \code{minimum}} | ||||
| } | ||||
| \value{ | ||||
| Double or, when \code{as_percent = TRUE}, a character. | ||||
| @@ -37,7 +41,7 @@ These functions can be used to calculate the (co-)resistance of microbial isolat | ||||
| \details{ | ||||
| \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set. | ||||
|  | ||||
| All return values are calculated using hybrid evaluation (i.e. using C++), which makes these functions 60-65 times faster than in \code{AMR} v0.2.0 and below. The \code{rsi} function is available for backwards compatibility and deprecated. It now uses the \code{resistance} and \code{susceptibility} functions internally, based on the \code{interpretation} parameter. | ||||
| The functions \code{resistance}, \code{susceptibility} and \code{n_rsi} calculate using hybrid evaluation (i.e. using C++), which makes these functions 25-30 times faster than the old \code{rsi} function. This function is still available for backwards compatibility but is deprecated. | ||||
| \if{html}{ | ||||
|   \cr | ||||
|   To calculate the probability (\emph{p}) of susceptibility of one antibiotic, we use this formula: | ||||
| @@ -98,6 +102,29 @@ my_table \%>\% | ||||
|          genus == "Helicobacter") \%>\% | ||||
|   summarise(p = susceptibility(amox, metr), # amoxicillin with metronidazole | ||||
|             n = n_rsi(amox, metr)) | ||||
|  | ||||
|  | ||||
| # How fast is this hybrid evaluation in C++ compared to R? | ||||
| # In other words: how is the speed improvement of the new `resistance` compared to old `rsi`? | ||||
|  | ||||
| library(microbenchmark) | ||||
| df <- septic_patients \%>\% group_by(hospital_id, bactid) # 317 groups with sizes 1 to 167 | ||||
|  | ||||
| microbenchmark(old_IR = df \%>\% summarise(p = rsi(amox, minimum = 0, interpretation = "IR")), | ||||
|                new_IR = df \%>\% summarise(p = resistance(amox, minimum = 0)), | ||||
|                old_S = df \%>\% summarise(p = rsi(amox, minimum = 0, interpretation = "S")), | ||||
|                new_S = df \%>\% summarise(p = susceptibility(amox, minimum = 0)), | ||||
|                times = 5, | ||||
|                unit = "s") | ||||
|  | ||||
| # Unit: seconds | ||||
| #    expr          min         lq       mean     median         uq        max neval | ||||
| #  old_IR   1.95600230 1.96096857 1.97981537 1.96823318 2.00645711 2.00741568     5 | ||||
| #  new_IR   0.06872808 0.06984932 0.07162866 0.06987306 0.07050094 0.07919192     5 | ||||
| #   old_S   1.68893579 1.69024888 1.72461867 1.69785934 1.70428796 1.84176137     5 | ||||
| #   new_S   0.06737037 0.06838167 0.07431906 0.07745364 0.07827224 0.08011738     5 | ||||
|  | ||||
| # The old function took roughly 2 seconds, the new ones take 0.07 seconds. | ||||
| } | ||||
| } | ||||
| \keyword{antibiotics} | ||||
|   | ||||
| @@ -6,36 +6,36 @@ | ||||
| using namespace Rcpp; | ||||
|  | ||||
| // rsi_calc_S | ||||
| int rsi_calc_S(std::vector<double> x, bool include_I); | ||||
| int rsi_calc_S(DoubleVector x, bool include_I); | ||||
| RcppExport SEXP _AMR_rsi_calc_S(SEXP xSEXP, SEXP include_ISEXP) { | ||||
| BEGIN_RCPP | ||||
|     Rcpp::RObject rcpp_result_gen; | ||||
|     Rcpp::RNGScope rcpp_rngScope_gen; | ||||
|     Rcpp::traits::input_parameter< std::vector<double> >::type x(xSEXP); | ||||
|     Rcpp::traits::input_parameter< DoubleVector >::type x(xSEXP); | ||||
|     Rcpp::traits::input_parameter< bool >::type include_I(include_ISEXP); | ||||
|     rcpp_result_gen = Rcpp::wrap(rsi_calc_S(x, include_I)); | ||||
|     return rcpp_result_gen; | ||||
| END_RCPP | ||||
| } | ||||
| // rsi_calc_R | ||||
| int rsi_calc_R(std::vector<double> x, bool include_I); | ||||
| int rsi_calc_R(DoubleVector x, bool include_I); | ||||
| RcppExport SEXP _AMR_rsi_calc_R(SEXP xSEXP, SEXP include_ISEXP) { | ||||
| BEGIN_RCPP | ||||
|     Rcpp::RObject rcpp_result_gen; | ||||
|     Rcpp::RNGScope rcpp_rngScope_gen; | ||||
|     Rcpp::traits::input_parameter< std::vector<double> >::type x(xSEXP); | ||||
|     Rcpp::traits::input_parameter< DoubleVector >::type x(xSEXP); | ||||
|     Rcpp::traits::input_parameter< bool >::type include_I(include_ISEXP); | ||||
|     rcpp_result_gen = Rcpp::wrap(rsi_calc_R(x, include_I)); | ||||
|     return rcpp_result_gen; | ||||
| END_RCPP | ||||
| } | ||||
| // rsi_calc_total | ||||
| int rsi_calc_total(std::vector<double> x); | ||||
| int rsi_calc_total(DoubleVector x); | ||||
| RcppExport SEXP _AMR_rsi_calc_total(SEXP xSEXP) { | ||||
| BEGIN_RCPP | ||||
|     Rcpp::RObject rcpp_result_gen; | ||||
|     Rcpp::RNGScope rcpp_rngScope_gen; | ||||
|     Rcpp::traits::input_parameter< std::vector<double> >::type x(xSEXP); | ||||
|     Rcpp::traits::input_parameter< DoubleVector >::type x(xSEXP); | ||||
|     rcpp_result_gen = Rcpp::wrap(rsi_calc_total(x)); | ||||
|     return rcpp_result_gen; | ||||
| END_RCPP | ||||
|   | ||||
| @@ -1,12 +1,11 @@ | ||||
| #include <Rcpp.h> | ||||
| #include <vector>        // for std::vector | ||||
| #include <functional>    // for std::less, etc | ||||
| #include <algorithm>     // for count_if | ||||
|  | ||||
| using namespace Rcpp ; | ||||
| using namespace Rcpp; | ||||
|  | ||||
| // [[Rcpp::export]] | ||||
| int rsi_calc_S(std::vector<double> x, bool include_I) { | ||||
| int rsi_calc_S(DoubleVector x, bool include_I) { | ||||
|   if (include_I == TRUE) { | ||||
|     return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 2)); | ||||
|   } else { | ||||
| @@ -15,7 +14,7 @@ int rsi_calc_S(std::vector<double> x, bool include_I) { | ||||
| } | ||||
|  | ||||
| // [[Rcpp::export]] | ||||
| int rsi_calc_R(std::vector<double> x, bool include_I) { | ||||
| int rsi_calc_R(DoubleVector x, bool include_I) { | ||||
|   if (include_I == TRUE) { | ||||
|     return count_if(x.begin(), x.end(), bind2nd(std::greater_equal<double>(), 2)); | ||||
|   } else { | ||||
| @@ -24,6 +23,6 @@ int rsi_calc_R(std::vector<double> x, bool include_I) { | ||||
| } | ||||
|  | ||||
| // [[Rcpp::export]] | ||||
| int rsi_calc_total(std::vector<double> x) { | ||||
|  return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 3)); | ||||
| int rsi_calc_total(DoubleVector x) { | ||||
|   return count_if(x.begin(), x.end(), bind2nd(std::less_equal<double>(), 3)); | ||||
| } | ||||
|   | ||||
| @@ -19,7 +19,7 @@ test_that("first isolates work", { | ||||
|       na.rm = TRUE), | ||||
|     1959) | ||||
|  | ||||
|   # septic_patients contains 1961 out of 2000 first *weighted* isolates | ||||
|   # septic_patients contains 1963 out of 2000 first *weighted* isolates | ||||
|   expect_equal( | ||||
|     suppressWarnings( | ||||
|       sum( | ||||
| @@ -31,7 +31,7 @@ test_that("first isolates work", { | ||||
|                       type = "keyantibiotics", | ||||
|                       info = TRUE), | ||||
|         na.rm = TRUE)), | ||||
|     1961) | ||||
|     1963) | ||||
|   # and 1998 when using points | ||||
|   expect_equal( | ||||
|     suppressWarnings( | ||||
|   | ||||
| @@ -2,8 +2,8 @@ context("resistance.R") | ||||
|  | ||||
| test_that("resistance works", { | ||||
|   # amox resistance in `septic_patients` should be around 57.56% | ||||
|   expect_equal(resistance(septic_patients$amox), 0.5756, tolerance = 0.0001) | ||||
|   expect_equal(susceptibility(septic_patients$amox), 1 - 0.5756, tolerance = 0.0001) | ||||
|   expect_equal(resistance(septic_patients$amox, include_I = TRUE), 0.5756, tolerance = 0.0001) | ||||
|   expect_equal(susceptibility(septic_patients$amox, include_I = FALSE), 1 - 0.5756, tolerance = 0.0001) | ||||
|  | ||||
|   # pita+genta susceptibility around 98.09% | ||||
|   expect_equal(susceptibility(septic_patients$pita, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user