2018-08-23 00:40:36 +02:00
# ==================================================================== #
2023-07-08 17:30:05 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2018-08-23 00:40:36 +02:00
# #
2023-07-08 17:30:05 +02:00
# SOURCE CODE: #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2018-08-23 00:40:36 +02:00
# #
2023-07-08 17:30:05 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2022-10-05 09:12:22 +02:00
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2018-08-23 00:40:36 +02:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2018-08-23 00:40:36 +02:00
# ==================================================================== #
2019-06-27 11:57:45 +02:00
dots2vars <- function ( ... ) {
2022-08-28 10:31:50 +02:00
# this function is to give more informative output about
2019-11-10 12:16:56 +01:00
# variable names in count_* and proportion_* functions
2020-05-16 13:05:47 +02:00
dots <- substitute ( list ( ... ) )
2023-02-10 16:18:00 +01:00
dots <- as.character ( dots ) [2 : length ( dots ) ]
paste0 ( dots [dots != " ." ] , collapse = " +" )
2019-06-27 11:57:45 +02:00
}
2023-01-21 23:47:20 +01:00
sir_calc <- function ( ... ,
2019-07-01 14:03:15 +02:00
ab_result ,
minimum = 0 ,
as_percent = FALSE ,
only_all_tested = FALSE ,
only_count = FALSE ) {
2024-05-20 15:27:04 +02:00
meet_criteria ( ab_result , allow_class = c ( " character" , " numeric" , " integer" ) , has_length = c ( 1 : 5 ) )
2023-02-10 16:18:00 +01:00
meet_criteria ( minimum , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive_or_zero = TRUE , is_finite = TRUE )
2022-10-20 16:08:01 +02:00
meet_criteria ( as_percent , allow_class = " logical" , has_length = 1 )
meet_criteria ( only_all_tested , allow_class = " logical" , has_length = 1 )
meet_criteria ( only_count , allow_class = " logical" , has_length = 1 )
2022-08-28 10:31:50 +02:00
2020-06-22 11:18:40 +02:00
data_vars <- dots2vars ( ... )
2022-08-28 10:31:50 +02:00
dots_df <- switch ( 1 ,
...
)
2020-07-02 21:12:52 +02:00
if ( is.data.frame ( dots_df ) ) {
# make sure to remove all other classes like tibbles, data.tables, etc
dots_df <- as.data.frame ( dots_df , stringsAsFactors = FALSE )
}
2022-08-28 10:31:50 +02:00
2020-09-03 12:31:48 +02:00
dots <- eval ( substitute ( alist ( ... ) ) )
2020-06-22 11:18:40 +02:00
stop_if ( length ( dots ) == 0 , " no variables selected" , call = -2 )
2022-08-28 10:31:50 +02:00
2020-06-22 11:18:40 +02:00
stop_if ( " also_single_tested" %in% names ( dots ) ,
2022-08-28 10:31:50 +02:00
" `also_single_tested` was replaced by `only_all_tested`.\n" ,
" Please read Details in the help page (`?proportion`) as this may have a considerable impact on your analysis." ,
call = -2
)
2018-08-24 11:08:20 +02:00
ndots <- length ( dots )
2022-08-28 10:31:50 +02:00
2020-06-26 10:21:22 +02:00
if ( is.data.frame ( dots_df ) ) {
2023-02-09 13:07:39 +01:00
# data.frame passed with other columns, like: example_isolates %pm>% proportion_S(AMC, GEN)
2022-08-28 10:31:50 +02:00
2019-11-10 12:16:56 +01:00
dots <- as.character ( dots )
2020-06-26 10:21:22 +02:00
# remove first element, it's the data.frame
if ( length ( dots ) == 1 ) {
dots <- character ( 0 )
} else {
dots <- dots [2 : length ( dots ) ]
}
2022-10-05 09:12:22 +02:00
if ( length ( dots ) == 0 || all ( dots == " df" ) ) {
2023-02-09 13:07:39 +01:00
# for complete data.frames, like example_isolates %pm>% select(AMC, GEN) %pm>% proportion_S()
2023-01-21 23:47:20 +01:00
# and the old sir function, which has "df" as name of the first argument
2018-08-24 11:08:20 +02:00
x <- dots_df
} else {
2020-07-02 21:12:52 +02:00
# get dots that are in column names already, and the ones that will be once evaluated using dots_df or global env
2020-09-18 16:05:53 +02:00
# this is to support susceptibility(example_isolates, AMC, any_of(some_vector_with_AB_names))
2022-08-28 10:31:50 +02:00
dots <- c (
dots [dots %in% colnames ( dots_df ) ] ,
eval ( parse ( text = dots [ ! dots %in% colnames ( dots_df ) ] ) , envir = dots_df , enclos = globalenv ( ) )
)
2020-06-26 10:21:22 +02:00
dots_not_exist <- dots [ ! dots %in% colnames ( dots_df ) ]
2021-02-04 16:48:16 +01:00
stop_if ( length ( dots_not_exist ) > 0 , " column(s) not found: " , vector_and ( dots_not_exist , quotes = TRUE ) , call = -2 )
2020-06-26 10:21:22 +02:00
x <- dots_df [ , dots , drop = FALSE ]
2018-08-23 00:40:36 +02:00
}
2018-08-24 11:08:20 +02:00
} else if ( ndots == 1 ) {
2023-02-09 13:07:39 +01:00
# only 1 variable passed (can also be data.frame), like: proportion_S(example_isolates$AMC) and example_isolates$AMC %pm>% proportion_S()
2018-08-24 11:08:20 +02:00
x <- dots_df
2018-08-23 00:40:36 +02:00
} else {
2020-05-19 12:08:49 +02:00
# multiple variables passed without pipe, like: proportion_S(example_isolates$AMC, example_isolates$GEN)
2018-08-24 11:08:20 +02:00
x <- NULL
2020-07-02 21:12:52 +02:00
try ( x <- as.data.frame ( dots , stringsAsFactors = FALSE ) , silent = TRUE )
2018-08-24 11:08:20 +02:00
if ( is.null ( x ) ) {
2023-02-09 13:07:39 +01:00
# support for example_isolates %pm>% group_by(ward) %pm>% summarise(amox = susceptibility(GEN, AMX))
2020-07-02 21:12:52 +02:00
x <- as.data.frame ( list ( ... ) , stringsAsFactors = FALSE )
2018-08-24 11:08:20 +02:00
}
2018-08-23 00:40:36 +02:00
}
2022-08-28 10:31:50 +02:00
2019-05-10 16:44:59 +02:00
if ( is.null ( x ) ) {
2022-03-02 15:38:55 +01:00
warning_ ( " argument is NULL (check if columns exist): returning NA" )
2020-12-17 16:22:25 +01:00
if ( as_percent == TRUE ) {
return ( NA_character_ )
} else {
return ( NA_real_ )
}
2019-05-10 16:44:59 +02:00
}
2022-08-28 10:31:50 +02:00
2018-08-23 00:40:36 +02:00
print_warning <- FALSE
2022-08-28 10:31:50 +02:00
2023-01-21 23:47:20 +01:00
ab_result <- as.sir ( ab_result )
2022-08-28 10:31:50 +02:00
2018-08-23 00:40:36 +02:00
if ( is.data.frame ( x ) ) {
2023-01-21 23:47:20 +01:00
sir_integrity_check <- character ( 0 )
2019-10-11 17:21:02 +02:00
for ( i in seq_len ( ncol ( x ) ) ) {
2023-01-21 23:47:20 +01:00
# check integrity of columns: force 'sir' class
if ( ! is.sir ( x [ , i , drop = TRUE ] ) ) {
sir_integrity_check <- c ( sir_integrity_check , as.character ( x [ , i , drop = TRUE ] ) )
x [ , i ] <- suppressWarnings ( as.sir ( x [ , i , drop = TRUE ] ) ) # warning will be given later
2018-08-23 00:40:36 +02:00
print_warning <- TRUE
}
}
2023-01-21 23:47:20 +01:00
if ( length ( sir_integrity_check ) > 0 ) {
2018-10-19 13:53:31 +02:00
# this will give a warning for invalid results, of all input columns (so only 1 warning)
2023-01-21 23:47:20 +01:00
sir_integrity_check <- as.sir ( sir_integrity_check )
2018-10-19 13:53:31 +02:00
}
2022-08-28 10:31:50 +02:00
2020-11-11 16:49:27 +01:00
x_transposed <- as.list ( as.data.frame ( t ( x ) , stringsAsFactors = FALSE ) )
2023-02-10 16:18:00 +01:00
if ( isTRUE ( only_all_tested ) ) {
2024-05-20 21:29:13 +02:00
get_integers <- function ( x ) {
ints <- rep ( NA_integer_ , length ( x ) )
ints [x == " S" ] <- 1L
ints [x %in% c ( " SDD" , " I" ) ] <- 2L
ints [x == " R" ] <- 3L
ints
}
2020-07-03 10:51:55 +02:00
# no NAs in any column
2022-08-28 10:31:50 +02:00
y <- apply (
2024-05-20 21:29:13 +02:00
X = as.data.frame ( lapply ( x , get_integers ) , stringsAsFactors = FALSE ) ,
2022-08-28 10:31:50 +02:00
MARGIN = 1 ,
FUN = min
)
2024-05-20 21:29:13 +02:00
numerator <- sum ( ! is.na ( y ) & y %in% get_integers ( ab_result ) , na.rm = TRUE )
2022-10-05 09:12:22 +02:00
denominator <- sum ( vapply ( FUN.VALUE = logical ( 1 ) , x_transposed , function ( y ) ! ( anyNA ( y ) ) ) )
2018-10-19 13:53:31 +02:00
} else {
2020-07-03 10:51:55 +02:00
# may contain NAs in any column
2020-09-03 12:31:48 +02:00
other_values <- setdiff ( c ( NA , levels ( ab_result ) ) , ab_result )
2020-12-28 22:24:33 +01:00
numerator <- sum ( vapply ( FUN.VALUE = logical ( 1 ) , x_transposed , function ( y ) any ( y %in% ab_result , na.rm = TRUE ) ) )
2022-10-05 09:12:22 +02:00
denominator <- sum ( vapply ( FUN.VALUE = logical ( 1 ) , x_transposed , function ( y ) ! ( all ( y %in% other_values ) & anyNA ( y ) ) ) )
2018-10-19 13:53:31 +02:00
}
2018-08-23 00:40:36 +02:00
} else {
2019-07-01 14:03:15 +02:00
# x is not a data.frame
2023-01-21 23:47:20 +01:00
if ( ! is.sir ( x ) ) {
x <- as.sir ( x )
2018-08-23 00:40:36 +02:00
print_warning <- TRUE
}
2019-07-01 14:03:15 +02:00
numerator <- sum ( x %in% ab_result , na.rm = TRUE )
denominator <- sum ( x %in% levels ( ab_result ) , na.rm = TRUE )
2018-08-23 00:40:36 +02:00
}
2022-08-28 10:31:50 +02:00
2018-08-23 00:40:36 +02:00
if ( print_warning == TRUE ) {
2023-01-21 23:47:20 +01:00
if ( message_not_thrown_before ( " sir_calc" ) ) {
warning_ ( " Increase speed by transforming to class 'sir' on beforehand:\n" ,
" your_data %>% mutate_if(is_sir_eligible, as.sir)" ,
2022-08-28 10:31:50 +02:00
call = FALSE
)
2020-12-24 23:29:10 +01:00
}
2018-08-23 00:40:36 +02:00
}
2022-08-28 10:31:50 +02:00
2018-08-23 00:40:36 +02:00
if ( only_count == TRUE ) {
2019-07-01 14:03:15 +02:00
return ( numerator )
2018-08-23 00:40:36 +02:00
}
2023-02-09 13:07:39 +01:00
2019-07-01 14:03:15 +02:00
if ( denominator < minimum ) {
if ( data_vars != " " ) {
data_vars <- paste ( " for" , data_vars )
2021-07-03 21:56:53 +02:00
# also add group name if used in dplyr::group_by()
cur_group <- import_fn ( " cur_group" , " dplyr" , error_on_fail = FALSE )
if ( ! is.null ( cur_group ) ) {
group_df <- tryCatch ( cur_group ( ) , error = function ( e ) data.frame ( ) )
if ( NCOL ( group_df ) > 0 ) {
# transform factors to characters
group <- vapply ( FUN.VALUE = character ( 1 ) , group_df , function ( x ) {
if ( is.numeric ( x ) ) {
format ( x )
} else if ( is.logical ( x ) ) {
as.character ( x )
} else {
paste0 ( ' "' , x , ' "' )
}
} )
data_vars <- paste0 ( data_vars , " in group: " , paste0 ( names ( group ) , " = " , group , collapse = " , " ) )
}
}
2019-07-01 14:03:15 +02:00
}
2021-07-03 21:56:53 +02:00
warning_ ( " Introducing NA: " ,
2022-08-28 10:31:50 +02:00
ifelse ( denominator == 0 , " no" , paste ( " only" , denominator ) ) ,
" results available" ,
data_vars ,
" (`minimum` = " , minimum , " )." ,
call = FALSE
)
2020-09-28 01:08:55 +02:00
fraction <- NA_real_
2018-10-12 16:35:18 +02:00
} else {
2019-07-01 14:03:15 +02:00
fraction <- numerator / denominator
2020-09-28 01:08:55 +02:00
fraction [is.nan ( fraction ) ] <- NA_real_
2018-08-23 00:40:36 +02:00
}
2022-08-28 10:31:50 +02:00
2018-08-23 00:40:36 +02:00
if ( as_percent == TRUE ) {
2019-09-30 16:45:36 +02:00
percentage ( fraction , digits = 1 )
2018-08-23 00:40:36 +02:00
} else {
2019-07-01 14:03:15 +02:00
fraction
2018-08-23 00:40:36 +02:00
}
}
2019-05-13 10:10:16 +02:00
2023-01-21 23:47:20 +01:00
sir_calc_df <- function ( type , # "proportion", "count" or "both"
2019-05-13 10:10:16 +02:00
data ,
translate_ab = " name" ,
2021-12-12 09:42:03 +01:00
language = get_AMR_locale ( ) ,
2019-05-13 10:10:16 +02:00
minimum = 30 ,
as_percent = FALSE ,
combine_SI = TRUE ,
2022-10-20 16:08:01 +02:00
confidence_level = 0.95 ) {
meet_criteria ( type , is_in = c ( " proportion" , " count" , " both" ) , has_length = 1 )
2023-12-03 11:34:48 +01:00
meet_criteria ( data , allow_class = " data.frame" , contains_column_class = " sir" )
2022-10-20 16:08:01 +02:00
meet_criteria ( translate_ab , allow_class = c ( " character" , " logical" ) , has_length = 1 , allow_NA = TRUE )
2023-02-10 16:18:00 +01:00
language <- validate_language ( language )
meet_criteria ( minimum , allow_class = c ( " numeric" , " integer" ) , has_length = 1 , is_positive_or_zero = TRUE , is_finite = TRUE )
2022-10-20 16:08:01 +02:00
meet_criteria ( as_percent , allow_class = " logical" , has_length = 1 )
meet_criteria ( combine_SI , allow_class = " logical" , has_length = 1 )
meet_criteria ( confidence_level , allow_class = " numeric" , has_length = 1 )
2022-08-28 10:31:50 +02:00
2020-06-25 17:34:50 +02:00
translate_ab <- get_translate_ab ( translate_ab )
2022-08-28 10:31:50 +02:00
2022-08-27 20:49:37 +02:00
data.bak <- data
2020-05-16 13:05:47 +02:00
# select only groups and antibiotics
2021-07-08 22:23:28 +02:00
if ( is_null_or_grouped_tbl ( data ) ) {
2020-05-16 13:05:47 +02:00
data_has_groups <- TRUE
2023-02-10 16:18:00 +01:00
groups <- get_group_names ( data )
2023-01-21 23:47:20 +01:00
data <- data [ , c ( groups , colnames ( data ) [vapply ( FUN.VALUE = logical ( 1 ) , data , is.sir ) ] ) , drop = FALSE ]
2020-05-16 13:05:47 +02:00
} else {
data_has_groups <- FALSE
2023-01-21 23:47:20 +01:00
data <- data [ , colnames ( data ) [vapply ( FUN.VALUE = logical ( 1 ) , data , is.sir ) ] , drop = FALSE ]
2020-05-16 13:05:47 +02:00
}
2022-08-28 10:31:50 +02:00
2020-05-16 13:05:47 +02:00
data <- as.data.frame ( data , stringsAsFactors = FALSE )
2022-10-20 16:08:01 +02:00
if ( isTRUE ( combine_SI ) ) {
2020-05-16 13:05:47 +02:00
for ( i in seq_len ( ncol ( data ) ) ) {
2023-01-21 23:47:20 +01:00
if ( is.sir ( data [ , i , drop = TRUE ] ) ) {
2020-05-16 13:05:47 +02:00
data [ , i ] <- as.character ( data [ , i , drop = TRUE ] )
2024-05-20 15:27:04 +02:00
if ( " SDD" %in% data [ , i , drop = TRUE ] ) {
if ( message_not_thrown_before ( " sir_calc_df" , combine_SI , entire_session = TRUE ) ) {
message_ ( " Note that `sir_calc_df()` will also count dose-dependent susceptibility, 'SDD', as 'SI' when `combine_SI = TRUE`. This note will be shown once for this session." , as_note = FALSE )
}
}
data [ , i ] <- gsub ( " (I|S|SDD)" , " SI" , data [ , i , drop = TRUE ] )
2019-11-10 12:16:56 +01:00
}
2020-05-16 13:05:47 +02:00
}
2019-05-13 10:10:16 +02:00
}
2022-10-30 14:31:45 +01:00
2020-05-16 13:05:47 +02:00
sum_it <- function ( .data ) {
2022-08-28 10:31:50 +02:00
out <- data.frame (
antibiotic = character ( 0 ) ,
interpretation = character ( 0 ) ,
value = double ( 0 ) ,
2022-10-20 16:08:01 +02:00
ci_min = double ( 0 ) ,
ci_max = double ( 0 ) ,
2022-08-28 10:31:50 +02:00
isolates = integer ( 0 ) ,
stringsAsFactors = FALSE
)
2020-05-16 13:05:47 +02:00
if ( data_has_groups ) {
group_values <- unique ( .data [ , which ( colnames ( .data ) %in% groups ) , drop = FALSE ] )
rownames ( group_values ) <- NULL
.data <- .data [ , which ( ! colnames ( .data ) %in% groups ) , drop = FALSE ]
}
for ( i in seq_len ( ncol ( .data ) ) ) {
2020-06-09 16:18:03 +02:00
values <- .data [ , i , drop = TRUE ]
if ( isTRUE ( combine_SI ) ) {
2024-05-20 15:27:04 +02:00
values <- factor ( values , levels = c ( " SI" , " R" , " N" ) , ordered = TRUE )
2020-06-09 16:18:03 +02:00
} else {
2024-05-20 15:27:04 +02:00
values <- factor ( values , levels = c ( " S" , " SDD" , " I" , " R" , " N" ) , ordered = TRUE )
2020-06-09 16:18:03 +02:00
}
2020-11-11 16:49:27 +01:00
col_results <- as.data.frame ( as.matrix ( table ( values ) ) , stringsAsFactors = FALSE )
2020-05-16 13:05:47 +02:00
col_results $ interpretation <- rownames ( col_results )
col_results $ isolates <- col_results [ , 1 , drop = TRUE ]
2020-06-09 16:18:03 +02:00
if ( NROW ( col_results ) > 0 && sum ( col_results $ isolates , na.rm = TRUE ) > 0 ) {
2020-05-16 13:05:47 +02:00
if ( sum ( col_results $ isolates , na.rm = TRUE ) >= minimum ) {
col_results $ value <- col_results $ isolates / sum ( col_results $ isolates , na.rm = TRUE )
2022-10-30 14:31:45 +01:00
ci <- lapply (
col_results $ isolates ,
function ( x ) {
stats :: binom.test (
x = x ,
n = sum ( col_results $ isolates , na.rm = TRUE ) ,
conf.level = confidence_level
) $ conf.int
}
)
2022-10-20 16:08:01 +02:00
col_results $ ci_min <- vapply ( FUN.VALUE = double ( 1 ) , ci , `[` , 1 )
col_results $ ci_max <- vapply ( FUN.VALUE = double ( 1 ) , ci , `[` , 2 )
2020-05-16 13:05:47 +02:00
} else {
col_results $ value <- rep ( NA_real_ , NROW ( col_results ) )
2022-10-20 16:08:01 +02:00
# confidence intervals also to NA
col_results $ ci_min <- col_results $ value
col_results $ ci_max <- col_results $ value
2020-05-16 13:05:47 +02:00
}
2022-08-28 10:31:50 +02:00
out_new <- data.frame (
antibiotic = ifelse ( isFALSE ( translate_ab ) ,
colnames ( .data ) [i ] ,
ab_property ( colnames ( .data ) [i ] , property = translate_ab , language = language )
) ,
interpretation = col_results $ interpretation ,
value = col_results $ value ,
2022-10-20 16:08:01 +02:00
ci_min = col_results $ ci_min ,
ci_max = col_results $ ci_max ,
2022-08-28 10:31:50 +02:00
isolates = col_results $ isolates ,
stringsAsFactors = FALSE
)
2020-05-16 13:05:47 +02:00
if ( data_has_groups ) {
2020-06-09 16:18:03 +02:00
if ( nrow ( group_values ) < nrow ( out_new ) ) {
# repeat group_values for the number of rows in out_new
repeated <- rep ( seq_len ( nrow ( group_values ) ) ,
2022-08-28 10:31:50 +02:00
each = nrow ( out_new ) / nrow ( group_values )
)
2020-06-09 16:18:03 +02:00
group_values <- group_values [repeated , , drop = FALSE ]
}
2020-05-16 13:05:47 +02:00
out_new <- cbind ( group_values , out_new )
}
2023-03-12 13:02:37 +01:00
out <- rbind_AMR ( out , out_new )
2020-05-16 13:05:47 +02:00
}
}
out
}
2022-08-28 10:31:50 +02:00
2020-09-18 16:05:53 +02:00
# based on pm_apply_grouped_function
apply_group <- function ( .data , fn , groups , drop = FALSE , ... ) {
grouped <- pm_split_into_groups ( .data , groups , drop )
2023-03-12 13:02:37 +01:00
res <- do.call ( rbind_AMR , unname ( lapply ( grouped , fn , ... ) ) )
2020-05-16 13:05:47 +02:00
if ( any ( groups %in% colnames ( res ) ) ) {
class ( res ) <- c ( " grouped_data" , class ( res ) )
2023-02-09 13:07:39 +01:00
res <- pm_set_groups ( res , groups [groups %in% colnames ( res ) ] )
2020-05-16 13:05:47 +02:00
}
res
2019-05-13 10:10:16 +02:00
}
2022-08-28 10:31:50 +02:00
2020-05-16 13:05:47 +02:00
if ( data_has_groups ) {
out <- apply_group ( data , " sum_it" , groups )
} else {
out <- sum_it ( data )
}
2022-08-28 10:31:50 +02:00
2020-05-16 13:05:47 +02:00
# apply factors for right sorting in interpretation
if ( isTRUE ( combine_SI ) ) {
out $ interpretation <- factor ( out $ interpretation , levels = c ( " SI" , " R" ) , ordered = TRUE )
} else {
2023-01-21 23:47:20 +01:00
# don't use as.sir() here, as it would add the class 'sir' and we would like
2020-06-17 01:39:30 +02:00
# the same data structure as output, regardless of input
2024-05-20 15:27:04 +02:00
out $ interpretation <- factor ( out $ interpretation , levels = c ( " S" , " SDD" , " I" , " R" , " N" ) , ordered = TRUE )
2019-05-13 10:10:16 +02:00
}
2024-05-20 21:29:13 +02:00
out <- out [ ! is.na ( out $ interpretation ) , , drop = FALSE ]
2022-08-28 10:31:50 +02:00
2020-05-16 13:05:47 +02:00
if ( data_has_groups ) {
# ordering by the groups and two more: "antibiotic" and "interpretation"
2022-08-28 10:31:50 +02:00
out <- pm_ungroup ( out [do.call ( " order" , out [ , seq_len ( length ( groups ) + 2 ) , drop = FALSE ] ) , , drop = FALSE ] )
2020-05-16 13:05:47 +02:00
} else {
2022-08-27 20:49:37 +02:00
out <- out [order ( out $ antibiotic , out $ interpretation ) , , drop = FALSE ]
2020-05-16 13:05:47 +02:00
}
2022-08-28 10:31:50 +02:00
2020-05-16 13:05:47 +02:00
if ( type == " proportion" ) {
2022-10-20 16:08:01 +02:00
# remove number of isolates
2020-05-16 13:05:47 +02:00
out <- subset ( out , select = - c ( isolates ) )
} else if ( type == " count" ) {
2022-10-20 16:08:01 +02:00
# set value to be number of isolates
2020-05-16 13:05:47 +02:00
out $ value <- out $ isolates
2022-10-20 16:08:01 +02:00
# remove redundant columns
out <- subset ( out , select = - c ( ci_min , ci_max , isolates ) )
2022-08-28 10:31:50 +02:00
}
2020-05-16 13:05:47 +02:00
rownames ( out ) <- NULL
2023-01-05 14:43:18 +01:00
out <- as_original_data_class ( out , class ( data.bak ) ) # will remove tibble groups
2023-12-03 11:34:48 +01:00
structure ( out , class = c ( " sir_df" , class ( out ) ) )
2019-05-13 10:10:16 +02:00
}