# ==================================================================== #
# TITLE: #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE CODE: #
# https://github.com/msberends/AMR #
# #
# PLEASE CITE THIS SOFTWARE AS: #
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
# https://doi.org/10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #
#' Transform Input to Disk Diffusion Diameters
#'
#' This transforms a vector to a new class [`disk`], which is a disk diffusion growth zone size (around an antibiotic disk) in millimetres between 6 and 50.
#' @rdname as.disk
#' @param x vector
#' @param na.rm a [logical] indicating whether missing values should be removed
#' @details Interpret disk values as SIR values with [as.sir()]. It supports guidelines from EUCAST and CLSI.
#'
#' Disk diffusion growth zone sizes must be between 6 and 50 millimetres. Values higher than 50 but lower than 100 will be maximised to 50. All other input values outside the 6-50 range will return `NA`.
#' @return An [integer] with additional class [`disk`]
#' @aliases disk
#' @export
#' @seealso [as.sir()]
#' @examples
#' # transform existing disk zones to the `disk` class (using base R)
#' df <- data.frame(
#'   microorganism = "Escherichia coli",
#'   AMP = 20,
#'   CIP = 14,
#'   GEN = 18,
#'   TOB = 16
#' )
#' df[, 2:5] <- lapply(df[, 2:5], as.disk)
#' str(df)
#'
#' \donttest{
#' # transforming is easier with dplyr:
#' if (require("dplyr")) {
#'   df %>% mutate(across(AMP:TOB, as.disk))
#' }
#' }
#'
#' # interpret disk values, see ?as.sir
#' as.sir(
#'   x = as.disk(18),
#'   mo = "Strep pneu", # `mo` will be coerced with as.mo()
#'   ab = "ampicillin", # and `ab` with as.ab()
#'   guideline = "EUCAST"
#' )
#'
#' # interpret whole data set, pretend to be all from urinary tract infections:
#' as.sir(df, uti = TRUE)
as.disk <- function(x, na.rm = FALSE) {
  meet_criteria(x, allow_NA = TRUE)
  meet_criteria(na.rm, allow_class = "logical", has_length = 1)

  if (is.disk(x)) {
    # Input is already of class 'disk', so there is nothing to parse. `na.rm`
    # must still be honoured; the class is re-applied at the end.
    if (isTRUE(na.rm)) {
      x <- as.integer(x)
      x <- x[!is.na(x)]
    }
  } else {
    x <- unlist(x)
    if (isTRUE(na.rm)) {
      x <- x[!is.na(x)]
    }
    # blank strings count as missing input, not as invalid input
    x[trimws2(x) == ""] <- NA
    # keep the raw input so invalid values can be quoted in the warning
    x.bak <- x

    na_before <- sum(is.na(x))

    # heavily based on cleaner::clean_double():
    clean_double2 <- function(x, remove = "[^0-9.,-]", fixed = FALSE) {
      x <- gsub(",", ".", x, fixed = TRUE)
      # remove ending dot/comma
      x <- gsub("[,.]$", "", x)
      # only keep last dot/comma: reverse the string, mark its first dot (the
      # last one of the original) with a reversed placeholder, reverse back,
      # drop all remaining dots and finally turn the placeholder into a dot
      reverse <- function(x) {
        vapply(
          FUN.VALUE = character(1),
          lapply(strsplit(x, NULL), rev),
          paste,
          collapse = ""
        )
      }
      marked <- reverse(sub(".", "}}tod{{", reverse(x), fixed = TRUE))
      x <- sub("{{dot}}", ".", gsub(".", "", marked, fixed = TRUE), fixed = TRUE)
      x_clean <- gsub(remove, "", x, ignore.case = TRUE, fixed = fixed)
      # remove everything that is not a number or dot
      as.double(gsub("[^0-9.]+", "", x_clean))
    }

    # round up and make it an integer
    x <- as.integer(ceiling(clean_double2(x)))

    # disks can never be less than 6 mm (size of smallest disk) or more than 50 mm;
    # values from 51 to 99 are capped at 50, anything else becomes NA
    x[x < 6 | x > 99] <- NA_integer_
    x[x > 50] <- 50L
    na_after <- sum(is.na(x))

    if (na_before != na_after) {
      n_invalid <- na_after - na_before
      invalid_input <- x.bak[is.na(x) & !is.na(x.bak)]
      list_missing <- vector_and(sort(unique(invalid_input)), quotes = TRUE)
      cur_col <- get_current_column()
      warning_(
        "in `as.disk()`: ", n_invalid, " result",
        if (n_invalid > 1) "s" else "",
        if (is.null(cur_col)) "" else paste0(" in column '", cur_col, "'"),
        " truncated (",
        round((n_invalid / length(x)) * 100),
        "%) that were invalid disk zones: ",
        list_missing,
        call = FALSE
      )
    }
  }
  set_clean_class(as.integer(x),
    new_class = c("disk", "integer")
  )
}
# Internal check: TRUE only if `x` has at least one non-missing value and every
# non-missing value survives as.disk() without becoming NA.
all_valid_disks <- function(x) {
  if (!inherits(x, c("disk", "character", "numeric", "integer"))) {
    return(FALSE)
  }
  if (all(is.na(x))) {
    # nothing to validate; this also covers zero-length input
    return(FALSE)
  }
  # warnings about invalid zones are expected here and not of interest;
  # an error during conversion counts as "not valid"
  x_disk <- tryCatch(
    suppressWarnings(as.disk(x[!is.na(x)])),
    error = function(e) NA
  )
  !anyNA(x_disk)
}
#' @rdname as.disk
#' @details `NA_disk_` is a missing value of the new `disk` class.
#' @export
NA_disk_ <- set_clean_class(NA_integer_,
  new_class = c("disk", "integer")
)
#' @rdname as.disk
#' @export
is.disk <- function(x) {
  # TRUE when "disk" is among the classes of `x`
  inherits(x, "disk")
}
# will be exported using s3_register() in R/zzz.R
# Column formatter for 'disk' vectors: values are shown right-aligned,
# missing values are rendered through font_na().
pillar_shaft.disk <- function(x, ...) {
  out <- trimws(format(x))
  out[is.na(x)] <- font_na(NA)
  create_pillar_column(out, align = "right", width = 2)
}
#' @method print disk
#' @export
#' @noRd
print.disk <- function(x, ...) {
  # header line, followed by the plain integer values
  cat("Class 'disk'\n")
  print(as.integer(x), quote = FALSE)
  # print methods return their argument invisibly so they can be chained
  invisible(x)
}
#' @method [ disk
#' @export
#' @noRd
"[.disk" <- function(x, ...) {
  y <- NextMethod()
  # Put back the class (and any other custom attribute) of `x`. The positional
  # attributes are skipped on purpose: NextMethod() already returned names that
  # match the selection, and copying the names of the full vector onto a
  # shorter result would fail.
  for (nm in setdiff(names(attributes(x)), c("names", "dim", "dimnames"))) {
    attr(y, nm) <- attr(x, nm, exact = TRUE)
  }
  y
}
#' @method [[ disk
#' @export
#' @noRd
"[[.disk" <- function(x, ...) {
  y <- NextMethod()
  # Put back the class (and any other custom attribute) of `x`. The positional
  # attributes are skipped on purpose: the extracted element has length one, so
  # the names of the full vector cannot be assigned to it.
  for (nm in setdiff(names(attributes(x)), c("names", "dim", "dimnames"))) {
    attr(y, nm) <- attr(x, nm, exact = TRUE)
  }
  y
}
#' @method [<- disk
#' @export
#' @noRd
"[<-.disk" <- function(i, j, ..., value) {
  # the replacement is validated as a disk zone before it is stored
  value <- as.disk(value)
  y <- NextMethod()
  # Put back the class (and any other custom attribute) of the original vector
  # `i`. The positional attributes are skipped on purpose: the assignment may
  # have extended the vector or added names, which must not be overwritten by
  # the names of the original.
  for (nm in setdiff(names(attributes(i)), c("names", "dim", "dimnames"))) {
    attr(y, nm) <- attr(i, nm, exact = TRUE)
  }
  y
}
#' @method [[<- disk
#' @export
#' @noRd
"[[<-.disk" <- function(i, j, ..., value) {
  # the replacement is validated as a disk zone before it is stored
  value <- as.disk(value)
  y <- NextMethod()
  # Put back the class (and any other custom attribute) of the original vector
  # `i`. The positional attributes are skipped on purpose: the assignment may
  # have extended the vector or added a name, which must not be overwritten by
  # the names of the original.
  for (nm in setdiff(names(attributes(i)), c("names", "dim", "dimnames"))) {
    attr(y, nm) <- attr(i, nm, exact = TRUE)
  }
  y
}
#' @method c disk
#' @export
#' @noRd
c.disk <- function(...) {
  # Every argument is rendered as text first; the combined character vector is
  # then parsed by as.disk() again.
  pieces <- list(...)
  as_text <- lapply(pieces, as.character)
  as.disk(unlist(as_text))
}
#' @method unique disk
#' @export
#' @noRd
unique.disk <- function(x, incomparables = FALSE, ...) {
  y <- NextMethod()
  # Put back the class (and any other custom attribute) of `x`. The positional
  # attributes are skipped on purpose: once duplicates are removed, the names
  # of the full vector no longer fit the result.
  for (nm in setdiff(names(attributes(x)), c("names", "dim", "dimnames"))) {
    attr(y, nm) <- attr(x, nm, exact = TRUE)
  }
  y
}
#' @method rep disk
#' @export
#' @noRd
rep.disk <- function(x, ...) {
  y <- NextMethod()
  # Put back the class (and any other custom attribute) of `x`. The positional
  # attributes are skipped on purpose: NextMethod() already returned the
  # repeated names, which the shorter names of `x` must not overwrite.
  for (nm in setdiff(names(attributes(x)), c("names", "dim", "dimnames"))) {
    attr(y, nm) <- attr(x, nm, exact = TRUE)
  }
  y
}
# will be exported using s3_register() in R/zzz.R
# Summary functions used by skimr for columns of class 'disk'. All statistics
# are computed on the values converted to double; `column` itself is not used
# in the body.
get_skimmers.disk <- function(column) {
  skimr::sfl(
    skim_type = "disk",
    min = ~ min(as.double(.), na.rm = TRUE),
    max = ~ max(as.double(.), na.rm = TRUE),
    median = ~ stats::median(as.double(.), na.rm = TRUE),
    n_unique = ~ length(unique(stats::na.omit(.))),
    hist = ~ skimr::inline_hist(stats::na.omit(as.double(.)))
  )
}