2018-02-21 11:52:31 +01:00
# ==================================================================== #
2023-07-08 17:30:05 +02:00
# TITLE: #
2022-10-05 09:12:22 +02:00
# AMR: An R Package for Working with Antimicrobial Resistance Data #
2018-02-21 11:52:31 +01:00
# #
2023-07-08 17:30:05 +02:00
# SOURCE CODE: #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2018-02-21 11:52:31 +01:00
# #
2023-07-08 17:30:05 +02:00
# PLEASE CITE THIS SOFTWARE AS: #
2024-07-16 14:51:57 +02:00
# Berends MS, Luz CF, Friedrich AW, et al. (2022). #
# AMR: An R Package for Working with Antimicrobial Resistance Data. #
# Journal of Statistical Software, 104(3), 1-31. #
2023-05-27 10:39:22 +02:00
# https://doi.org/10.18637/jss.v104.i03 #
2022-10-05 09:12:22 +02:00
# #
2022-12-27 15:16:15 +01:00
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
2018-02-21 11:52:31 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
2020-01-05 17:22:09 +01:00
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
2021-02-02 23:57:35 +01:00
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2018-02-21 11:52:31 +01:00
# ==================================================================== #
2024-02-24 15:16:52 +01:00
# these are allowed MIC values and will become factor levels
2023-12-03 11:34:48 +01:00
VALID_MIC_LEVELS <- c (
2024-02-24 15:16:52 +01:00
as.double ( paste0 ( " 0.000" , c ( 1 : 9 ) ) ) ,
as.double ( paste0 ( " 0.00" , c ( 1 : 99 , 1953125 , 390625 , 78125 ) ) ) ,
as.double ( paste0 ( " 0.0" , c ( 1 : 99 , 125 , 128 , 156 , 165 , 256 , 512 , 625 , 3125 , 15625 ) ) ) ,
as.double ( paste0 ( " 0." , c ( 1 : 99 , 125 , 128 , 256 , 512 ) ) ) ,
1 : 9 , 1.5 ,
c ( 10 : 98 ) [9 : 98 %% 2 == TRUE ] ,
2 ^c ( 7 : 12 ) , 192 * c ( 1 : 5 ) , 80 * c ( 2 : 12 )
2022-08-28 10:31:50 +02:00
)
2024-02-24 15:16:52 +01:00
VALID_MIC_LEVELS <- trimws ( gsub ( " [.]?0+$" , " " , format ( unique ( sort ( VALID_MIC_LEVELS ) ) , scientific = FALSE ) , perl = TRUE ) )
operators <- c ( " <" , " <=" , " " , " >=" , " >" )
VALID_MIC_LEVELS <- c ( t ( vapply ( FUN.VALUE = character ( length ( VALID_MIC_LEVELS ) ) ,
c ( " <" , " <=" , " " , " >=" , " >" ) ,
paste0 ,
VALID_MIC_LEVELS ) ) )
2024-06-08 17:35:25 +02:00
COMMON_MIC_VALUES <- c ( 0.001 , 0.002 , 0.004 , 0.008 , 0.016 , 0.032 , 0.064 ,
0.125 , 0.25 , 0.5 , 1 , 2 , 4 , 8 , 16 , 32 ,
64 , 128 , 256 , 512 , 1024 )
2021-11-28 23:01:26 +01:00
2021-01-18 16:57:56 +01:00
#' Transform Input to Minimum Inhibitory Concentrations (MIC)
2018-02-21 11:52:31 +01:00
#'
2021-11-28 23:01:26 +01:00
#' This transforms vectors to a new class [`mic`], which treats the input as decimal numbers, while maintaining operators (such as ">=") and only allowing valid MIC values known to the field of (medical) microbiology.
2018-02-21 11:52:31 +01:00
#' @rdname as.mic
2021-05-12 18:15:03 +02:00
#' @param x a [character] or [numeric] vector
#' @param na.rm a [logical] indicating whether missing values should be removed
2023-12-03 01:06:00 +01:00
#' @param keep_operators a [character] specifying how to handle operators (such as `>` and `<=`) in the input. Accepts one of three values: `"all"` (or `TRUE`) to keep all operators, `"none"` (or `FALSE`) to remove all operators, or `"edges"` to keep operators only at both ends of the range.
2022-05-16 12:52:50 +02:00
#' @param ... arguments passed on to methods
2023-01-21 23:47:20 +01:00
#' @details To interpret MIC values as SIR values, use [as.sir()] on MIC values. It supports guidelines from EUCAST (`r min(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "EUCAST")$guideline)))`) and CLSI (`r min(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "CLSI")$guideline)))`-`r max(as.integer(gsub("[^0-9]", "", subset(clinical_breakpoints, guideline %like% "CLSI")$guideline)))`).
2022-08-28 10:31:50 +02:00
#'
2021-05-12 18:15:03 +02:00
#' This class for MIC values is a quite a special data type: formally it is an ordered [factor] with valid MIC values as [factor] levels (to make sure only valid MIC values are retained), but for any mathematical operation it acts as decimal numbers:
2022-08-28 10:31:50 +02:00
#'
2021-03-07 13:52:39 +01:00
#' ```
#' x <- random_mic(10)
#' x
2022-10-19 11:47:57 +02:00
#' #> Class 'mic'
2021-03-07 13:52:39 +01:00
#' #> [1] 16 1 8 8 64 >=128 0.0625 32 32 16
2022-08-28 10:31:50 +02:00
#'
2021-03-07 13:52:39 +01:00
#' is.factor(x)
#' #> [1] TRUE
2022-08-28 10:31:50 +02:00
#'
2021-03-07 13:52:39 +01:00
#' x[1] * 2
#' #> [1] 32
2022-08-28 10:31:50 +02:00
#'
2021-03-07 13:52:39 +01:00
#' median(x)
#' #> [1] 26
#' ```
2022-08-28 10:31:50 +02:00
#'
2021-05-12 18:15:03 +02:00
#' This makes it possible to maintain operators that often come with MIC values, such ">=" and "<=", even when filtering using [numeric] values in data analysis, e.g.:
2022-08-28 10:31:50 +02:00
#'
2021-03-07 13:52:39 +01:00
#' ```
#' x[x > 4]
2022-10-19 11:47:57 +02:00
#' #> Class 'mic'
2021-03-07 13:52:39 +01:00
#' #> [1] 16 8 8 64 >=128 32 32 16
2022-08-28 10:31:50 +02:00
#'
2021-03-07 13:52:39 +01:00
#' df <- data.frame(x, hospital = "A")
#' subset(df, x > 4) # or with dplyr: df %>% filter(x > 4)
#' #> x hospital
#' #> 1 16 A
#' #> 5 64 A
#' #> 6 >=128 A
#' #> 8 32 A
#' #> 9 32 A
#' #> 10 16 A
#' ```
2022-08-28 10:31:50 +02:00
#'
2024-02-24 15:16:52 +01:00
#' All so-called [group generic functions][groupGeneric()] are implemented for the MIC class (such as `!`, `!=`, `<`, `>=`, [exp()], [log2()]). Some functions of the `stats` package are also implemented (such as [quantile()], [median()], [fivenum()]). Since [sd()] and [var()] are non-generic functions, these could not be extended. Use [mad()] as an alternative, or use e.g. `sd(as.numeric(x))` where `x` is your vector of MIC values.
2022-08-28 10:31:50 +02:00
#'
2022-05-09 21:33:27 +02:00
#' Using [as.double()] or [as.numeric()] on MIC values will remove the operators and return a numeric vector. Do **not** use [as.integer()] on MIC values as by the \R convention on [factor]s, it will return the index of the factor levels (which is often useless for regular users).
2022-08-28 10:31:50 +02:00
#'
2022-10-19 11:47:57 +02:00
#' Use [droplevels()] to drop unused levels. At default, it will return a plain factor. Use `droplevels(..., as.mic = TRUE)` to maintain the `mic` class.
2023-12-03 11:34:48 +01:00
#'
2024-05-24 15:07:41 +02:00
#' With [rescale_mic()], existing MIC ranges can be limited to a defined range of MIC values. This can be useful to better compare MIC distributions.
2023-12-04 08:19:02 +01:00
#'
#' For `ggplot2`, use one of the [`scale_*_mic()`][scale_x_mic()] functions to plot MIC values. They allows custom MIC ranges and to plot intermediate log2 levels for missing MIC values.
2024-02-24 15:16:52 +01:00
#' @return Ordered [factor] with additional class [`mic`], that in mathematical operations acts as a [numeric] vector. Bear in mind that the outcome of any mathematical operation on MICs will return a [numeric] value.
2019-11-28 23:00:37 +01:00
#' @aliases mic
2018-02-21 11:52:31 +01:00
#' @export
2023-01-21 23:47:20 +01:00
#' @seealso [as.sir()]
2018-02-22 20:48:48 +01:00
#' @examples
#' mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
2022-08-21 16:37:20 +02:00
#' mic_data
2018-02-22 20:48:48 +01:00
#' is.mic(mic_data)
2018-04-02 16:05:09 +02:00
#'
2023-01-21 23:47:20 +01:00
#' # this can also coerce combined MIC/SIR values:
2022-08-21 16:37:20 +02:00
#' as.mic("<=0.002; S")
2022-08-28 10:31:50 +02:00
#'
2022-08-21 16:37:20 +02:00
#' # mathematical processing treats MICs as numeric values
2021-03-05 15:36:39 +01:00
#' fivenum(mic_data)
#' quantile(mic_data)
#' all(mic_data < 512)
2023-12-03 11:34:48 +01:00
#'
2024-07-16 14:51:57 +02:00
#' # rescale MICs using rescale_mic()
2024-05-24 15:07:41 +02:00
#' rescale_mic(mic_data, mic_range = c(4, 16))
2018-06-19 10:05:38 +02:00
#'
2019-05-10 16:44:59 +02:00
#' # interpret MIC values
2023-01-21 23:47:20 +01:00
#' as.sir(
2022-08-28 10:31:50 +02:00
#' x = as.mic(2),
#' mo = as.mo("Streptococcus pneumoniae"),
#' ab = "AMX",
#' guideline = "EUCAST"
#' )
2023-01-21 23:47:20 +01:00
#' as.sir(
2022-08-28 10:31:50 +02:00
#' x = as.mic(c(0.01, 2, 4, 8)),
#' mo = as.mo("Streptococcus pneumoniae"),
#' ab = "AMX",
#' guideline = "EUCAST"
#' )
2019-05-10 16:44:59 +02:00
#'
2021-02-25 10:33:08 +01:00
#' # plot MIC values, see ?plot
2018-02-22 20:48:48 +01:00
#' plot(mic_data)
2021-02-25 10:33:08 +01:00
#' plot(mic_data, mo = "E. coli", ab = "cipro")
2022-08-28 10:31:50 +02:00
#'
2022-08-27 20:49:37 +02:00
#' if (require("ggplot2")) {
#' autoplot(mic_data, mo = "E. coli", ab = "cipro")
#' }
#' if (require("ggplot2")) {
#' autoplot(mic_data, mo = "E. coli", ab = "cipro", language = "nl") # Dutch
#' }
2023-12-03 01:06:00 +01:00
as.mic <- function ( x , na.rm = FALSE , keep_operators = " all" ) {
2023-07-08 17:30:05 +02:00
meet_criteria ( x , allow_NA = TRUE )
2020-10-19 17:09:19 +02:00
meet_criteria ( na.rm , allow_class = " logical" , has_length = 1 )
2023-12-03 01:06:00 +01:00
meet_criteria ( keep_operators , allow_class = c ( " character" , " logical" ) , is_in = c ( " all" , " none" , " edges" , FALSE , TRUE ) , has_length = 1 )
if ( isTRUE ( keep_operators ) ) {
keep_operators <- " all"
} else if ( isFALSE ( keep_operators ) ) {
keep_operators <- " none"
}
if ( is.mic ( x ) && ( keep_operators == " all" || ! any ( x %like% " [>=<]" , na.rm = TRUE ) ) ) {
2024-04-05 16:44:43 +02:00
if ( ! identical ( levels ( x ) , VALID_MIC_LEVELS ) ) {
# from an older AMR version - just update MIC factor levels
x <- set_clean_class ( factor ( as.character ( x ) , levels = VALID_MIC_LEVELS , ordered = TRUE ) ,
new_class = c ( " mic" , " ordered" , " factor" ) )
2023-12-03 01:06:00 +01:00
}
2024-04-05 16:44:43 +02:00
return ( x )
}
x.bak <- NULL
if ( is.numeric ( x ) ) {
x.bak <- format ( x , scientific = FALSE )
# MICs never have more than 9 decimals, so:
x <- format ( round ( x , 9 ) , scientific = FALSE )
} else {
x <- as.character ( unlist ( x ) )
}
if ( isTRUE ( na.rm ) ) {
x <- x [ ! is.na ( x ) ]
}
x <- trimws2 ( x )
x [x == " " ] <- NA
if ( is.null ( x.bak ) ) {
x.bak <- x
}
# comma to period
x <- gsub ( " ," , " ." , x , fixed = TRUE )
# transform scientific notation
x [x %like% " [-]?[0-9]+([.][0-9]+)?e[-]?[0-9]+" ] <- as.double ( x [x %like% " [-]?[0-9]+([.][0-9]+)?e[-]?[0-9]+" ] )
# transform Unicode for >= and <=
x <- gsub ( " \u2264" , " <=" , x , fixed = TRUE )
x <- gsub ( " \u2265" , " >=" , x , fixed = TRUE )
# remove other invalid characters
x <- gsub ( " [^a-zA-Z0-9.><= ]+" , " " , x , perl = TRUE )
# remove space between operator and number ("<= 0.002" -> "<=0.002")
x <- gsub ( " (<|=|>) +" , " \\1" , x , perl = TRUE )
# transform => to >= and =< to <=
x <- gsub ( " =<" , " <=" , x , fixed = TRUE )
x <- gsub ( " =>" , " >=" , x , fixed = TRUE )
# dots without a leading zero must start with 0
x <- gsub ( " ([^0-9]|^)[.]" , " \\10." , x , perl = TRUE )
# values like "<=0.2560.512" should be 0.512
x <- gsub ( " .*[.].*[.]" , " 0." , x , perl = TRUE )
# remove ending .0
x <- gsub ( " [.]+0$" , " " , x , perl = TRUE )
# remove all after last digit
x <- gsub ( " [^0-9]+$" , " " , x , perl = TRUE )
# keep only one zero before dot
x <- gsub ( " 0+[.]" , " 0." , x , perl = TRUE )
# starting 00 is probably 0.0 if there's no dot yet
x [x %unlike% " [.]" ] <- gsub ( " ^00" , " 0.0" , x [ ! x %like% " [.]" ] )
# remove last zeroes
x <- gsub ( " ([.].?)0+$" , " \\1" , x , perl = TRUE )
x <- gsub ( " (.*[.])0+$" , " \\10" , x , perl = TRUE )
# remove ending .0 again
x [x %like% " [.]" ] <- gsub ( " 0+$" , " " , x [x %like% " [.]" ] )
# never end with dot
x <- gsub ( " [.]$" , " " , x , perl = TRUE )
# trim it
x <- trimws2 ( x )
## previously unempty values now empty - should return a warning later on
x [x.bak != " " & x == " " ] <- " invalid"
na_before <- x [is.na ( x ) | x == " " ] %pm>% length ( )
x [ ! x %in% VALID_MIC_LEVELS ] <- NA
na_after <- x [is.na ( x ) | x == " " ] %pm>% length ( )
if ( na_before != na_after ) {
list_missing <- x.bak [is.na ( x ) & ! is.na ( x.bak ) & x.bak != " " ] %pm>%
unique ( ) %pm>%
sort ( ) %pm>%
vector_and ( quotes = TRUE )
cur_col <- get_current_column ( )
warning_ ( " in `as.mic()`: " , na_after - na_before , " result" ,
ifelse ( na_after - na_before > 1 , " s" , " " ) ,
ifelse ( is.null ( cur_col ) , " " , paste0 ( " in column '" , cur_col , " '" ) ) ,
" truncated (" ,
round ( ( ( na_after - na_before ) / length ( x ) ) * 100 ) ,
" %) that were invalid MICs: " ,
list_missing ,
call = FALSE
2022-08-28 10:31:50 +02:00
)
2018-02-21 11:52:31 +01:00
}
2024-04-05 16:44:43 +02:00
if ( keep_operators == " none" && ! all ( is.na ( x ) ) ) {
x <- gsub ( " [>=<]" , " " , x )
} else if ( keep_operators == " edges" && ! all ( is.na ( x ) ) ) {
dbls <- as.double ( gsub ( " [>=<]" , " " , x ) )
x [dbls == min ( dbls , na.rm = TRUE ) ] <- paste0 ( " <=" , min ( dbls , na.rm = TRUE ) )
x [dbls == max ( dbls , na.rm = TRUE ) ] <- paste0 ( " >=" , max ( dbls , na.rm = TRUE ) )
keep <- x [dbls == max ( dbls , na.rm = TRUE ) | dbls == min ( dbls , na.rm = TRUE ) ]
x [ ! x %in% keep ] <- gsub ( " [>=<]" , " " , x [ ! x %in% keep ] )
}
set_clean_class ( factor ( x , levels = VALID_MIC_LEVELS , ordered = TRUE ) ,
new_class = c ( " mic" , " ordered" , " factor" ) )
2018-02-21 11:52:31 +01:00
}
2023-12-03 11:34:48 +01:00
#' @rdname as.mic
#' @export
is.mic <- function ( x ) {
inherits ( x , " mic" )
2020-02-20 13:19:23 +01:00
}
2021-11-28 23:01:26 +01:00
#' @rdname as.mic
2022-10-19 11:47:57 +02:00
#' @details `NA_mic_` is a missing value of the new `mic` class, analogous to e.g. base \R's [`NA_character_`][base::NA].
2022-05-09 21:33:27 +02:00
#' @format NULL
2021-11-28 23:01:26 +01:00
#' @export
2023-12-03 11:34:48 +01:00
NA_mic_ <- set_clean_class ( factor ( NA , levels = VALID_MIC_LEVELS , ordered = TRUE ) ,
2022-08-28 10:31:50 +02:00
new_class = c ( " mic" , " ordered" , " factor" )
)
2021-11-28 23:01:26 +01:00
2018-02-21 11:52:31 +01:00
#' @rdname as.mic
2024-02-24 15:16:52 +01:00
#' @param mic_range a manual range to limit the MIC values, e.g., `mic_range = c(0.001, 32)`. Use `NA` to set no limit on one side, e.g., `mic_range = c(NA, 32)`.
2018-02-21 11:52:31 +01:00
#' @export
2024-05-24 15:07:41 +02:00
rescale_mic <- function ( x , mic_range , keep_operators = " edges" , as.mic = TRUE ) {
2023-12-03 11:34:48 +01:00
meet_criteria ( mic_range , allow_class = c ( " numeric" , " integer" , " logical" ) , has_length = 2 , allow_NA = TRUE , allow_NULL = TRUE )
stop_ifnot ( all ( mic_range %in% c ( VALID_MIC_LEVELS , NA ) ) ,
2024-02-24 15:16:52 +01:00
" Values in `mic_range` must be valid MIC values. Unvalid: " , vector_and ( mic_range [mic_range %in% c ( VALID_MIC_LEVELS , NA ) ] ) )
2023-12-03 11:34:48 +01:00
x <- as.mic ( x )
if ( is.null ( mic_range ) ) {
mic_range <- c ( NA , NA )
}
mic_range <- as.mic ( mic_range )
min_mic <- mic_range [1 ]
max_mic <- mic_range [2 ]
if ( ! is.na ( min_mic ) ) {
x [x < min_mic ] <- min_mic
}
if ( ! is.na ( max_mic ) ) {
x [x > max_mic ] <- max_mic
}
x <- as.mic ( x , keep_operators = ifelse ( keep_operators == " edges" , " none" , keep_operators ) )
if ( isTRUE ( as.mic ) ) {
if ( keep_operators == " edges" ) {
x [x == min ( x , na.rm = TRUE ) ] <- paste0 ( " <=" , x [x == min ( x , na.rm = TRUE ) ] )
x [x == max ( x , na.rm = TRUE ) ] <- paste0 ( " >=" , x [x == max ( x , na.rm = TRUE ) ] )
}
return ( x )
}
# create a manual factor with levels only within desired range
2024-06-08 17:35:25 +02:00
expanded <- plotrange_as_table ( x ,
2024-09-24 22:39:40 +02:00
expand = TRUE ,
keep_operators = ifelse ( keep_operators == " edges" , " none" , keep_operators ) ,
mic_range = mic_range )
2023-12-03 11:34:48 +01:00
if ( keep_operators == " edges" ) {
names ( expanded ) [1 ] <- paste0 ( " <=" , names ( expanded ) [1 ] )
names ( expanded ) [length ( expanded ) ] <- paste0 ( " >=" , names ( expanded ) [length ( expanded ) ] )
}
# MICs contain all MIC levels, so strip this to only existing levels and their intermediate values
out <- factor ( names ( expanded ) ,
levels = names ( expanded ) ,
ordered = TRUE )
# and only keep the ones in the data
if ( keep_operators == " edges" ) {
out <- out [match ( x , as.double ( as.mic ( out , keep_operators = " all" ) ) ) ]
} else {
out <- out [match ( x , out ) ]
}
out
2018-02-21 11:52:31 +01:00
}
2020-05-28 16:48:55 +02:00
#' @method as.double mic
2018-02-21 11:52:31 +01:00
#' @export
#' @noRd
as.double.mic <- function ( x , ... ) {
2021-03-07 21:16:45 +01:00
as.double ( gsub ( " [<=>]+" , " " , as.character ( x ) , perl = TRUE ) )
2018-02-21 11:52:31 +01:00
}
2020-05-28 16:48:55 +02:00
#' @method as.numeric mic
2018-02-21 11:52:31 +01:00
#' @export
#' @noRd
as.numeric.mic <- function ( x , ... ) {
2021-03-07 21:16:45 +01:00
as.numeric ( gsub ( " [<=>]+" , " " , as.character ( x ) , perl = TRUE ) )
2018-02-21 11:52:31 +01:00
}
2022-05-09 21:33:27 +02:00
#' @rdname as.mic
2020-05-28 16:48:55 +02:00
#' @method droplevels mic
2023-02-22 14:38:57 +01:00
#' @param as.mic a [logical] to indicate whether the `mic` class should be kept - the default is `FALSE`
2018-12-29 22:24:19 +01:00
#' @export
2022-05-16 12:52:50 +02:00
droplevels.mic <- function ( x , as.mic = FALSE , ... ) {
2024-04-07 20:22:59 +02:00
x <- as.mic ( x ) # make sure that currently implemented MIC levels are used
2022-05-16 12:52:50 +02:00
x <- droplevels.factor ( x , ... )
2021-02-25 10:33:08 +01:00
if ( as.mic == TRUE ) {
class ( x ) <- c ( " mic" , " ordered" , " factor" )
}
2018-12-29 22:24:19 +01:00
x
}
2023-12-03 11:34:48 +01:00
all_valid_mics <- function ( x ) {
if ( ! inherits ( x , c ( " mic" , " character" , " factor" , " numeric" , " integer" ) ) ) {
return ( FALSE )
}
x_mic <- tryCatch ( suppressWarnings ( as.mic ( x [ ! is.na ( x ) ] ) ) ,
error = function ( e ) NA
)
! any ( is.na ( x_mic ) ) && ! all ( is.na ( x ) )
}
2020-08-28 21:55:47 +02:00
# will be exported using s3_register() in R/zzz.R
2020-08-26 11:33:54 +02:00
pillar_shaft.mic <- function ( x , ... ) {
2024-04-19 10:18:21 +02:00
if ( ! identical ( levels ( x ) , VALID_MIC_LEVELS ) && message_not_thrown_before ( " pillar_shaft.mic" ) ) {
warning_ ( AMR_env $ sup_1_icon , " These columns contain an outdated or altered structure - convert with `as.mic()` to update" ,
call = FALSE )
}
2020-12-09 09:40:50 +01:00
crude_numbers <- as.double ( x )
operators <- gsub ( " [^<=>]+" , " " , as.character ( x ) )
2022-03-15 17:35:02 +01:00
operators [ ! is.na ( operators ) & operators != " " ] <- font_silver ( operators [ ! is.na ( operators ) & operators != " " ] , collapse = NULL )
2022-03-10 19:33:25 +01:00
out <- trimws ( paste0 ( operators , trimws ( format ( crude_numbers ) ) ) )
2020-08-28 21:55:47 +02:00
out [is.na ( x ) ] <- font_na ( NA )
2023-12-03 01:06:00 +01:00
# make trailing zeroes less visible
out [out %like% " [.]" ] <- gsub ( " ([.]?0+)$" , font_silver ( " \\1" ) , out [out %like% " [.]" ] , perl = TRUE )
2022-03-10 19:33:25 +01:00
create_pillar_column ( out , align = " right" , width = max ( nchar ( font_stripstyle ( out ) ) ) )
2020-08-26 11:33:54 +02:00
}
2020-08-28 21:55:47 +02:00
# will be exported using s3_register() in R/zzz.R
2020-08-26 11:33:54 +02:00
type_sum.mic <- function ( x , ... ) {
2024-04-19 10:18:21 +02:00
if ( ! identical ( levels ( x ) , VALID_MIC_LEVELS ) ) {
paste0 ( " mic" , AMR_env $ sup_1_icon )
} else {
" mic"
}
2020-08-26 11:33:54 +02:00
}
2020-05-28 16:48:55 +02:00
#' @method print mic
2018-02-21 11:52:31 +01:00
#' @export
#' @noRd
print.mic <- function ( x , ... ) {
2024-04-19 10:18:21 +02:00
cat ( " Class 'mic'" )
2024-04-07 20:22:59 +02:00
if ( ! identical ( levels ( x ) , VALID_MIC_LEVELS ) ) {
2024-04-19 10:18:21 +02:00
cat ( font_red ( " with an outdated or altered structure - convert with `as.mic()` to update" ) )
2024-04-07 20:22:59 +02:00
}
2024-04-19 10:18:21 +02:00
cat ( " \n" )
2018-08-01 22:37:28 +02:00
print ( as.character ( x ) , quote = FALSE )
2021-03-07 13:52:39 +01:00
att <- attributes ( x )
if ( " na.action" %in% names ( att ) ) {
cat ( font_silver ( paste0 ( " (NA " , class ( att $ na.action ) , " : " , paste0 ( att $ na.action , collapse = " , " ) , " )\n" ) ) )
}
2018-02-21 11:52:31 +01:00
}
2020-05-28 16:48:55 +02:00
#' @method summary mic
2018-02-21 11:52:31 +01:00
#' @export
#' @noRd
summary.mic <- function ( object , ... ) {
2021-03-07 13:52:39 +01:00
summary ( as.double ( object ) , ... )
2018-02-21 11:52:31 +01:00
}
2021-03-07 21:16:45 +01:00
#' @method as.matrix mic
#' @export
#' @noRd
as.matrix.mic <- function ( x , ... ) {
as.matrix ( as.double ( x ) , ... )
}
2024-04-07 20:22:59 +02:00
#' @method as.vector mic
#' @export
#' @noRd
as.vector.mic <- function ( x , mode = " numneric" , ... ) {
y <- NextMethod ( )
y <- as.mic ( y )
calls <- unlist ( lapply ( sys.calls ( ) , as.character ) )
if ( any ( calls %in% c ( " rbind" , " cbind" ) ) && message_not_thrown_before ( " as.vector.mic" ) ) {
2024-04-19 10:18:21 +02:00
warning_ ( " Functions `rbind()` and `cbind()` cannot preserve the structure of MIC values. Use dplyr's `bind_rows()` or `bind_cols()` instead." , call = FALSE )
2024-04-07 20:22:59 +02:00
}
y
}
#' @method as.list mic
#' @export
#' @noRd
as.list.mic <- function ( x , ... ) {
lapply ( as.list ( as.character ( x ) , ... ) , as.mic )
}
#' @method as.data.frame mic
#' @export
#' @noRd
as.data.frame.mic <- function ( x , ... ) {
as.data.frame.vector ( as.mic ( x ) , ... )
}
2021-03-07 21:16:45 +01:00
2020-05-28 16:48:55 +02:00
#' @method [ mic
2020-04-13 21:09:56 +02:00
#' @export
#' @noRd
" [.mic" <- function ( x , ... ) {
y <- NextMethod ( )
2024-04-07 20:22:59 +02:00
as.mic ( y )
2020-04-13 21:09:56 +02:00
}
2020-05-28 16:48:55 +02:00
#' @method [[ mic
2020-04-13 21:09:56 +02:00
#' @export
#' @noRd
" [[.mic" <- function ( x , ... ) {
y <- NextMethod ( )
2024-04-07 20:22:59 +02:00
as.mic ( y )
2020-04-13 21:09:56 +02:00
}
2020-05-28 16:48:55 +02:00
#' @method [<- mic
2020-04-13 21:09:56 +02:00
#' @export
#' @noRd
" [<-.mic" <- function ( i , j , ... , value ) {
value <- as.mic ( value )
y <- NextMethod ( )
2024-04-07 20:22:59 +02:00
as.mic ( y )
2020-04-13 21:09:56 +02:00
}
2020-05-28 16:48:55 +02:00
#' @method [[<- mic
2020-04-13 21:09:56 +02:00
#' @export
#' @noRd
" [[<-.mic" <- function ( i , j , ... , value ) {
value <- as.mic ( value )
y <- NextMethod ( )
2024-04-07 20:22:59 +02:00
as.mic ( y )
2020-04-13 21:09:56 +02:00
}
2020-05-28 16:48:55 +02:00
#' @method c mic
2020-04-13 21:09:56 +02:00
#' @export
#' @noRd
2021-05-03 13:06:43 +02:00
c.mic <- function ( ... ) {
as.mic ( unlist ( lapply ( list ( ... ) , as.character ) ) )
2020-04-13 21:09:56 +02:00
}
2020-09-25 14:44:50 +02:00
#' @method unique mic
#' @export
#' @noRd
unique.mic <- function ( x , incomparables = FALSE , ... ) {
y <- NextMethod ( )
2024-04-07 20:22:59 +02:00
as.mic ( y )
2020-09-25 14:44:50 +02:00
}
2020-09-28 01:08:55 +02:00
2021-07-06 16:35:14 +02:00
#' @method rep mic
#' @export
#' @noRd
rep.mic <- function ( x , ... ) {
y <- NextMethod ( )
2024-04-07 20:22:59 +02:00
as.mic ( y )
2021-07-06 16:35:14 +02:00
}
2021-03-07 13:52:39 +01:00
#' @method sort mic
2021-03-05 15:36:39 +01:00
#' @export
#' @noRd
2021-03-07 13:52:39 +01:00
sort.mic <- function ( x , decreasing = FALSE , ... ) {
2024-04-07 20:22:59 +02:00
x <- as.mic ( x ) # make sure that currently implemented MIC levels are used
2024-05-30 15:50:17 +02:00
dbl <- as.double ( x )
# make sure that e.g. '<0.001' comes before '0.001', and '>0.001' comes after
dbl [as.character ( x ) %like% " <[0-9]" ] <- dbl [as.character ( x ) %like% " <[0-9]" ] - 0.000002
dbl [as.character ( x ) %like% " <=" ] <- dbl [as.character ( x ) %like% " <=" ] - 0.000001
dbl [as.character ( x ) %like% " >=" ] <- dbl [as.character ( x ) %like% " >=" ] + 0.000001
dbl [as.character ( x ) %like% " >[0-9]" ] <- dbl [as.character ( x ) %like% " >[0-9]" ] + 0.000002
2021-03-07 13:52:39 +01:00
if ( decreasing == TRUE ) {
2024-05-30 15:50:17 +02:00
x [order ( - dbl ) ]
2021-03-07 13:52:39 +01:00
} else {
2024-05-30 15:50:17 +02:00
x [order ( dbl ) ]
2021-03-05 15:36:39 +01:00
}
}
2021-03-07 13:52:39 +01:00
#' @method hist mic
2021-03-07 21:16:45 +01:00
#' @importFrom graphics hist
2021-03-05 15:36:39 +01:00
#' @export
#' @noRd
2021-03-07 13:52:39 +01:00
hist.mic <- function ( x , ... ) {
2022-03-02 15:38:55 +01:00
warning_ ( " in `hist()`: use `plot()` or ggplot2's `autoplot()` for optimal plotting of MIC values" )
2021-03-07 13:52:39 +01:00
hist ( log2 ( x ) )
2021-03-05 15:36:39 +01:00
}
2021-03-07 13:52:39 +01:00
# will be exported using s3_register() in R/zzz.R
get_skimmers.mic <- function ( column ) {
2024-04-07 20:22:59 +02:00
column <- as.mic ( column ) # make sure that currently implemented MIC levels are used
2021-03-07 13:52:39 +01:00
skimr :: sfl (
skim_type = " mic" ,
2022-08-28 10:31:50 +02:00
p0 = ~ stats :: quantile ( ., probs = 0 , na.rm = TRUE , names = FALSE ) ,
p25 = ~ stats :: quantile ( ., probs = 0.25 , na.rm = TRUE , names = FALSE ) ,
p50 = ~ stats :: quantile ( ., probs = 0.5 , na.rm = TRUE , names = FALSE ) ,
p75 = ~ stats :: quantile ( ., probs = 0.75 , na.rm = TRUE , names = FALSE ) ,
p100 = ~ stats :: quantile ( ., probs = 1 , na.rm = TRUE , names = FALSE ) ,
hist = ~ skimr :: inline_hist ( log2 ( stats :: na.omit ( .) ) , 5 )
2021-03-07 13:52:39 +01:00
)
2021-03-05 15:36:39 +01:00
}
2021-03-07 13:52:39 +01:00
# Miscellaneous mathematical functions ------------------------------------
#' @method mean mic
2021-03-05 15:36:39 +01:00
#' @export
#' @noRd
2021-03-07 13:52:39 +01:00
mean.mic <- function ( x , trim = 0 , na.rm = FALSE , ... ) {
mean ( as.double ( x ) , trim = trim , na.rm = na.rm , ... )
2021-03-05 15:36:39 +01:00
}
2021-03-07 13:52:39 +01:00
#' @method median mic
2021-03-07 21:16:45 +01:00
#' @importFrom stats median
2021-03-05 15:36:39 +01:00
#' @export
#' @noRd
2021-03-07 13:52:39 +01:00
median.mic <- function ( x , na.rm = FALSE , ... ) {
2021-03-07 21:16:45 +01:00
median ( as.double ( x ) , na.rm = na.rm , ... )
2021-03-05 15:36:39 +01:00
}
2021-03-07 13:52:39 +01:00
#' @method quantile mic
2021-03-07 21:16:45 +01:00
#' @importFrom stats quantile
2021-03-05 15:36:39 +01:00
#' @export
#' @noRd
2021-03-07 13:52:39 +01:00
quantile.mic <- function ( x , probs = seq ( 0 , 1 , 0.25 ) , na.rm = FALSE ,
names = TRUE , type = 7 , ... ) {
2021-03-07 21:16:45 +01:00
quantile ( as.double ( x ) , probs = probs , na.rm = na.rm , names = names , type = type , ... )
2021-03-05 15:36:39 +01:00
}
2023-10-20 14:51:48 +02:00
# Math (see ?groupGeneric) ------------------------------------------------
2021-03-07 13:52:39 +01:00
2021-03-05 15:36:39 +01:00
#' @export
2023-10-20 14:51:48 +02:00
Math.mic <- function ( x , ... ) {
x <- as.double ( x )
# set class to numeric, because otherwise NextMethod will be factor (since mic is a factor)
.Class <- class ( x )
NextMethod ( .Generic )
2021-03-07 13:52:39 +01:00
}
2023-10-20 14:51:48 +02:00
# Ops (see ?groupGeneric) -------------------------------------------------
2021-03-05 15:36:39 +01:00
#' @export
2023-10-20 14:51:48 +02:00
Ops.mic <- function ( e1 , e2 ) {
2024-04-05 16:44:43 +02:00
e1_chr <- as.character ( e1 )
e2_chr <- character ( 0 )
2023-10-20 14:51:48 +02:00
e1 <- as.double ( e1 )
if ( ! missing ( e2 ) ) {
2024-04-19 10:18:21 +02:00
# when .Generic is `!`, e2 is missing
2024-04-05 16:44:43 +02:00
e2_chr <- as.character ( e2 )
2023-10-20 14:51:48 +02:00
e2 <- as.double ( e2 )
}
2024-04-05 16:44:43 +02:00
if ( as.character ( .Generic ) %in% c ( " <" , " <=" , " ==" , " !=" , " >" , " >=" ) ) {
# make sure that <0.002 is lower than 0.002
# and that >32 is higher than 32, but equal to >=32
e1 [e1_chr %like% " <" & e1_chr %unlike% " =" ] <- e1 [e1_chr %like% " <" & e1_chr %unlike% " =" ] - 0.000001
e1 [e1_chr %like% " >" & e1_chr %unlike% " =" ] <- e1 [e1_chr %like% " >" & e1_chr %unlike% " =" ] + 0.000001
e2 [e2_chr %like% " <" & e2_chr %unlike% " =" ] <- e2 [e2_chr %like% " <" & e2_chr %unlike% " =" ] - 0.000001
e2 [e2_chr %like% " >" & e2_chr %unlike% " =" ] <- e2 [e2_chr %like% " >" & e2_chr %unlike% " =" ] + 0.000001
}
# set .Class to numeric, because otherwise NextMethod will be factor (since mic is a factor)
2023-10-20 14:51:48 +02:00
.Class <- class ( e1 )
NextMethod ( .Generic )
2021-03-05 15:36:39 +01:00
}
2023-10-20 14:51:48 +02:00
# Complex (see ?groupGeneric) ---------------------------------------------
2021-03-05 15:36:39 +01:00
#' @export
2023-10-20 14:51:48 +02:00
Complex.mic <- function ( z ) {
z <- as.double ( z )
# set class to numeric, because otherwise NextMethod will be factor (since mic is a factor)
.Class <- class ( z )
NextMethod ( .Generic )
2021-03-05 15:36:39 +01:00
}
2023-10-20 14:51:48 +02:00
# Summary (see ?groupGeneric) ---------------------------------------------
2021-03-05 15:36:39 +01:00
#' @export
2023-10-20 14:51:48 +02:00
Summary.mic <- function ( ... , na.rm = FALSE ) {
# NextMethod() cannot be called from an anonymous function (`...`), so we get() the generic directly:
fn <- get ( .Generic , envir = .GenericCallEnv )
fn ( as.double ( c ( ... ) ) ,
na.rm = na.rm )
2020-09-28 01:08:55 +02:00
}