2018-02-21 11:52:31 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
2018-04-18 12:24:54 +02:00
#' Pattern Matching
#'
#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
#' @inheritParams base::grep
#' @return A \code{logical} vector
#' @name like
#' @rdname like
#' @export
#' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
#' @examples
#' library(dplyr)
#' # get unique occurences of bacteria whose name start with 'Ent'
#' septic_patients %>%
#' left_join_microorganisms() %>%
#' filter(fullname %like% '^Ent') %>%
#' pull(fullname) %>%
#' unique()
" %like%" <- function ( x , pattern ) {
if ( length ( pattern ) > 1 ) {
pattern <- pattern [1 ]
warning ( ' only the first element of argument `pattern` used for `%like%`' , call. = FALSE )
}
if ( is.factor ( x ) ) {
as.integer ( x ) %in% base :: grep ( pattern , levels ( x ) , ignore.case = TRUE )
2018-02-21 11:52:31 +01:00
} else {
2018-04-18 12:24:54 +02:00
base :: grepl ( pattern , x , ignore.case = TRUE )
2018-02-21 11:52:31 +01:00
}
}
2018-03-19 12:43:22 +01:00
# No export, no Rd
2018-04-18 12:24:54 +02:00
percent <- function ( x , round = 1 , force_zero = FALSE , ... ) {
val <- base :: round ( x * 100 , digits = round )
2018-05-02 14:56:25 +02:00
if ( force_zero == TRUE & any ( val == as.integer ( val ) & ! is.na ( val ) ) ) {
2018-04-19 14:10:57 +02:00
val [val == as.integer ( val ) ] <- paste0 ( val [val == as.integer ( val ) ] , " ." , strrep ( 0 , round ) )
2018-04-18 12:24:54 +02:00
}
2018-05-02 14:56:25 +02:00
pct <- base :: paste0 ( val , " %" )
pct [pct == " NA%" ] <- NA_character_
pct
2018-04-18 12:24:54 +02:00
}
check_available_columns <- function ( tbl , col.list , info = TRUE ) {
# check columns
col.list <- col.list [ ! is.na ( col.list ) ]
names ( col.list ) <- col.list
col.list.bak <- col.list
# are they available as upper case or lower case then?
for ( i in 1 : length ( col.list ) ) {
if ( toupper ( col.list [i ] ) %in% colnames ( tbl ) ) {
col.list [i ] <- toupper ( col.list [i ] )
} else if ( tolower ( col.list [i ] ) %in% colnames ( tbl ) ) {
col.list [i ] <- tolower ( col.list [i ] )
} else if ( ! col.list [i ] %in% colnames ( tbl ) ) {
col.list [i ] <- NA
}
}
if ( ! all ( col.list %in% colnames ( tbl ) ) ) {
if ( info == TRUE ) {
warning ( ' These columns do not exist and will be ignored: ' ,
col.list.bak [ ! ( col.list %in% colnames ( tbl ) ) ] %>% toString ( ) ,
2018-04-25 15:33:58 +02:00
' .\nTHIS MAY STRONGLY INFLUENCE THE OUTCOME.' ,
2018-04-18 12:24:54 +02:00
immediate. = TRUE ,
call. = FALSE )
}
}
col.list
}
# Coefficient of variation (CV)
cv <- function ( x , na.rm = TRUE ) {
2018-06-29 09:06:47 +02:00
stats :: sd ( x , na.rm = na.rm ) / base :: abs ( base :: mean ( x , na.rm = na.rm ) )
2018-04-18 12:24:54 +02:00
}
# Coefficient of dispersion, or coefficient of quartile variation (CQV).
# (Bonett et al., 2006: Confidence interval for a coefficient of quartile variation).
cqv <- function ( x , na.rm = TRUE ) {
cqv.x <-
2018-06-29 09:06:47 +02:00
( stats :: quantile ( x , 0.75 , na.rm = na.rm , type = 6 ) - stats :: quantile ( x , 0.25 , na.rm = na.rm , type = 6 ) ) /
( stats :: quantile ( x , 0.75 , na.rm = na.rm , type = 6 ) + stats :: quantile ( x , 0.25 , na.rm = na.rm , type = 6 ) )
2018-04-18 12:24:54 +02:00
unname ( cqv.x )
}
# show bytes as kB/MB/GB
# size_humanreadable(123456) # 121 kB
# size_humanreadable(12345678) # 11.8 MB
size_humanreadable <- function ( bytes , decimals = 1 ) {
bytes <- bytes %>% as.double ( )
# Adapted from:
# http://jeffreysambells.com/2012/10/25/human-readable-filesize-php
size <- c ( ' B' , ' kB' , ' MB' , ' GB' , ' TB' , ' PB' , ' EB' , ' ZB' , ' YB' )
factor <- floor ( ( nchar ( bytes ) - 1 ) / 3 )
# added slight improvement; no decimals for B and kB:
decimals <- rep ( decimals , length ( bytes ) )
decimals [size [factor + 1 ] %in% c ( ' B' , ' kB' ) ] <- 0
out <- paste ( sprintf ( paste0 ( " %." , decimals , " f" ) , bytes / ( 1024 ^ factor ) ) , size [factor + 1 ] )
out
2018-02-21 11:52:31 +01:00
}
2018-06-27 15:54:56 +02:00
2018-06-29 08:56:03 +02:00
# based on readr::parse_guess
tbl_parse_guess <- function ( tbl ,
date_names = ' en' ,
date_format = ' %Y-%m-%d' ,
time_format = ' %H:%M' ,
decimal_mark = ' .' ,
tz = Sys.timezone ( ) ,
encoding = " UTF-8" ,
na = c ( " " , " NA" , " NULL" ) ) {
date_format <- date_generic ( date_format )
time_format <- date_generic ( time_format )
# set col types with readr
for ( i in 1 : ncol ( tbl ) ) {
if ( ! all ( tbl %>% pull ( i ) %>% class ( ) %in% c ( ' list' , ' matrix' ) ) ) {
tbl [ , i ] <- readr :: parse_guess ( x = tbl %>% pull ( i ) %>% as.character ( ) ,
na = na ,
locale = readr :: locale ( date_names = date_names ,
date_format = date_format ,
time_format = time_format ,
decimal_mark = decimal_mark ,
encoding = encoding ,
tz = tz ,
asciify = FALSE ) )
}
if ( any ( tbl %>% pull ( i ) %>% class ( ) %in% c ( ' factor' , ' character' ) ) ) {
# get values
distinct_val <- tbl %>% pull ( i ) %>% unique ( ) %>% sort ( )
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
tbl [ , i ] <- tbl %>% pull ( i ) %>% gsub ( ' \033' , ' ' , ., fixed = TRUE )
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
if ( ! all ( distinct_val [ ! is.na ( distinct_val ) ] == ' ' )
& all ( distinct_val [ ! is.na ( distinct_val ) ] %in% c ( ' ' , ' I' , ' I;I' , ' R' , ' R;R' , ' S' , ' S;S' ) ) ) {
tbl [ , i ] <- tbl %>% pull ( i ) %>% as.rsi ( )
}
}
# convert to MIC class
if ( colnames ( tbl ) [i ] %like% ' _mic$' ) {
tbl [ , i ] <- tbl %>% pull ( i ) %>% as.mic ( )
}
}
tbl
}
2018-06-27 15:54:56 +02:00
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
date_generic <- function ( format ) {
if ( ! grepl ( ' %' , format , fixed = TRUE ) ) {
# first months and minutes, after that everything is case INsensitive
format <- gsub ( ' mmmm' , ' %B1' , format , fixed = TRUE )
format <- gsub ( ' mmm' , ' %b' , format , fixed = TRUE )
format <- gsub ( ' mm' , ' %m' , format , fixed = TRUE )
format <- gsub ( ' MM' , ' %M1' , format , fixed = TRUE )
format <- format %>%
tolower ( ) %>%
gsub ( ' %b1' , ' %B' , ., fixed = TRUE ) %>%
gsub ( ' %m1' , ' %M' , ., fixed = TRUE )
# dates
format <- gsub ( ' dddd' , ' %A' , format , fixed = TRUE )
format <- gsub ( ' ddd' , ' %a' , format , fixed = TRUE )
format <- gsub ( ' dd' , ' %!' , format , fixed = TRUE )
format <- gsub ( ' d' , ' %e' , format , fixed = TRUE )
format <- gsub ( ' %!' , ' %d' , format , fixed = TRUE )
format <- gsub ( ' ww' , ' %V' , format , fixed = TRUE )
format <- gsub ( ' w' , ' %V' , format , fixed = TRUE )
format <- gsub ( ' qq' , ' Qq' , format , fixed = TRUE ) # so will be 'Q%%q' after this
format <- gsub ( ' kk' , ' Kq' , format , fixed = TRUE )
format <- gsub ( ' k' , ' q' , format , fixed = TRUE )
format <- gsub ( ' q' , ' %%q' , format , fixed = TRUE )
format <- gsub ( ' yyyy_iso' , ' %G' , format , fixed = TRUE )
format <- gsub ( ' jjjj_iso' , ' %G' , format , fixed = TRUE )
format <- gsub ( ' yyyy' , ' %Y' , format , fixed = TRUE )
format <- gsub ( ' jjjj' , ' %Y' , format , fixed = TRUE )
format <- gsub ( ' yy_iso' , ' %g' , format , fixed = TRUE )
format <- gsub ( ' jj_iso' , ' %g' , format , fixed = TRUE )
format <- gsub ( ' yy' , ' %y' , format , fixed = TRUE )
format <- gsub ( ' jj' , ' %y' , format , fixed = TRUE )
# time
format <- gsub ( ' hh' , ' %H' , format , fixed = TRUE )
format <- gsub ( ' h' , ' %k' , format , fixed = TRUE )
format <- gsub ( ' ss' , ' %S' , format , fixed = TRUE )
# seconds since the Epoch, 1970-01-01 00:00:00
format <- gsub ( ' unix' , ' %s' , format , fixed = TRUE )
# Equivalent to %Y-%m-%d (the ISO 8601 date format)
format <- gsub ( ' iso' , ' %F' , format , fixed = TRUE )
}
format
}