1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-26 06:06:12 +01:00

move tbl_parse_guess

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-06-29 08:56:03 +02:00
parent 4bdcde9a00
commit 6ee713cec1
4 changed files with 64 additions and 69 deletions

View File

@ -1,6 +1,6 @@
Package: AMR Package: AMR
Version: 0.2.0.9006 Version: 0.2.0.9006
Date: 2018-06-28 Date: 2018-06-29
Title: Antimicrobial Resistance Analysis Title: Antimicrobial Resistance Analysis
Authors@R: c( Authors@R: c(
person( person(

View File

@ -4,7 +4,7 @@
#' #'
#' The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. #' The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
#' #'
#' Supports automatic column type transformation and supports new classes \code{\link{as.rsi}} and \code{\link{as.mic}}. #' This also supports automatic column type transformation, with AMR classes \code{\link{as.rsi}} and \code{\link{as.mic}}.
#' @rdname clipboard #' @rdname clipboard
#' @name clipboard #' @name clipboard
#' @inheritParams utils::read.table #' @inheritParams utils::read.table
@ -12,16 +12,14 @@
#' @inheritParams readr::locale #' @inheritParams readr::locale
#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header. #' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.
#' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header. #' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.
#' @param guess_col_types a logical value indicating whether column types should be guessed with the \code{readr} package. #' @param guess_col_types a logical value indicating whether column types should be guessed and transformed automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported.
#' @param info print info about copying #' @param info print info to console
#' @keywords clipboard clipboard_import clipboard_export import export #' @keywords clipboard clipboard_import clipboard_export import export
#' @importFrom dplyr %>% pull as_tibble #' @importFrom dplyr %>% pull as_tibble
#' @importFrom clipr read_clip_tbl write_clip #' @importFrom clipr read_clip_tbl write_clip
#' @importFrom utils read.delim write.table object.size #' @importFrom utils read.delim write.table object.size
#' @importFrom readr parse_guess locale #' @importFrom readr parse_guess locale
#' @details #' @details
#' When using \code{guess_col_types = TRUE}, all column types will be determined automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported.
#'
#' \if{html}{ #' \if{html}{
#' \strong{Example for copying from Excel:} #' \strong{Example for copying from Excel:}
#' \out{<div style="text-align: left">}\figure{clipboard_copy.png}\out{</div>} #' \out{<div style="text-align: left">}\figure{clipboard_copy.png}\out{</div>}
@ -50,11 +48,7 @@ clipboard_import <- function(sep = '\t',
time_format = '%H:%M', time_format = '%H:%M',
tz = Sys.timezone(), tz = Sys.timezone(),
encoding = "UTF-8", encoding = "UTF-8",
info = FALSE) { info = TRUE) {
if (clipr::clipr_available() & info == TRUE) {
cat('Importing from clipboard...')
}
# this will fail when clipr is not available # this will fail when clipr is not available
import_tbl <- clipr::read_clip_tbl(file = file, import_tbl <- clipr::read_clip_tbl(file = file,
@ -66,32 +60,28 @@ clipboard_import <- function(sep = '\t',
encoding = 'UTF-8', encoding = 'UTF-8',
stringsAsFactors = FALSE) stringsAsFactors = FALSE)
if (info == TRUE) {
cat('OK\n')
}
# use tibble, so column types will be translated correctly # use tibble, so column types will be translated correctly
import_tbl <- as_tibble(import_tbl) import_tbl <- as_tibble(import_tbl)
if (startrow > 1) { if (startrow > 1) {
# would else lose column headers # would else lose column headers
import_tbl <- import_tbl[startrow:nrow(import_tbl),] import_tbl <- import_tbl[startrow:NROW(import_tbl),]
} }
colnames(import_tbl) <- gsub('[.]+', '_', colnames(import_tbl)) colnames(import_tbl) <- gsub('[.]+', '_', colnames(import_tbl))
if (guess_col_types == TRUE) { if (guess_col_types == TRUE) {
if (info == TRUE) { if (info == TRUE) {
cat('Transforming columns with readr::parse_guess...') cat('Transforming data by guessing column types...')
} }
import_tbl <- clipboard_format(tbl = import_tbl, import_tbl <- tbl_parse_guess(tbl = import_tbl,
date_names = date_names, date_names = date_names,
date_format = date_format, date_format = date_format,
time_format = time_format, time_format = time_format,
decimal_mark = dec, decimal_mark = dec,
tz = tz, tz = tz,
encoding = encoding, encoding = encoding,
na = na) na = na)
if (info == TRUE) { if (info == TRUE) {
cat('OK\n') cat('OK\n')
} }
@ -133,46 +123,3 @@ clipboard_export <- function(x,
} }
clipboard_format <- function(tbl,
date_names = 'en',
date_format = '%Y-%m-%d',
time_format = '%H:%M',
decimal_mark = '.',
tz = Sys.timezone(),
encoding = "UTF-8",
na = c("", "NA", "NULL")) {
date_format <- date_generic(date_format)
time_format <- date_generic(time_format)
# set col types with readr
for (i in 1:ncol(tbl)) {
if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) {
tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(),
na = na,
locale = readr::locale(date_names = date_names,
date_format = date_format,
time_format = time_format,
decimal_mark = decimal_mark,
encoding = encoding,
tz = tz,
asciify = FALSE))
}
if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) {
# get values
distinct_val <- tbl %>% pull(i) %>% unique() %>% sort()
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE)
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
if (!all(distinct_val[!is.na(distinct_val)] == '')
& all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) {
tbl[, i] <- tbl %>% pull(i) %>% as.rsi()
}
}
# convert to MIC class
if (colnames(tbl)[i] %like% '_mic$') {
tbl[, i] <- tbl %>% pull(i) %>% as.mic()
}
}
tbl
}

View File

@ -115,6 +115,50 @@ size_humanreadable <- function(bytes, decimals = 1) {
out out
} }
# based on readr::parse_guess
tbl_parse_guess <- function(tbl,
date_names = 'en',
date_format = '%Y-%m-%d',
time_format = '%H:%M',
decimal_mark = '.',
tz = Sys.timezone(),
encoding = "UTF-8",
na = c("", "NA", "NULL")) {
date_format <- date_generic(date_format)
time_format <- date_generic(time_format)
# set col types with readr
for (i in 1:ncol(tbl)) {
if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) {
tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(),
na = na,
locale = readr::locale(date_names = date_names,
date_format = date_format,
time_format = time_format,
decimal_mark = decimal_mark,
encoding = encoding,
tz = tz,
asciify = FALSE))
}
if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) {
# get values
distinct_val <- tbl %>% pull(i) %>% unique() %>% sort()
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE)
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
if (!all(distinct_val[!is.na(distinct_val)] == '')
& all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) {
tbl[, i] <- tbl %>% pull(i) %>% as.rsi()
}
}
# convert to MIC class
if (colnames(tbl)[i] %like% '_mic$') {
tbl[, i] <- tbl %>% pull(i) %>% as.mic()
}
}
tbl
}
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y" # transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
date_generic <- function(format) { date_generic <- function(format) {
if (!grepl('%', format, fixed = TRUE)) { if (!grepl('%', format, fixed = TRUE)) {

View File

@ -5,5 +5,9 @@ test_that("clipboard works", {
clipboard_export(antibiotics) clipboard_export(antibiotics)
expect_identical(antibiotics, expect_identical(antibiotics,
clipboard_import()) clipboard_import(date_format = "yyyy-mm-dd"))
clipboard_export(septic_patients[1:100,])
expect_identical(tbl_parse_guess(septic_patients[1:100,]),
clipboard_import(guess_col_types = TRUE))
}) })