From 6ee713cec10bdee7eb8650f7a93c2e4bb745928f Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Fri, 29 Jun 2018 08:56:03 +0200 Subject: [PATCH] move tbl_parse_guess --- DESCRIPTION | 2 +- R/clipboard.R | 81 ++++++--------------------------- R/misc.R | 44 ++++++++++++++++++ tests/testthat/test-clipboard.R | 6 ++- 4 files changed, 64 insertions(+), 69 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32838e50..caa918db 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR Version: 0.2.0.9006 -Date: 2018-06-28 +Date: 2018-06-29 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/R/clipboard.R b/R/clipboard.R index dde0d70d..bbe95558 100644 --- a/R/clipboard.R +++ b/R/clipboard.R @@ -4,7 +4,7 @@ #' #' The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation. #' -#' Supports automatic column type transformation and supports new classes \code{\link{as.rsi}} and \code{\link{as.mic}}. +#' This also supports automatic column type transformation, with AMR classes \code{\link{as.rsi}} and \code{\link{as.mic}}. #' @rdname clipboard #' @name clipboard #' @inheritParams utils::read.table @@ -12,16 +12,14 @@ #' @inheritParams readr::locale #' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header. #' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header. -#' @param guess_col_types a logical value indicating whether column types should be guessed with the \code{readr} package. -#' @param info print info about copying +#' @param guess_col_types a logical value indicating whether column types should be guessed and transformed automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported. +#' @param info print info to console #' @keywords clipboard clipboard_import clipboard_export import export #' @importFrom dplyr %>% pull as_tibble #' @importFrom clipr read_clip_tbl write_clip #' @importFrom utils read.delim write.table object.size #' @importFrom readr parse_guess locale #' @details -#' When using \code{guess_col_types = TRUE}, all column types will be determined automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported. -#' #' \if{html}{ #' \strong{Example for copying from Excel:} #' \out{
}\figure{clipboard_copy.png}\out{
} @@ -50,11 +48,7 @@ clipboard_import <- function(sep = '\t', time_format = '%H:%M', tz = Sys.timezone(), encoding = "UTF-8", - info = FALSE) { - - if (clipr::clipr_available() & info == TRUE) { - cat('Importing from clipboard...') - } + info = TRUE) { # this will fail when clipr is not available import_tbl <- clipr::read_clip_tbl(file = file, @@ -66,32 +60,28 @@ clipboard_import <- function(sep = '\t', encoding = 'UTF-8', stringsAsFactors = FALSE) - if (info == TRUE) { - cat('OK\n') - } - # use tibble, so column types will be translated correctly import_tbl <- as_tibble(import_tbl) if (startrow > 1) { # would else lose column headers - import_tbl <- import_tbl[startrow:nrow(import_tbl),] + import_tbl <- import_tbl[startrow:NROW(import_tbl),] } colnames(import_tbl) <- gsub('[.]+', '_', colnames(import_tbl)) if (guess_col_types == TRUE) { if (info == TRUE) { - cat('Transforming columns with readr::parse_guess...') + cat('Transforming data by guessing column types...') } - import_tbl <- clipboard_format(tbl = import_tbl, - date_names = date_names, - date_format = date_format, - time_format = time_format, - decimal_mark = dec, - tz = tz, - encoding = encoding, - na = na) + import_tbl <- tbl_parse_guess(tbl = import_tbl, + date_names = date_names, + date_format = date_format, + time_format = time_format, + decimal_mark = dec, + tz = tz, + encoding = encoding, + na = na) if (info == TRUE) { cat('OK\n') } @@ -133,46 +123,3 @@ clipboard_export <- function(x, } -clipboard_format <- function(tbl, - date_names = 'en', - date_format = '%Y-%m-%d', - time_format = '%H:%M', - decimal_mark = '.', - tz = Sys.timezone(), - encoding = "UTF-8", - na = c("", "NA", "NULL")) { - - date_format <- date_generic(date_format) - time_format <- date_generic(time_format) - # set col types with readr - for (i in 1:ncol(tbl)) { - if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) { - tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(), - na = na, - locale = readr::locale(date_names = date_names, - date_format = date_format, - time_format = time_format, - decimal_mark = decimal_mark, - encoding = encoding, - tz = tz, - asciify = FALSE)) - } - if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) { - # get values - distinct_val <- tbl %>% pull(i) %>% unique() %>% sort() - # remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character - tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE) - # look for RSI, shouldn't all be "" and must be valid antibiotic interpretations - if (!all(distinct_val[!is.na(distinct_val)] == '') - & all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) { - tbl[, i] <- tbl %>% pull(i) %>% as.rsi() - } - } - # convert to MIC class - if (colnames(tbl)[i] %like% '_mic$') { - tbl[, i] <- tbl %>% pull(i) %>% as.mic() - } - } - tbl -} - diff --git a/R/misc.R b/R/misc.R index 3a2a58d9..663321c9 100755 --- a/R/misc.R +++ b/R/misc.R @@ -115,6 +115,50 @@ size_humanreadable <- function(bytes, decimals = 1) { out } +# based on readr::parse_guess +tbl_parse_guess <- function(tbl, + date_names = 'en', + date_format = '%Y-%m-%d', + time_format = '%H:%M', + decimal_mark = '.', + tz = Sys.timezone(), + encoding = "UTF-8", + na = c("", "NA", "NULL")) { + + date_format <- date_generic(date_format) + time_format <- date_generic(time_format) + # set col types with readr + for (i in 1:ncol(tbl)) { + if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) { + tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(), + na = na, + locale = readr::locale(date_names = date_names, + date_format = date_format, + time_format = time_format, + decimal_mark = decimal_mark, + encoding = encoding, + tz = tz, + asciify = FALSE)) + } + if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) { + # get values + distinct_val <- tbl %>% pull(i) %>% unique() %>% sort() + # remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character + tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE) + # look for RSI, shouldn't all be "" and must be valid antibiotic interpretations + if (!all(distinct_val[!is.na(distinct_val)] == '') + & all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) { + tbl[, i] <- tbl %>% pull(i) %>% as.rsi() + } + } + # convert to MIC class + if (colnames(tbl)[i] %like% '_mic$') { + tbl[, i] <- tbl %>% pull(i) %>% as.mic() + } + } + tbl +} + # transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y" date_generic <- function(format) { if (!grepl('%', format, fixed = TRUE)) { diff --git a/tests/testthat/test-clipboard.R b/tests/testthat/test-clipboard.R index 9429e96f..12a2612c 100644 --- a/tests/testthat/test-clipboard.R +++ b/tests/testthat/test-clipboard.R @@ -5,5 +5,9 @@ test_that("clipboard works", { clipboard_export(antibiotics) expect_identical(antibiotics, - clipboard_import()) + clipboard_import(date_format = "yyyy-mm-dd")) + + clipboard_export(septic_patients[1:100,]) + expect_identical(tbl_parse_guess(septic_patients[1:100,]), + clipboard_import(guess_col_types = TRUE)) })