mirror of
https://github.com/msberends/AMR.git
synced 2024-12-25 19:26:13 +01:00
move tbl_parse_guess
This commit is contained in:
parent
4bdcde9a00
commit
6ee713cec1
@ -1,6 +1,6 @@
|
||||
Package: AMR
|
||||
Version: 0.2.0.9006
|
||||
Date: 2018-06-28
|
||||
Date: 2018-06-29
|
||||
Title: Antimicrobial Resistance Analysis
|
||||
Authors@R: c(
|
||||
person(
|
||||
|
@ -4,7 +4,7 @@
|
||||
#'
|
||||
#' The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
|
||||
#'
|
||||
#' Supports automatic column type transformation and supports new classes \code{\link{as.rsi}} and \code{\link{as.mic}}.
|
||||
#' This also supports automatic column type transformation, with AMR classes \code{\link{as.rsi}} and \code{\link{as.mic}}.
|
||||
#' @rdname clipboard
|
||||
#' @name clipboard
|
||||
#' @inheritParams utils::read.table
|
||||
@ -12,16 +12,14 @@
|
||||
#' @inheritParams readr::locale
|
||||
#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.
|
||||
#' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.
|
||||
#' @param guess_col_types a logical value indicating whether column types should be guessed with the \code{readr} package.
|
||||
#' @param info print info about copying
|
||||
#' @param guess_col_types a logical value indicating whether column types should be guessed and transformed automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported.
|
||||
#' @param info print info to console
|
||||
#' @keywords clipboard clipboard_import clipboard_export import export
|
||||
#' @importFrom dplyr %>% pull as_tibble
|
||||
#' @importFrom clipr read_clip_tbl write_clip
|
||||
#' @importFrom utils read.delim write.table object.size
|
||||
#' @importFrom readr parse_guess locale
|
||||
#' @details
|
||||
#' When using \code{guess_col_types = TRUE}, all column types will be determined automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported.
|
||||
#'
|
||||
#' \if{html}{
|
||||
#' \strong{Example for copying from Excel:}
|
||||
#' \out{<div style="text-align: left">}\figure{clipboard_copy.png}\out{</div>}
|
||||
@ -50,11 +48,7 @@ clipboard_import <- function(sep = '\t',
|
||||
time_format = '%H:%M',
|
||||
tz = Sys.timezone(),
|
||||
encoding = "UTF-8",
|
||||
info = FALSE) {
|
||||
|
||||
if (clipr::clipr_available() & info == TRUE) {
|
||||
cat('Importing from clipboard...')
|
||||
}
|
||||
info = TRUE) {
|
||||
|
||||
# this will fail when clipr is not available
|
||||
import_tbl <- clipr::read_clip_tbl(file = file,
|
||||
@ -66,32 +60,28 @@ clipboard_import <- function(sep = '\t',
|
||||
encoding = 'UTF-8',
|
||||
stringsAsFactors = FALSE)
|
||||
|
||||
if (info == TRUE) {
|
||||
cat('OK\n')
|
||||
}
|
||||
|
||||
# use tibble, so column types will be translated correctly
|
||||
import_tbl <- as_tibble(import_tbl)
|
||||
|
||||
if (startrow > 1) {
|
||||
# would else lose column headers
|
||||
import_tbl <- import_tbl[startrow:nrow(import_tbl),]
|
||||
import_tbl <- import_tbl[startrow:NROW(import_tbl),]
|
||||
}
|
||||
|
||||
colnames(import_tbl) <- gsub('[.]+', '_', colnames(import_tbl))
|
||||
|
||||
if (guess_col_types == TRUE) {
|
||||
if (info == TRUE) {
|
||||
cat('Transforming columns with readr::parse_guess...')
|
||||
cat('Transforming data by guessing column types...')
|
||||
}
|
||||
import_tbl <- clipboard_format(tbl = import_tbl,
|
||||
date_names = date_names,
|
||||
date_format = date_format,
|
||||
time_format = time_format,
|
||||
decimal_mark = dec,
|
||||
tz = tz,
|
||||
encoding = encoding,
|
||||
na = na)
|
||||
import_tbl <- tbl_parse_guess(tbl = import_tbl,
|
||||
date_names = date_names,
|
||||
date_format = date_format,
|
||||
time_format = time_format,
|
||||
decimal_mark = dec,
|
||||
tz = tz,
|
||||
encoding = encoding,
|
||||
na = na)
|
||||
if (info == TRUE) {
|
||||
cat('OK\n')
|
||||
}
|
||||
@ -133,46 +123,3 @@ clipboard_export <- function(x,
|
||||
|
||||
}
|
||||
|
||||
clipboard_format <- function(tbl,
|
||||
date_names = 'en',
|
||||
date_format = '%Y-%m-%d',
|
||||
time_format = '%H:%M',
|
||||
decimal_mark = '.',
|
||||
tz = Sys.timezone(),
|
||||
encoding = "UTF-8",
|
||||
na = c("", "NA", "NULL")) {
|
||||
|
||||
date_format <- date_generic(date_format)
|
||||
time_format <- date_generic(time_format)
|
||||
# set col types with readr
|
||||
for (i in 1:ncol(tbl)) {
|
||||
if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) {
|
||||
tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(),
|
||||
na = na,
|
||||
locale = readr::locale(date_names = date_names,
|
||||
date_format = date_format,
|
||||
time_format = time_format,
|
||||
decimal_mark = decimal_mark,
|
||||
encoding = encoding,
|
||||
tz = tz,
|
||||
asciify = FALSE))
|
||||
}
|
||||
if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) {
|
||||
# get values
|
||||
distinct_val <- tbl %>% pull(i) %>% unique() %>% sort()
|
||||
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
|
||||
tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE)
|
||||
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
|
||||
if (!all(distinct_val[!is.na(distinct_val)] == '')
|
||||
& all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) {
|
||||
tbl[, i] <- tbl %>% pull(i) %>% as.rsi()
|
||||
}
|
||||
}
|
||||
# convert to MIC class
|
||||
if (colnames(tbl)[i] %like% '_mic$') {
|
||||
tbl[, i] <- tbl %>% pull(i) %>% as.mic()
|
||||
}
|
||||
}
|
||||
tbl
|
||||
}
|
||||
|
||||
|
44
R/misc.R
44
R/misc.R
@ -115,6 +115,50 @@ size_humanreadable <- function(bytes, decimals = 1) {
|
||||
out
|
||||
}
|
||||
|
||||
# based on readr::parse_guess
|
||||
tbl_parse_guess <- function(tbl,
|
||||
date_names = 'en',
|
||||
date_format = '%Y-%m-%d',
|
||||
time_format = '%H:%M',
|
||||
decimal_mark = '.',
|
||||
tz = Sys.timezone(),
|
||||
encoding = "UTF-8",
|
||||
na = c("", "NA", "NULL")) {
|
||||
|
||||
date_format <- date_generic(date_format)
|
||||
time_format <- date_generic(time_format)
|
||||
# set col types with readr
|
||||
for (i in 1:ncol(tbl)) {
|
||||
if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) {
|
||||
tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(),
|
||||
na = na,
|
||||
locale = readr::locale(date_names = date_names,
|
||||
date_format = date_format,
|
||||
time_format = time_format,
|
||||
decimal_mark = decimal_mark,
|
||||
encoding = encoding,
|
||||
tz = tz,
|
||||
asciify = FALSE))
|
||||
}
|
||||
if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) {
|
||||
# get values
|
||||
distinct_val <- tbl %>% pull(i) %>% unique() %>% sort()
|
||||
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
|
||||
tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE)
|
||||
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
|
||||
if (!all(distinct_val[!is.na(distinct_val)] == '')
|
||||
& all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) {
|
||||
tbl[, i] <- tbl %>% pull(i) %>% as.rsi()
|
||||
}
|
||||
}
|
||||
# convert to MIC class
|
||||
if (colnames(tbl)[i] %like% '_mic$') {
|
||||
tbl[, i] <- tbl %>% pull(i) %>% as.mic()
|
||||
}
|
||||
}
|
||||
tbl
|
||||
}
|
||||
|
||||
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
|
||||
date_generic <- function(format) {
|
||||
if (!grepl('%', format, fixed = TRUE)) {
|
||||
|
@ -5,5 +5,9 @@ test_that("clipboard works", {
|
||||
|
||||
clipboard_export(antibiotics)
|
||||
expect_identical(antibiotics,
|
||||
clipboard_import())
|
||||
clipboard_import(date_format = "yyyy-mm-dd"))
|
||||
|
||||
clipboard_export(septic_patients[1:100,])
|
||||
expect_identical(tbl_parse_guess(septic_patients[1:100,]),
|
||||
clipboard_import(guess_col_types = TRUE))
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user