mirror of
https://github.com/msberends/AMR.git
synced 2024-12-25 18:46:11 +01:00
move tbl_parse_guess
This commit is contained in:
parent
4bdcde9a00
commit
6ee713cec1
@ -1,6 +1,6 @@
|
|||||||
Package: AMR
|
Package: AMR
|
||||||
Version: 0.2.0.9006
|
Version: 0.2.0.9006
|
||||||
Date: 2018-06-28
|
Date: 2018-06-29
|
||||||
Title: Antimicrobial Resistance Analysis
|
Title: Antimicrobial Resistance Analysis
|
||||||
Authors@R: c(
|
Authors@R: c(
|
||||||
person(
|
person(
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
#'
|
#'
|
||||||
#' The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
|
#' The data will be read and written as tab-separated by default, which makes it possible to copy and paste from other software like Excel and SPSS without further transformation.
|
||||||
#'
|
#'
|
||||||
#' Supports automatic column type transformation and supports new classes \code{\link{as.rsi}} and \code{\link{as.mic}}.
|
#' This also supports automatic column type transformation, with AMR classes \code{\link{as.rsi}} and \code{\link{as.mic}}.
|
||||||
#' @rdname clipboard
|
#' @rdname clipboard
|
||||||
#' @name clipboard
|
#' @name clipboard
|
||||||
#' @inheritParams utils::read.table
|
#' @inheritParams utils::read.table
|
||||||
@ -12,16 +12,14 @@
|
|||||||
#' @inheritParams readr::locale
|
#' @inheritParams readr::locale
|
||||||
#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.
|
#' @param startrow \emph{n}th row to start importing from. When \code{header = TRUE}, the import will start on row \code{startrow} \emph{below} the header.
|
||||||
#' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.
|
#' @param as_vector a logical value indicating whether data consisting of only one column should be imported as vector using \code{\link[dplyr]{pull}}. This will strip off the header.
|
||||||
#' @param guess_col_types a logical value indicating whether column types should be guessed with the \code{readr} package.
|
#' @param guess_col_types a logical value indicating whether column types should be guessed and transformed automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported.
|
||||||
#' @param info print info about copying
|
#' @param info print info to console
|
||||||
#' @keywords clipboard clipboard_import clipboard_export import export
|
#' @keywords clipboard clipboard_import clipboard_export import export
|
||||||
#' @importFrom dplyr %>% pull as_tibble
|
#' @importFrom dplyr %>% pull as_tibble
|
||||||
#' @importFrom clipr read_clip_tbl write_clip
|
#' @importFrom clipr read_clip_tbl write_clip
|
||||||
#' @importFrom utils read.delim write.table object.size
|
#' @importFrom utils read.delim write.table object.size
|
||||||
#' @importFrom readr parse_guess locale
|
#' @importFrom readr parse_guess locale
|
||||||
#' @details
|
#' @details
|
||||||
#' When using \code{guess_col_types = TRUE}, all column types will be determined automatically with \code{\link[readr]{parse_guess}} from the \code{readr} package. Besides, the antimicrobial classes in this AMR package (\code{\link{as.rsi}} and \code{\link{as.mic}}) are also supported.
|
|
||||||
#'
|
|
||||||
#' \if{html}{
|
#' \if{html}{
|
||||||
#' \strong{Example for copying from Excel:}
|
#' \strong{Example for copying from Excel:}
|
||||||
#' \out{<div style="text-align: left">}\figure{clipboard_copy.png}\out{</div>}
|
#' \out{<div style="text-align: left">}\figure{clipboard_copy.png}\out{</div>}
|
||||||
@ -50,11 +48,7 @@ clipboard_import <- function(sep = '\t',
|
|||||||
time_format = '%H:%M',
|
time_format = '%H:%M',
|
||||||
tz = Sys.timezone(),
|
tz = Sys.timezone(),
|
||||||
encoding = "UTF-8",
|
encoding = "UTF-8",
|
||||||
info = FALSE) {
|
info = TRUE) {
|
||||||
|
|
||||||
if (clipr::clipr_available() & info == TRUE) {
|
|
||||||
cat('Importing from clipboard...')
|
|
||||||
}
|
|
||||||
|
|
||||||
# this will fail when clipr is not available
|
# this will fail when clipr is not available
|
||||||
import_tbl <- clipr::read_clip_tbl(file = file,
|
import_tbl <- clipr::read_clip_tbl(file = file,
|
||||||
@ -66,32 +60,28 @@ clipboard_import <- function(sep = '\t',
|
|||||||
encoding = 'UTF-8',
|
encoding = 'UTF-8',
|
||||||
stringsAsFactors = FALSE)
|
stringsAsFactors = FALSE)
|
||||||
|
|
||||||
if (info == TRUE) {
|
|
||||||
cat('OK\n')
|
|
||||||
}
|
|
||||||
|
|
||||||
# use tibble, so column types will be translated correctly
|
# use tibble, so column types will be translated correctly
|
||||||
import_tbl <- as_tibble(import_tbl)
|
import_tbl <- as_tibble(import_tbl)
|
||||||
|
|
||||||
if (startrow > 1) {
|
if (startrow > 1) {
|
||||||
# would else lose column headers
|
# would else lose column headers
|
||||||
import_tbl <- import_tbl[startrow:nrow(import_tbl),]
|
import_tbl <- import_tbl[startrow:NROW(import_tbl),]
|
||||||
}
|
}
|
||||||
|
|
||||||
colnames(import_tbl) <- gsub('[.]+', '_', colnames(import_tbl))
|
colnames(import_tbl) <- gsub('[.]+', '_', colnames(import_tbl))
|
||||||
|
|
||||||
if (guess_col_types == TRUE) {
|
if (guess_col_types == TRUE) {
|
||||||
if (info == TRUE) {
|
if (info == TRUE) {
|
||||||
cat('Transforming columns with readr::parse_guess...')
|
cat('Transforming data by guessing column types...')
|
||||||
}
|
}
|
||||||
import_tbl <- clipboard_format(tbl = import_tbl,
|
import_tbl <- tbl_parse_guess(tbl = import_tbl,
|
||||||
date_names = date_names,
|
date_names = date_names,
|
||||||
date_format = date_format,
|
date_format = date_format,
|
||||||
time_format = time_format,
|
time_format = time_format,
|
||||||
decimal_mark = dec,
|
decimal_mark = dec,
|
||||||
tz = tz,
|
tz = tz,
|
||||||
encoding = encoding,
|
encoding = encoding,
|
||||||
na = na)
|
na = na)
|
||||||
if (info == TRUE) {
|
if (info == TRUE) {
|
||||||
cat('OK\n')
|
cat('OK\n')
|
||||||
}
|
}
|
||||||
@ -133,46 +123,3 @@ clipboard_export <- function(x,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
clipboard_format <- function(tbl,
|
|
||||||
date_names = 'en',
|
|
||||||
date_format = '%Y-%m-%d',
|
|
||||||
time_format = '%H:%M',
|
|
||||||
decimal_mark = '.',
|
|
||||||
tz = Sys.timezone(),
|
|
||||||
encoding = "UTF-8",
|
|
||||||
na = c("", "NA", "NULL")) {
|
|
||||||
|
|
||||||
date_format <- date_generic(date_format)
|
|
||||||
time_format <- date_generic(time_format)
|
|
||||||
# set col types with readr
|
|
||||||
for (i in 1:ncol(tbl)) {
|
|
||||||
if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) {
|
|
||||||
tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(),
|
|
||||||
na = na,
|
|
||||||
locale = readr::locale(date_names = date_names,
|
|
||||||
date_format = date_format,
|
|
||||||
time_format = time_format,
|
|
||||||
decimal_mark = decimal_mark,
|
|
||||||
encoding = encoding,
|
|
||||||
tz = tz,
|
|
||||||
asciify = FALSE))
|
|
||||||
}
|
|
||||||
if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) {
|
|
||||||
# get values
|
|
||||||
distinct_val <- tbl %>% pull(i) %>% unique() %>% sort()
|
|
||||||
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
|
|
||||||
tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE)
|
|
||||||
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
|
|
||||||
if (!all(distinct_val[!is.na(distinct_val)] == '')
|
|
||||||
& all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) {
|
|
||||||
tbl[, i] <- tbl %>% pull(i) %>% as.rsi()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
# convert to MIC class
|
|
||||||
if (colnames(tbl)[i] %like% '_mic$') {
|
|
||||||
tbl[, i] <- tbl %>% pull(i) %>% as.mic()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tbl
|
|
||||||
}
|
|
||||||
|
|
||||||
|
44
R/misc.R
44
R/misc.R
@ -115,6 +115,50 @@ size_humanreadable <- function(bytes, decimals = 1) {
|
|||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# based on readr::parse_guess
|
||||||
|
tbl_parse_guess <- function(tbl,
|
||||||
|
date_names = 'en',
|
||||||
|
date_format = '%Y-%m-%d',
|
||||||
|
time_format = '%H:%M',
|
||||||
|
decimal_mark = '.',
|
||||||
|
tz = Sys.timezone(),
|
||||||
|
encoding = "UTF-8",
|
||||||
|
na = c("", "NA", "NULL")) {
|
||||||
|
|
||||||
|
date_format <- date_generic(date_format)
|
||||||
|
time_format <- date_generic(time_format)
|
||||||
|
# set col types with readr
|
||||||
|
for (i in 1:ncol(tbl)) {
|
||||||
|
if (!all(tbl %>% pull(i) %>% class() %in% c('list', 'matrix'))) {
|
||||||
|
tbl[, i] <- readr::parse_guess(x = tbl %>% pull(i) %>% as.character(),
|
||||||
|
na = na,
|
||||||
|
locale = readr::locale(date_names = date_names,
|
||||||
|
date_format = date_format,
|
||||||
|
time_format = time_format,
|
||||||
|
decimal_mark = decimal_mark,
|
||||||
|
encoding = encoding,
|
||||||
|
tz = tz,
|
||||||
|
asciify = FALSE))
|
||||||
|
}
|
||||||
|
if (any(tbl %>% pull(i) %>% class() %in% c('factor', 'character'))) {
|
||||||
|
# get values
|
||||||
|
distinct_val <- tbl %>% pull(i) %>% unique() %>% sort()
|
||||||
|
# remove ASCII escape character: https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
|
||||||
|
tbl[, i] <- tbl %>% pull(i) %>% gsub('\033', ' ', ., fixed = TRUE)
|
||||||
|
# look for RSI, shouldn't all be "" and must be valid antibiotic interpretations
|
||||||
|
if (!all(distinct_val[!is.na(distinct_val)] == '')
|
||||||
|
& all(distinct_val[!is.na(distinct_val)] %in% c('', 'I', 'I;I', 'R', 'R;R', 'S', 'S;S'))) {
|
||||||
|
tbl[, i] <- tbl %>% pull(i) %>% as.rsi()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# convert to MIC class
|
||||||
|
if (colnames(tbl)[i] %like% '_mic$') {
|
||||||
|
tbl[, i] <- tbl %>% pull(i) %>% as.mic()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tbl
|
||||||
|
}
|
||||||
|
|
||||||
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
|
# transforms date format like "dddd d mmmm yyyy" to "%A %e %B %Y"
|
||||||
date_generic <- function(format) {
|
date_generic <- function(format) {
|
||||||
if (!grepl('%', format, fixed = TRUE)) {
|
if (!grepl('%', format, fixed = TRUE)) {
|
||||||
|
@ -5,5 +5,9 @@ test_that("clipboard works", {
|
|||||||
|
|
||||||
clipboard_export(antibiotics)
|
clipboard_export(antibiotics)
|
||||||
expect_identical(antibiotics,
|
expect_identical(antibiotics,
|
||||||
clipboard_import())
|
clipboard_import(date_format = "yyyy-mm-dd"))
|
||||||
|
|
||||||
|
clipboard_export(septic_patients[1:100,])
|
||||||
|
expect_identical(tbl_parse_guess(septic_patients[1:100,]),
|
||||||
|
clipboard_import(guess_col_types = TRUE))
|
||||||
})
|
})
|
||||||
|
Loading…
Reference in New Issue
Block a user