1
0
mirror of https://github.com/msberends/AMR.git synced 2024-12-24 18:06:11 +01:00

read.4D improvements

This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-11-15 12:42:35 +01:00
parent 5cb8f3cf72
commit fab64e6728
10 changed files with 89 additions and 51 deletions

View File

@ -67,4 +67,4 @@ BugReports: https://gitlab.com/msberends/AMR/issues
License: GPL-2 | file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0
RoxygenNote: 6.1.1

View File

@ -119,7 +119,7 @@ export(portion_S)
export(portion_SI)
export(portion_df)
export(ratio)
export(read_4D)
export(read.4D)
export(resistance_predict)
export(right_join_microorganisms)
export(rsi)
@ -239,6 +239,6 @@ importFrom(stats,mad)
importFrom(stats,pchisq)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(utils,View)
importFrom(utils,browseVignettes)
importFrom(utils,installed.packages)
importFrom(xml2,read_html)

View File

@ -5,7 +5,7 @@
* Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`
* Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default.
* Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`.
* Function `read_4D` to read from the 4D database of the MMB department of the UMCG
* Function `read.4D` to read from the 4D database of the MMB department of the UMCG
* Functions `mo_authors` and `mo_year` to get specific values about the scientific reference of a taxonomic entry
#### Changed
@ -57,6 +57,7 @@
* Speed improvement for `is.rsi.eligible`, now 15-20 times faster
* In `g.test`, when `sum(x)` is below 1000 or any of the expected values is below 5, Fisher's Exact Test will be suggested
* `ab_name` will try to fall back on `as.atc` when no results are found
* Removed the addin to view data sets
#### Other
* New dependency on package `crayon`, to support formatted text in the console

View File

@ -47,7 +47,7 @@ get_locale <- function() {
} else if (grepl("^(Portuguese|Portugu.s|pt_|PT_)", lang, ignore.case = FALSE)) {
"pt"
} else {
# other language, set to English
# other language -> set to English
"en"
}
}

View File

@ -49,6 +49,7 @@
#'
#' \code{ggplot_rsi} is a wrapper around all above functions that uses data as first input. This makes it possible to use this function after a pipe (\code{\%>\%}). See Examples.
#' @rdname ggplot_rsi
#' @importFrom utils installed.packages
#' @export
#' @examples
#' library(dplyr)

View File

@ -26,24 +26,6 @@ addin_insert_like <- function() {
rstudioapi::insertText(" %like% ")
}
# No export, no Rd
#' @importFrom utils View
addin_open_antibiotics <- function() {
View(antibiotics)
}
# No export, no Rd
#' @importFrom utils View
addin_open_microorganisms <- function() {
View(microorganisms)
}
# No export, no Rd
#' @importFrom utils View
addin_open_septic_patients <- function() {
View(septic_patients)
}
# No export, no Rd
percent <- function(x, round = 1, force_zero = FALSE, ...) {
val <- base::round(x * 100, digits = round)

16
R/mo.R
View File

@ -181,7 +181,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
x[is.null(x)] <- NA
# support tidyverse selection like: df %>% select(colA)
if (!is.vector(x)) {
if (!is.vector(x) & !is.null(dim(x))) {
x <- pull(x, 1)
}
}
@ -360,9 +360,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
# next
}
if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2]
x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
mo_umcg <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
mo_found <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == mo_umcg, 2]
if (length(mo_found) == 0) {
# not found
x[i] <- NA_character_
failures <- c(failures, x_backup[i])
} else {
x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L]
}
next
}
if (x_backup[i] %in% reference_df[, 1]) {
@ -575,7 +581,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
failures <- failures[!failures %in% c(NA, NULL, NaN)]
if (length(failures) > 0) {
warning("These ", length(failures) , " values could not be coerced: ",
warning("These ", length(failures) , " values could not be coerced to a valid MO code: ",
paste('"', unique(failures), '"', sep = "", collapse = ', '),
".",
call. = FALSE)

View File

@ -18,11 +18,15 @@
#' Read data from 4D database
#'
#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
#' @inheritParams utils::read.table
#' @param info a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions
#' @details Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.
#' @export
read_4D <- function(file,
read.4D <- function(file,
info = interactive(),
header = TRUE,
row.names = NULL,
sep = "\t",
quote = "\"'",
dec = ",",
@ -36,7 +40,11 @@ read_4D <- function(file,
fileEncoding = "UTF-8",
encoding = "UTF-8") {
if (info == TRUE) {
message("Importing data... ", appendLF = FALSE)
}
data_4D <- utils::read.table(file = file,
row.names = row.names,
header = header,
sep = sep,
quote = quote,
@ -57,7 +65,7 @@ read_4D <- function(file,
posixlt <- as.POSIXlt(date_regular)
# born after today will be born 100 years ago
# based on https://stackoverflow.com/a/3312971/4575331
posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100
posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year <- posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year - 100
as.Date(posixlt)
}
to_age_4D <- function(from, to) {
@ -71,6 +79,15 @@ read_4D <- function(file,
age - 1, age)
}
if (info == TRUE) {
message("OK\nTransforming column names... ", appendLF = FALSE)
}
if ("row.names" %in% colnames(data_4D) & all(is.na(data_4D[, ncol(data_4D)]))) {
# remove first column name "row.names" and remove last empty column
colnames(data_4D) <- c(colnames(data_4D)[2:ncol(data_4D)], "_skip_last")
data_4D <- data_4D[, -ncol(data_4D)]
}
colnames(data_4D) <- tolower(colnames(data_4D))
if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
# add age
@ -91,6 +108,10 @@ read_4D <- function(file,
# order of columns
data_4D <- data_4D[, cols_wanted]
# backup original column names
colnames.bak <- toupper(colnames(data_4D))
colnames.bak[colnames.bak == "AGE"] <- NULL
# rename of columns
colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
@ -104,6 +125,9 @@ read_4D <- function(file,
colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)
if (info == TRUE) {
message("OK\nTransforming dates and age... ", appendLF = FALSE)
}
if ("date_birth" %in% colnames(data_4D)) {
data_4D$date_birth <- to_date_4D(data_4D$date_birth)
@ -117,6 +141,10 @@ read_4D <- function(file,
if ("gender" %in% colnames(data_4D)) {
data_4D$gender[data_4D$gender == "V"] <- "F"
}
if (info == TRUE) {
message("OK\nTransforming MO codes... ", appendLF = FALSE)
}
if ("mo" %in% colnames(data_4D)) {
data_4D$mo <- as.mo(data_4D$mo)
# column right of mo is:
@ -129,6 +157,18 @@ read_4D <- function(file,
}
}
# set original column names as label (can be seen in RStudio Viewer)
if (info == TRUE) {
message("OK\nSetting original column names as label... ", appendLF = FALSE)
}
for (i in 1:ncol(data_4D)) {
attr(data_4D[, i], "label") <- colnames.bak[i]
}
if (info == TRUE) {
message("OK")
}
data_4D
}

View File

@ -5,15 +5,3 @@ Interactive: false
Name: Insert %like%
Binding: addin_insert_like
Interactive: false
Name: View 'antibiotics' data set
Binding: addin_open_antibiotics
Interactive: false
Name: View 'microorganisms' data set
Binding: addin_open_microorganisms
Interactive: false
Name: View 'septic_patients' data set
Binding: addin_open_septic_patients
Interactive: false

View File

@ -1,13 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_4d.R
\name{read_4D}
\alias{read_4D}
% Please edit documentation in R/read.4d.R
\name{read.4D}
\alias{read.4D}
\title{Read data from 4D database}
\usage{
read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
dec = ",", na.strings = c("NA", "", "."), skip = 2,
check.names = TRUE, strip.white = TRUE, fill = TRUE,
blank.lines.skip = TRUE, stringsAsFactors = FALSE,
read.4D(file, info = interactive(), header = TRUE, row.names = NULL,
sep = "\\t", quote = "\\"'", dec = ",", na.strings = c("NA", "",
"."), skip = 2, check.names = TRUE, strip.white = TRUE,
fill = TRUE, blank.lines.skip = TRUE, stringsAsFactors = FALSE,
fileEncoding = "UTF-8", encoding = "UTF-8")
}
\arguments{
@ -32,12 +32,29 @@ read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
\code{\link{url}}.)
}
\item{info}{a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions}
\item{header}{a logical value indicating whether the file contains the
names of the variables as its first line. If missing, the value is
determined from the file format: \code{header} is set to \code{TRUE}
if and only if the first row contains one fewer field than the
number of columns.}
\item{row.names}{a vector of row names. This can be a vector giving
the actual row names, or a single number giving the column of the
table which contains the row names, or character string giving the
name of the table column containing the row names.
If there is a header and the first row contains one fewer field than
the number of columns, the first column in the input is used for the
row names. Otherwise if \code{row.names} is missing, the rows are
numbered.
Using \code{row.names = NULL} forces row numbering. Missing or
\code{NULL} \code{row.names} generate row names that are considered
to be \sQuote{automatic} (and not preserved by \code{\link{as.matrix}}).
}
\item{sep}{the field separator character. Values on each line of the
file are separated by this character. If \code{sep = ""} (the
default for \code{read.table}) the separator is \sQuote{white space},
@ -100,5 +117,8 @@ read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
}
}
\description{
This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
}
\details{
Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.
}