This commit is contained in:
dr. M.S. (Matthijs) Berends 2018-11-05 15:30:49 +01:00
parent d07e9b904e
commit d15bb2ab3f
6 changed files with 256 additions and 6 deletions

View File

@ -116,6 +116,7 @@ export(portion_S)
export(portion_SI)
export(portion_df)
export(ratio)
export(read_4D)
export(resistance_predict)
export(right_join_microorganisms)
export(rsi)

View File

@ -5,6 +5,7 @@
* Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`
* Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default.
* Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`.
* Function `read_4D` to read from the 4D database of the MMB department of the UMCG
#### Changed
* Big changes to the `EUCAST_rules` function:

134
R/read_4d.R Normal file
View File

@ -0,0 +1,134 @@
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
#' Read data from 4D database
#'
#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
#' @inheritParams utils::read.table
#' @export
read_4D <- function(file,
header = TRUE,
sep = "\t",
quote = "\"'",
dec = ",",
na.strings = c("NA", "", "."),
skip = 2,
check.names = TRUE,
strip.white = TRUE,
fill = TRUE,
blank.lines.skip = TRUE,
stringsAsFactors = FALSE,
fileEncoding = "UTF-8",
encoding = "UTF-8") {
data_4D <- utils::read.table(file = file,
header = header,
sep = sep,
quote = quote,
dec = dec,
na.strings = na.strings,
skip = skip,
check.names = check.names,
strip.white = strip.white,
fill = fill,
blank.lines.skip = blank.lines.skip,
stringsAsFactors = stringsAsFactors,
fileEncoding = fileEncoding,
encoding = encoding)
# helper functions
to_date_4D <- function(x) {
date_regular <- as.Date(x, format = "%d-%m-%y")
posixlt <- as.POSIXlt(date_regular)
# born after today will be born 100 years ago
# based on https://stackoverflow.com/a/3312971/4575331
posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100
as.Date(posixlt)
}
to_age_4D <- function(from, to) {
from_lt = as.POSIXlt(from)
to_lt = as.POSIXlt(to)
age = to_lt$year - from_lt$year
ifelse(to_lt$mon < from_lt$mon |
(to_lt$mon == from_lt$mon & to_lt$mday < from_lt$mday),
age - 1, age)
}
colnames(data_4D) <- tolower(colnames(data_4D))
if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
# add age
data_4D$age <- NA_integer_
}
cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling",
"afd.", "spec", "mat", "matbijz.", "mocode",
"amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl",
"cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin",
"cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf",
"fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero",
"metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf",
"oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa",
"spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb",
"vanc", "vori")
# this ones actually exist
cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)]
# order of columns
data_4D <- data_4D[, cols_wanted]
# rename of columns
colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)
if ("date_birth" %in% colnames(data_4D)) {
data_4D$date_birth <- to_date_4D(data_4D$date_birth)
}
if ("date_received" %in% colnames(data_4D)) {
data_4D$date_received <- to_date_4D(data_4D$date_received)
}
if ("age" %in% colnames(data_4D)) {
data_4D$age <- to_age_4D(data_4D$date_birth, data_4D$date_received)
}
if ("gender" %in% colnames(data_4D)) {
data_4D$gender[data_4D$gender == "V"] <- "F"
}
if ("mo" %in% colnames(data_4D)) {
data_4D$mo <- as.mo(data_4D$mo)
# column right of mo is:
drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1]
if (!is.na(drug1)) {
# and last is:
drug_last <- colnames(data_4D)[length(data_4D)]
# transform those to rsi:
data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi))
}
}
data_4D
}

View File

@ -599,14 +599,24 @@ mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English
mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system
# "Staphylococcus à coagulase négative (CoNS)"
microbenchmark(A = mo_fullname("CoNS", language = "en"),
B = mo_fullname("CoNS", language = "fr"),
microbenchmark(en = mo_fullname("CoNS", language = "en"),
de = mo_fullname("CoNS", language = "de"),
nl = mo_fullname("CoNS", language = "nl"),
es = mo_fullname("CoNS", language = "es"),
it = mo_fullname("CoNS", language = "it"),
fr = mo_fullname("CoNS", language = "fr"),
pt = mo_fullname("CoNS", language = "pt"),
times = 10,
unit = "ms")
# Unit: milliseconds
# expr min lq mean median uq max neval
# A 6.080733 6.33684 6.467129 6.493773 6.593926 6.963666 10
# B 14.076651 14.10452 14.446035 14.315893 14.636918 15.254106 10
# expr min lq mean median uq max neval
# en 6.093583 6.51724 6.555105 6.562986 6.630663 6.99698 100
# de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956 100
# nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283 100
# es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168 100
# it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284 100
# fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923 100
# pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701 100
```
Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.

104
man/read_4D.Rd Normal file
View File

@ -0,0 +1,104 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read_4d.R
\name{read_4D}
\alias{read_4D}
\title{Read data from 4D database}
\usage{
read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
dec = ",", na.strings = c("NA", "", "."), skip = 2,
check.names = TRUE, strip.white = TRUE, fill = TRUE,
blank.lines.skip = TRUE, stringsAsFactors = FALSE,
fileEncoding = "UTF-8", encoding = "UTF-8")
}
\arguments{
\item{file}{the name of the file which the data are to be read from.
Each row of the table appears as one line of the file. If it does
not contain an \emph{absolute} path, the file name is
\emph{relative} to the current working directory,
\code{\link{getwd}()}. Tilde-expansion is performed where supported.
This can be a compressed file (see \code{\link{file}}).
Alternatively, \code{file} can be a readable text-mode
\link{connection} (which will be opened for reading if
necessary, and if so \code{\link{close}}d (and hence destroyed) at
the end of the function call). (If \code{\link{stdin}()} is used,
the prompts for lines may be somewhat confusing. Terminate input
with a blank line or an EOF signal, \code{Ctrl-D} on Unix and
\code{Ctrl-Z} on Windows. Any pushback on \code{stdin()} will be
cleared before return.)
\code{file} can also be a complete URL. (For the supported URL
schemes, see the \sQuote{URLs} section of the help for
\code{\link{url}}.)
}
\item{header}{a logical value indicating whether the file contains the
names of the variables as its first line. If missing, the value is
determined from the file format: \code{header} is set to \code{TRUE}
if and only if the first row contains one fewer field than the
number of columns.}
\item{sep}{the field separator character. Values on each line of the
file are separated by this character. If \code{sep = ""} (the
default for \code{read.table}) the separator is \sQuote{white space},
that is one or more spaces, tabs, newlines or carriage returns.}
\item{quote}{the set of quoting characters. To disable quoting
altogether, use \code{quote = ""}. See \code{\link{scan}} for the
behaviour on quotes embedded in quotes. Quoting is only considered
for columns read as character, which is all of them unless
\code{colClasses} is specified.}
\item{dec}{the character used in the file for decimal points.}
\item{na.strings}{a character vector of strings which are to be
interpreted as \code{\link{NA}} values. Blank fields are also
considered to be missing values in logical, integer, numeric and
complex fields. Note that the test happens \emph{after}
white space is stripped from the input, so \code{na.strings}
values may need their own white space stripped in advance.}
\item{skip}{integer: the number of lines of the data file to skip before
beginning to read data.}
\item{check.names}{logical. If \code{TRUE} then the names of the
variables in the data frame are checked to ensure that they are
syntactically valid variable names. If necessary they are adjusted
(by \code{\link{make.names}}) so that they are, and also to ensure
that there are no duplicates.}
\item{strip.white}{logical. Used only when \code{sep} has
been specified, and allows the stripping of leading and trailing
white space from unquoted \code{character} fields (\code{numeric} fields
are always stripped). See \code{\link{scan}} for further details
(including the exact meaning of \sQuote{white space}),
remembering that the columns may include the row names.}
\item{fill}{logical. If \code{TRUE} then in case the rows have unequal
length, blank fields are implicitly added. See \sQuote{Details}.}
\item{blank.lines.skip}{logical: if \code{TRUE} blank lines in the
input are ignored.}
\item{stringsAsFactors}{logical: should character vectors be converted
to factors? Note that this is overridden by \code{as.is} and
\code{colClasses}, both of which allow finer control.}
\item{fileEncoding}{character string: if non-empty declares the
encoding used on a file (not a connection) so the character data can
be re-encoded. See the \sQuote{Encoding} section of the help for
\code{\link{file}}, the \sQuote{R Data Import/Export Manual} and
\sQuote{Note}.
}
\item{encoding}{encoding to be assumed for input strings. It is
used to mark character strings as known to be in
Latin-1 or UTF-8 (see \code{\link{Encoding}}): it is not used to
re-encode the input, but allows \R to handle encoded strings in
their native encoding (if one of those two). See \sQuote{Value}
and \sQuote{Note}.
}
}
\description{
This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
}

View File

@ -9,7 +9,7 @@ test_that("get_locale works", {
expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)")
expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)")
expect_identical(mo_fullname("CoNS", "it"), "Staphylococcus negativo coagulasi (CoNS)")
expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus à coagulase négative (CoNS)")
expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus \u00e0 coagulase n\u00e9gative (CoNS)")
expect_identical(mo_fullname("CoNS", "pt"), "Staphylococcus coagulase negativo (CoNS)")
})