mirror of
https://github.com/msberends/AMR.git
synced 2024-12-25 19:26:13 +01:00
read_4D
This commit is contained in:
parent
d07e9b904e
commit
d15bb2ab3f
@ -116,6 +116,7 @@ export(portion_S)
|
||||
export(portion_SI)
|
||||
export(portion_df)
|
||||
export(ratio)
|
||||
export(read_4D)
|
||||
export(resistance_predict)
|
||||
export(right_join_microorganisms)
|
||||
export(rsi)
|
||||
|
1
NEWS.md
1
NEWS.md
@ -5,6 +5,7 @@
|
||||
* Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`
|
||||
* Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default.
|
||||
* Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`.
|
||||
* Function `read_4D` to read from the 4D database of the MMB department of the UMCG
|
||||
|
||||
#### Changed
|
||||
* Big changes to the `EUCAST_rules` function:
|
||||
|
134
R/read_4d.R
Normal file
134
R/read_4d.R
Normal file
@ -0,0 +1,134 @@
|
||||
# ==================================================================== #
|
||||
# TITLE #
|
||||
# Antimicrobial Resistance (AMR) Analysis #
|
||||
# #
|
||||
# AUTHORS #
|
||||
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
|
||||
# #
|
||||
# LICENCE #
|
||||
# This program is free software; you can redistribute it and/or modify #
|
||||
# it under the terms of the GNU General Public License version 2.0, #
|
||||
# as published by the Free Software Foundation. #
|
||||
# #
|
||||
# This program is distributed in the hope that it will be useful, #
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
||||
# GNU General Public License for more details. #
|
||||
# ==================================================================== #
|
||||
|
||||
#' Read data from 4D database
|
||||
#'
|
||||
#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
|
||||
#' @inheritParams utils::read.table
|
||||
#' @export
|
||||
read_4D <- function(file,
|
||||
header = TRUE,
|
||||
sep = "\t",
|
||||
quote = "\"'",
|
||||
dec = ",",
|
||||
na.strings = c("NA", "", "."),
|
||||
skip = 2,
|
||||
check.names = TRUE,
|
||||
strip.white = TRUE,
|
||||
fill = TRUE,
|
||||
blank.lines.skip = TRUE,
|
||||
stringsAsFactors = FALSE,
|
||||
fileEncoding = "UTF-8",
|
||||
encoding = "UTF-8") {
|
||||
|
||||
data_4D <- utils::read.table(file = file,
|
||||
header = header,
|
||||
sep = sep,
|
||||
quote = quote,
|
||||
dec = dec,
|
||||
na.strings = na.strings,
|
||||
skip = skip,
|
||||
check.names = check.names,
|
||||
strip.white = strip.white,
|
||||
fill = fill,
|
||||
blank.lines.skip = blank.lines.skip,
|
||||
stringsAsFactors = stringsAsFactors,
|
||||
fileEncoding = fileEncoding,
|
||||
encoding = encoding)
|
||||
|
||||
# helper functions
|
||||
to_date_4D <- function(x) {
|
||||
date_regular <- as.Date(x, format = "%d-%m-%y")
|
||||
posixlt <- as.POSIXlt(date_regular)
|
||||
# born after today will be born 100 years ago
|
||||
# based on https://stackoverflow.com/a/3312971/4575331
|
||||
posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100
|
||||
as.Date(posixlt)
|
||||
}
|
||||
to_age_4D <- function(from, to) {
|
||||
from_lt = as.POSIXlt(from)
|
||||
to_lt = as.POSIXlt(to)
|
||||
|
||||
age = to_lt$year - from_lt$year
|
||||
|
||||
ifelse(to_lt$mon < from_lt$mon |
|
||||
(to_lt$mon == from_lt$mon & to_lt$mday < from_lt$mday),
|
||||
age - 1, age)
|
||||
}
|
||||
|
||||
colnames(data_4D) <- tolower(colnames(data_4D))
|
||||
if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
|
||||
# add age
|
||||
data_4D$age <- NA_integer_
|
||||
}
|
||||
cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling",
|
||||
"afd.", "spec", "mat", "matbijz.", "mocode",
|
||||
"amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl",
|
||||
"cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin",
|
||||
"cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf",
|
||||
"fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero",
|
||||
"metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf",
|
||||
"oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa",
|
||||
"spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb",
|
||||
"vanc", "vori")
|
||||
# this ones actually exist
|
||||
cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)]
|
||||
# order of columns
|
||||
data_4D <- data_4D[, cols_wanted]
|
||||
|
||||
# rename of columns
|
||||
colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
|
||||
colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)
|
||||
|
||||
if ("date_birth" %in% colnames(data_4D)) {
|
||||
data_4D$date_birth <- to_date_4D(data_4D$date_birth)
|
||||
|
||||
}
|
||||
if ("date_received" %in% colnames(data_4D)) {
|
||||
data_4D$date_received <- to_date_4D(data_4D$date_received)
|
||||
}
|
||||
if ("age" %in% colnames(data_4D)) {
|
||||
data_4D$age <- to_age_4D(data_4D$date_birth, data_4D$date_received)
|
||||
}
|
||||
if ("gender" %in% colnames(data_4D)) {
|
||||
data_4D$gender[data_4D$gender == "V"] <- "F"
|
||||
}
|
||||
if ("mo" %in% colnames(data_4D)) {
|
||||
data_4D$mo <- as.mo(data_4D$mo)
|
||||
# column right of mo is:
|
||||
drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1]
|
||||
if (!is.na(drug1)) {
|
||||
# and last is:
|
||||
drug_last <- colnames(data_4D)[length(data_4D)]
|
||||
# transform those to rsi:
|
||||
data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi))
|
||||
}
|
||||
}
|
||||
|
||||
data_4D
|
||||
}
|
||||
|
20
README.md
20
README.md
@ -599,14 +599,24 @@ mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English
|
||||
mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system
|
||||
# "Staphylococcus à coagulase négative (CoNS)"
|
||||
|
||||
microbenchmark(A = mo_fullname("CoNS", language = "en"),
|
||||
B = mo_fullname("CoNS", language = "fr"),
|
||||
microbenchmark(en = mo_fullname("CoNS", language = "en"),
|
||||
de = mo_fullname("CoNS", language = "de"),
|
||||
nl = mo_fullname("CoNS", language = "nl"),
|
||||
es = mo_fullname("CoNS", language = "es"),
|
||||
it = mo_fullname("CoNS", language = "it"),
|
||||
fr = mo_fullname("CoNS", language = "fr"),
|
||||
pt = mo_fullname("CoNS", language = "pt"),
|
||||
times = 10,
|
||||
unit = "ms")
|
||||
# Unit: milliseconds
|
||||
# expr min lq mean median uq max neval
|
||||
# A 6.080733 6.33684 6.467129 6.493773 6.593926 6.963666 10
|
||||
# B 14.076651 14.10452 14.446035 14.315893 14.636918 15.254106 10
|
||||
# expr min lq mean median uq max neval
|
||||
# en 6.093583 6.51724 6.555105 6.562986 6.630663 6.99698 100
|
||||
# de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956 100
|
||||
# nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283 100
|
||||
# es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168 100
|
||||
# it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284 100
|
||||
# fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923 100
|
||||
# pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701 100
|
||||
```
|
||||
|
||||
Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.
|
||||
|
104
man/read_4D.Rd
Normal file
104
man/read_4D.Rd
Normal file
@ -0,0 +1,104 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/read_4d.R
|
||||
\name{read_4D}
|
||||
\alias{read_4D}
|
||||
\title{Read data from 4D database}
|
||||
\usage{
|
||||
read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
|
||||
dec = ",", na.strings = c("NA", "", "."), skip = 2,
|
||||
check.names = TRUE, strip.white = TRUE, fill = TRUE,
|
||||
blank.lines.skip = TRUE, stringsAsFactors = FALSE,
|
||||
fileEncoding = "UTF-8", encoding = "UTF-8")
|
||||
}
|
||||
\arguments{
|
||||
\item{file}{the name of the file which the data are to be read from.
|
||||
Each row of the table appears as one line of the file. If it does
|
||||
not contain an \emph{absolute} path, the file name is
|
||||
\emph{relative} to the current working directory,
|
||||
\code{\link{getwd}()}. Tilde-expansion is performed where supported.
|
||||
This can be a compressed file (see \code{\link{file}}).
|
||||
|
||||
Alternatively, \code{file} can be a readable text-mode
|
||||
\link{connection} (which will be opened for reading if
|
||||
necessary, and if so \code{\link{close}}d (and hence destroyed) at
|
||||
the end of the function call). (If \code{\link{stdin}()} is used,
|
||||
the prompts for lines may be somewhat confusing. Terminate input
|
||||
with a blank line or an EOF signal, \code{Ctrl-D} on Unix and
|
||||
\code{Ctrl-Z} on Windows. Any pushback on \code{stdin()} will be
|
||||
cleared before return.)
|
||||
|
||||
\code{file} can also be a complete URL. (For the supported URL
|
||||
schemes, see the \sQuote{URLs} section of the help for
|
||||
\code{\link{url}}.)
|
||||
}
|
||||
|
||||
\item{header}{a logical value indicating whether the file contains the
|
||||
names of the variables as its first line. If missing, the value is
|
||||
determined from the file format: \code{header} is set to \code{TRUE}
|
||||
if and only if the first row contains one fewer field than the
|
||||
number of columns.}
|
||||
|
||||
\item{sep}{the field separator character. Values on each line of the
|
||||
file are separated by this character. If \code{sep = ""} (the
|
||||
default for \code{read.table}) the separator is \sQuote{white space},
|
||||
that is one or more spaces, tabs, newlines or carriage returns.}
|
||||
|
||||
\item{quote}{the set of quoting characters. To disable quoting
|
||||
altogether, use \code{quote = ""}. See \code{\link{scan}} for the
|
||||
behaviour on quotes embedded in quotes. Quoting is only considered
|
||||
for columns read as character, which is all of them unless
|
||||
\code{colClasses} is specified.}
|
||||
|
||||
\item{dec}{the character used in the file for decimal points.}
|
||||
|
||||
\item{na.strings}{a character vector of strings which are to be
|
||||
interpreted as \code{\link{NA}} values. Blank fields are also
|
||||
considered to be missing values in logical, integer, numeric and
|
||||
complex fields. Note that the test happens \emph{after}
|
||||
white space is stripped from the input, so \code{na.strings}
|
||||
values may need their own white space stripped in advance.}
|
||||
|
||||
\item{skip}{integer: the number of lines of the data file to skip before
|
||||
beginning to read data.}
|
||||
|
||||
\item{check.names}{logical. If \code{TRUE} then the names of the
|
||||
variables in the data frame are checked to ensure that they are
|
||||
syntactically valid variable names. If necessary they are adjusted
|
||||
(by \code{\link{make.names}}) so that they are, and also to ensure
|
||||
that there are no duplicates.}
|
||||
|
||||
\item{strip.white}{logical. Used only when \code{sep} has
|
||||
been specified, and allows the stripping of leading and trailing
|
||||
white space from unquoted \code{character} fields (\code{numeric} fields
|
||||
are always stripped). See \code{\link{scan}} for further details
|
||||
(including the exact meaning of \sQuote{white space}),
|
||||
remembering that the columns may include the row names.}
|
||||
|
||||
\item{fill}{logical. If \code{TRUE} then in case the rows have unequal
|
||||
length, blank fields are implicitly added. See \sQuote{Details}.}
|
||||
|
||||
\item{blank.lines.skip}{logical: if \code{TRUE} blank lines in the
|
||||
input are ignored.}
|
||||
|
||||
\item{stringsAsFactors}{logical: should character vectors be converted
|
||||
to factors? Note that this is overridden by \code{as.is} and
|
||||
\code{colClasses}, both of which allow finer control.}
|
||||
|
||||
\item{fileEncoding}{character string: if non-empty declares the
|
||||
encoding used on a file (not a connection) so the character data can
|
||||
be re-encoded. See the \sQuote{Encoding} section of the help for
|
||||
\code{\link{file}}, the \sQuote{R Data Import/Export Manual} and
|
||||
\sQuote{Note}.
|
||||
}
|
||||
|
||||
\item{encoding}{encoding to be assumed for input strings. It is
|
||||
used to mark character strings as known to be in
|
||||
Latin-1 or UTF-8 (see \code{\link{Encoding}}): it is not used to
|
||||
re-encode the input, but allows \R to handle encoded strings in
|
||||
their native encoding (if one of those two). See \sQuote{Value}
|
||||
and \sQuote{Note}.
|
||||
}
|
||||
}
|
||||
\description{
|
||||
This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
|
||||
}
|
@ -9,7 +9,7 @@ test_that("get_locale works", {
|
||||
expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)")
|
||||
expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)")
|
||||
expect_identical(mo_fullname("CoNS", "it"), "Staphylococcus negativo coagulasi (CoNS)")
|
||||
expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus à coagulase négative (CoNS)")
|
||||
expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus \u00e0 coagulase n\u00e9gative (CoNS)")
|
||||
expect_identical(mo_fullname("CoNS", "pt"), "Staphylococcus coagulase negativo (CoNS)")
|
||||
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user