read_4D

2026-02-28 09:37:41 +01:00 · 2018-11-05 15:30:49 +01:00
parent d07e9b904e
commit d15bb2ab3f
6 changed files with 256 additions and 6 deletions
--- a/1
+++ b/1
@@ -116,6 +116,7 @@ export(portion_S)
 export(portion_SI)
 export(portion_df)
 export(ratio)
 export(read_4D)
 export(resistance_predict)
 export(right_join_microorganisms)
 export(rsi)
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,6 +5,7 @@
 * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`
 * Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default.
 * Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`.
 * Function `read_4D` to read from the 4D database of the MMB department of the UMCG
 #### Changed
 * Big changes to the `EUCAST_rules` function:
--- a/R/read_4d.R
+++ b/R/read_4d.R
@@ -0,0 +1,134 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Read data from 4D database
 #'
 #' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
 #' @inheritParams utils::read.table
 #' @export
 read_4D <- function(file,
                    header = TRUE,
                    sep = "\t",
                    quote = "\"'",
                    dec = ",",
                    na.strings = c("NA", "", "."),
                    skip = 2,
                    check.names = TRUE,
                    strip.white = TRUE,
                    fill = TRUE,
                    blank.lines.skip = TRUE,
                    stringsAsFactors = FALSE,
                    fileEncoding = "UTF-8",
                    encoding = "UTF-8") {
  data_4D <- utils::read.table(file = file,
                               header = header,
                               sep = sep,
                               quote = quote,
                               dec = dec,
                               na.strings = na.strings,
                               skip = skip,
                               check.names = check.names,
                               strip.white = strip.white,
                               fill = fill,
                               blank.lines.skip = blank.lines.skip,
                               stringsAsFactors = stringsAsFactors,
                               fileEncoding = fileEncoding,
                               encoding = encoding)
  # helper functions
  to_date_4D <- function(x) {
    date_regular <- as.Date(x, format = "%d-%m-%y")
    posixlt <- as.POSIXlt(date_regular)
    # born after today will be born 100 years ago
    # based on https://stackoverflow.com/a/3312971/4575331
    posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100
    as.Date(posixlt)
  }
  to_age_4D <- function(from, to) {
    from_lt = as.POSIXlt(from)
    to_lt = as.POSIXlt(to)
    age = to_lt$year - from_lt$year
    ifelse(to_lt$mon < from_lt$mon |
             (to_lt$mon == from_lt$mon & to_lt$mday < from_lt$mday),
           age - 1, age)
  }
  colnames(data_4D) <- tolower(colnames(data_4D))
  if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
    # add age
    data_4D$age <- NA_integer_
  }
  cols_wanted <- c("patientnr", "gebdatum", "age", "mv", "monsternr", "afnamedat", "bepaling",
                   "afd.", "spec", "mat", "matbijz.",  "mocode",
                   "amfo", "amox", "anid", "azit", "casp", "cecl", "cefe", "cfcl",
                   "cfot", "cfox", "cfta", "cftr", "cfur", "chlo", "cipr", "clin",
                   "cocl", "ctta", "dapt", "doxy", "eryt", "fluo", "fluz", "fosf",
                   "fusi", "gehi", "gent", "imip", "kana", "levo", "line", "mero",
                   "metr", "mico", "mino", "moxi", "mupi", "nali", "nitr", "norf",
                   "oxac", "peni", "pipe", "pita", "poly", "posa", "quda", "rifa",
                   "spat", "teic", "tige", "tobr", "trim", "trsu", "vana", "vanb",
                   "vanc", "vori")
  # this ones actually exist
  cols_wanted <- cols_wanted[cols_wanted %in% colnames(data_4D)]
  # order of columns
  data_4D <- data_4D[, cols_wanted]
  # rename of columns
  colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mv", "gender", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("monsternr", "sample_id", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("afnamedat", "date_received", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("bepaling", "sample_test", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("afd.", "department", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("spec", "specialty", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("matbijz.", "specimen_type", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)
  if ("date_birth" %in% colnames(data_4D)) {
    data_4D$date_birth <- to_date_4D(data_4D$date_birth)
  }
  if ("date_received" %in% colnames(data_4D)) {
    data_4D$date_received <- to_date_4D(data_4D$date_received)
  }
  if ("age" %in% colnames(data_4D)) {
    data_4D$age <- to_age_4D(data_4D$date_birth, data_4D$date_received)
  }
  if ("gender" %in% colnames(data_4D)) {
    data_4D$gender[data_4D$gender == "V"] <- "F"
  }
  if ("mo" %in% colnames(data_4D)) {
    data_4D$mo <- as.mo(data_4D$mo)
    # column right of mo is:
    drug1 <- colnames(data_4D)[grep("^mo$", colnames(data_4D)) + 1]
    if (!is.na(drug1)) {
      # and last is:
      drug_last <- colnames(data_4D)[length(data_4D)]
      # transform those to rsi:
      data_4D <- suppressWarnings(mutate_at(data_4D, vars(drug1:drug_last), as.rsi))
    }
  }
  data_4D
 }
--- a/README.md
+++ b/README.md
@@ -599,14 +599,24 @@ mo_fullname("CoNS", language = "en") # or just mo_fullname("CoNS") on an English
 mo_fullname("CoNS", language = "fr") # or just mo_fullname("CoNS") on a French system
 # "Staphylococcus à coagulase négative (CoNS)"
-microbenchmark(A = mo_fullname("CoNS", language = "en"),
+microbenchmark(en = mo_fullname("CoNS", language = "en"),
-               B = mo_fullname("CoNS", language = "fr"),
+               de = mo_fullname("CoNS", language = "de"),
               nl = mo_fullname("CoNS", language = "nl"),
               es = mo_fullname("CoNS", language = "es"),
               it = mo_fullname("CoNS", language = "it"),
               fr = mo_fullname("CoNS", language = "fr"),
               pt = mo_fullname("CoNS", language = "pt"),
               times = 10,
               unit = "ms")
 # Unit: milliseconds
 #  expr       min       lq      mean    median        uq      max neval
-#     A  6.080733  6.33684  6.467129  6.493773  6.593926  6.963666    10
+#    en  6.093583  6.51724  6.555105  6.562986  6.630663  6.99698   100
-#     B 14.076651 14.10452 14.446035 14.315893 14.636918 15.254106    10
+#    de 13.934874 14.35137 16.891587 14.462210 14.764658 43.63956   100
 #    nl 13.900092 14.34729 15.943268 14.424565 14.581535 43.76283   100
 #    es 13.833813 14.34596 14.574783 14.439757 14.653994 17.49168   100
 #    it 13.811883 14.36621 15.179060 14.453515 14.812359 43.64284   100
 #    fr 13.798683 14.37019 16.344731 14.468775 14.697610 48.62923   100
 #    pt 13.789674 14.36244 15.706321 14.443772 14.679905 44.76701   100
 ```
 Currently supported are German, Dutch, Spanish, Italian, French and Portuguese.
--- a/man/read_4D.Rd
+++ b/man/read_4D.Rd
@@ -0,0 +1,104 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/read_4d.R
 \name{read_4D}
 \alias{read_4D}
 \title{Read data from 4D database}
 \usage{
 read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
  dec = ",", na.strings = c("NA", "", "."), skip = 2,
  check.names = TRUE, strip.white = TRUE, fill = TRUE,
  blank.lines.skip = TRUE, stringsAsFactors = FALSE,
  fileEncoding = "UTF-8", encoding = "UTF-8")
 }
 \arguments{
 \item{file}{the name of the file which the data are to be read from.
    Each row of the table appears as one line of the file.  If it does
    not contain an \emph{absolute} path, the file name is
    \emph{relative} to the current working directory,
    \code{\link{getwd}()}. Tilde-expansion is performed where supported.
    This can be a compressed file (see \code{\link{file}}).
    Alternatively, \code{file} can be a readable text-mode
    \link{connection} (which will be opened for reading if
    necessary, and if so \code{\link{close}}d (and hence destroyed) at
    the end of the function call).  (If \code{\link{stdin}()} is used,
    the prompts for lines may be somewhat confusing.  Terminate input
    with a blank line or an EOF signal, \code{Ctrl-D} on Unix and
    \code{Ctrl-Z} on Windows.  Any pushback on \code{stdin()} will be
    cleared before return.)
    \code{file} can also be a complete URL.  (For the supported URL
    schemes, see the \sQuote{URLs} section of the help for
    \code{\link{url}}.)
  }
 \item{header}{a logical value indicating whether the file contains the
    names of the variables as its first line.  If missing, the value is
    determined from the file format: \code{header} is set to \code{TRUE}
    if and only if the first row contains one fewer field than the
    number of columns.}
 \item{sep}{the field separator character.  Values on each line of the
    file are separated by this character.  If \code{sep = ""} (the
    default for \code{read.table}) the separator is \sQuote{white space},
    that is one or more spaces, tabs, newlines or carriage returns.}
 \item{quote}{the set of quoting characters. To disable quoting
    altogether, use \code{quote = ""}.  See \code{\link{scan}} for the
    behaviour on quotes embedded in quotes.  Quoting is only considered
    for columns read as character, which is all of them unless
    \code{colClasses} is specified.}
 \item{dec}{the character used in the file for decimal points.}
 \item{na.strings}{a character vector of strings which are to be
    interpreted as \code{\link{NA}} values.  Blank fields are also
    considered to be missing values in logical, integer, numeric and
    complex fields.  Note that the test happens \emph{after} 
    white space is stripped from the input, so \code{na.strings} 
    values may need their own white space stripped in advance.}
 \item{skip}{integer: the number of lines of the data file to skip before
    beginning to read data.}
 \item{check.names}{logical.  If \code{TRUE} then the names of the
    variables in the data frame are checked to ensure that they are
    syntactically valid variable names.  If necessary they are adjusted
    (by \code{\link{make.names}}) so that they are, and also to ensure
    that there are no duplicates.}
 \item{strip.white}{logical. Used only when \code{sep} has
    been specified, and allows the stripping of leading and trailing
    white space from unquoted \code{character} fields (\code{numeric} fields
    are always stripped).  See \code{\link{scan}} for further details
    (including the exact meaning of \sQuote{white space}),
    remembering that the columns may include the row names.}
 \item{fill}{logical. If \code{TRUE} then in case the rows have unequal
    length, blank fields are implicitly added.  See \sQuote{Details}.}
 \item{blank.lines.skip}{logical: if \code{TRUE} blank lines in the
    input are ignored.}
 \item{stringsAsFactors}{logical: should character vectors be converted
    to factors?  Note that this is overridden by \code{as.is} and
    \code{colClasses}, both of which allow finer control.}
 \item{fileEncoding}{character string: if non-empty declares the
    encoding used on a file (not a connection) so the character data can
    be re-encoded.  See the \sQuote{Encoding} section of the help for
    \code{\link{file}}, the \sQuote{R Data Import/Export Manual} and
    \sQuote{Note}.
  }
 \item{encoding}{encoding to be assumed for input strings.  It is
    used to mark character strings as known to be in
    Latin-1 or UTF-8 (see \code{\link{Encoding}}): it is not used to
    re-encode the input, but allows \R to handle encoded strings in
    their native encoding (if one of those two).  See \sQuote{Value}
    and \sQuote{Note}.
  }
 }
 \description{
 This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
 }
--- a/tests/testthat/test-get_locale.R
+++ b/tests/testthat/test-get_locale.R
@@ -9,7 +9,7 @@ test_that("get_locale works", {
  expect_identical(mo_fullname("CoNS", "nl"), "Coagulase-negatieve Staphylococcus (CNS)")
  expect_identical(mo_fullname("CoNS", "es"), "Staphylococcus coagulasa negativo (CoNS)")
  expect_identical(mo_fullname("CoNS", "it"), "Staphylococcus negativo coagulasi (CoNS)")
-  expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus à coagulase négative (CoNS)")
+  expect_identical(mo_fullname("CoNS", "fr"), "Staphylococcus \u00e0 coagulase n\u00e9gative (CoNS)")
  expect_identical(mo_fullname("CoNS", "pt"), "Staphylococcus coagulase negativo (CoNS)")
 })