read.4D improvements

2026-01-11 16:34:36 +01:00 · 2018-11-15 12:42:35 +01:00
parent 5cb8f3cf72
commit fab64e6728
10 changed files with 89 additions and 51 deletions
--- a/2
+++ b/2
@@ -67,4 +67,4 @@ BugReports: https://gitlab.com/msberends/AMR/issues
 License: GPL-2 | file LICENSE
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1
--- a/4
+++ b/4
@@ -119,7 +119,7 @@ export(portion_S)
 export(portion_SI)
 export(portion_df)
 export(ratio)
-export(read_4D)
+export(read.4D)
 export(resistance_predict)
 export(right_join_microorganisms)
 export(rsi)
@@ -239,6 +239,6 @@ importFrom(stats,mad)
 importFrom(stats,pchisq)
 importFrom(stats,predict)
 importFrom(stats,sd)
 importFrom(utils,View)
 importFrom(utils,browseVignettes)
 importFrom(utils,installed.packages)
 importFrom(xml2,read_html)
--- a/NEWS.md
+++ b/NEWS.md
@@ -5,7 +5,7 @@
 * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all`
 * Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default.
 * Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`.
-* Function `read_4D` to read from the 4D database of the MMB department of the UMCG
+* Function `read.4D` to read from the 4D database of the MMB department of the UMCG
 * Functions `mo_authors` and `mo_year` to get specific values about the scientific reference of a taxonomic entry
 #### Changed
@@ -57,6 +57,7 @@
 * Speed improvement for `is.rsi.eligible`, now 15-20 times faster
 * In `g.test`, when `sum(x)` is below 1000 or any of the expected values is below 5, Fisher's Exact Test will be suggested
 * `ab_name` will try to fall back on `as.atc` when no results are found
 * Removed the addin to view data sets
 #### Other
 * New dependency on package `crayon`, to support formatted text in the console
--- a/R/get_locale.R
+++ b/R/get_locale.R
@@ -47,7 +47,7 @@ get_locale <- function() {
  } else if (grepl("^(Portuguese|Portugu.s|pt_|PT_)", lang, ignore.case = FALSE)) {
    "pt"
  } else {
-    # other language, set to English
+    # other language -> set to English
    "en"
  }
 }
--- a/R/ggplot_rsi.R
+++ b/R/ggplot_rsi.R
@@ -49,6 +49,7 @@
 #'
 #' \code{ggplot_rsi} is a wrapper around all above functions that uses data as first input. This makes it possible to use this function after a pipe (\code{\%>\%}). See Examples.
 #' @rdname ggplot_rsi
 #' @importFrom utils installed.packages
 #' @export
 #' @examples
 #' library(dplyr)
--- a/R/misc.R
+++ b/R/misc.R
@@ -26,24 +26,6 @@ addin_insert_like <- function() {
  rstudioapi::insertText(" %like% ")
 }
 #  No export, no Rd
 #' @importFrom utils View
 addin_open_antibiotics <- function() {
  View(antibiotics)
 }
 #  No export, no Rd
 #' @importFrom utils View
 addin_open_microorganisms <- function() {
  View(microorganisms)
 }
 #  No export, no Rd
 #' @importFrom utils View
 addin_open_septic_patients <- function() {
  View(septic_patients)
 }
 # No export, no Rd
 percent <- function(x, round = 1, force_zero = FALSE, ...) {
  val <- base::round(x * 100, digits = round)
--- a/R/mo.R
+++ b/R/mo.R
@@ -181,7 +181,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
    x[is.null(x)] <- NA
    # support tidyverse selection like: df %>% select(colA)
-    if (!is.vector(x)) {
+    if (!is.vector(x) & !is.null(dim(x))) {
      x <- pull(x, 1)
    }
  }
@@ -360,9 +360,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
        # next
      }
      if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) {
-        ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
+        mo_umcg <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2]
-        ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2]
+        mo_found <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == mo_umcg, 2]
-        x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L]
+        if (length(mo_found) == 0) {
          # not found
          x[i] <- NA_character_
          failures <- c(failures, x_backup[i])
        } else {
          x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L]
        }
        next
      }
      if (x_backup[i] %in% reference_df[, 1]) {
@@ -575,7 +581,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain =
  failures <- failures[!failures %in% c(NA, NULL, NaN)]
  if (length(failures) > 0) {
-    warning("These ", length(failures) , " values could not be coerced: ",
+    warning("These ", length(failures) , " values could not be coerced to a valid MO code: ",
            paste('"', unique(failures), '"', sep = "", collapse = ', '),
            ".",
            call. = FALSE)
--- a/R/read.4d.R
+++ b/R/read.4d.R
@@ -18,11 +18,15 @@
 #' Read data from 4D database
 #'
-#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
+#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
 #' @inheritParams utils::read.table
 #' @param info a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions
 #' @details Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.
 #' @export
-read_4D <- function(file,
+read.4D <- function(file,
                    info = interactive(),
                    header = TRUE,
                    row.names = NULL,
                    sep = "\t",
                    quote = "\"'",
                    dec = ",",
@@ -36,7 +40,11 @@ read_4D <- function(file,
                    fileEncoding = "UTF-8",
                    encoding = "UTF-8") {
  if (info == TRUE) {
    message("Importing data... ", appendLF = FALSE)
  }
  data_4D <- utils::read.table(file = file,
                               row.names = row.names,
                               header = header,
                               sep = sep,
                               quote = quote,
@@ -57,7 +65,7 @@ read_4D <- function(file,
    posixlt <- as.POSIXlt(date_regular)
    # born after today will be born 100 years ago
    # based on https://stackoverflow.com/a/3312971/4575331
-    posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100
+    posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year <- posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year - 100
    as.Date(posixlt)
  }
  to_age_4D <- function(from, to) {
@@ -71,6 +79,15 @@ read_4D <- function(file,
           age - 1, age)
  }
  if (info == TRUE) {
    message("OK\nTransforming column names... ", appendLF = FALSE)
  }
  if ("row.names" %in% colnames(data_4D) & all(is.na(data_4D[, ncol(data_4D)]))) {
    # remove first column name "row.names" and remove last empty column
    colnames(data_4D) <- c(colnames(data_4D)[2:ncol(data_4D)], "_skip_last")
    data_4D <- data_4D[, -ncol(data_4D)]
  }
  colnames(data_4D) <- tolower(colnames(data_4D))
  if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) {
    # add age
@@ -91,6 +108,10 @@ read_4D <- function(file,
  # order of columns
  data_4D <- data_4D[, cols_wanted]
  # backup original column names
  colnames.bak <- toupper(colnames(data_4D))
  colnames.bak[colnames.bak == "AGE"] <- NULL
  # rename of columns
  colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE)
@@ -104,6 +125,9 @@ read_4D <- function(file,
  colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE)
  colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE)
  if (info == TRUE) {
    message("OK\nTransforming dates and age... ", appendLF = FALSE)
  }
  if ("date_birth" %in% colnames(data_4D)) {
    data_4D$date_birth <- to_date_4D(data_4D$date_birth)
@@ -117,6 +141,10 @@ read_4D <- function(file,
  if ("gender" %in% colnames(data_4D)) {
    data_4D$gender[data_4D$gender == "V"] <- "F"
  }
  if (info == TRUE) {
    message("OK\nTransforming MO codes... ", appendLF = FALSE)
  }
  if ("mo" %in% colnames(data_4D)) {
    data_4D$mo <- as.mo(data_4D$mo)
    # column right of mo is:
@@ -129,6 +157,18 @@ read_4D <- function(file,
    }
  }
  # set original column names as label (can be seen in RStudio Viewer)
  if (info == TRUE) {
    message("OK\nSetting original column names as label... ", appendLF = FALSE)
  }
  for (i in 1:ncol(data_4D)) {
    attr(data_4D[, i], "label") <- colnames.bak[i]
  }
  if (info == TRUE) {
    message("OK")
  }
  data_4D
 }
--- a/inst/rstudio/addins.dcf
+++ b/inst/rstudio/addins.dcf
@@ -5,15 +5,3 @@ Interactive: false
 Name: Insert %like%
 Binding: addin_insert_like
 Interactive: false
 Name: View 'antibiotics' data set
 Binding: addin_open_antibiotics
 Interactive: false
 Name: View 'microorganisms' data set
 Binding: addin_open_microorganisms
 Interactive: false
 Name: View 'septic_patients' data set
 Binding: addin_open_septic_patients
 Interactive: false
--- a/man/read.4D.Rd
+++ b/man/read.4D.Rd
@@ -1,13 +1,13 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/read_4d.R
+% Please edit documentation in R/read.4d.R
-\name{read_4D}
+\name{read.4D}
-\alias{read_4D}
+\alias{read.4D}
 \title{Read data from 4D database}
 \usage{
-read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
+read.4D(file, info = interactive(), header = TRUE, row.names = NULL,
-  dec = ",", na.strings = c("NA", "", "."), skip = 2,
+  sep = "\\t", quote = "\\"'", dec = ",", na.strings = c("NA", "",
-  check.names = TRUE, strip.white = TRUE, fill = TRUE,
+  "."), skip = 2, check.names = TRUE, strip.white = TRUE,
-  blank.lines.skip = TRUE, stringsAsFactors = FALSE,
+  fill = TRUE, blank.lines.skip = TRUE, stringsAsFactors = FALSE,
  fileEncoding = "UTF-8", encoding = "UTF-8")
 }
 \arguments{
@@ -32,12 +32,29 @@ read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
    \code{\link{url}}.)
  }
 \item{info}{a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions}
 \item{header}{a logical value indicating whether the file contains the
    names of the variables as its first line.  If missing, the value is
    determined from the file format: \code{header} is set to \code{TRUE}
    if and only if the first row contains one fewer field than the
    number of columns.}
 \item{row.names}{a vector of row names.  This can be a vector giving
    the actual row names, or a single number giving the column of the
    table which contains the row names, or character string giving the
    name of the table column containing the row names.
    If there is a header and the first row contains one fewer field than
    the number of columns, the first column in the input is used for the
    row names.  Otherwise if \code{row.names} is missing, the rows are
    numbered.
    Using \code{row.names = NULL} forces row numbering. Missing or
    \code{NULL} \code{row.names} generate row names that are considered
    to be \sQuote{automatic} (and not preserved by \code{\link{as.matrix}}).
  }
 \item{sep}{the field separator character.  Values on each line of the
    file are separated by this character.  If \code{sep = ""} (the
    default for \code{read.table}) the separator is \sQuote{white space},
@@ -100,5 +117,8 @@ read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'",
  }
 }
 \description{
-This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
+This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}.
 }
 \details{
 Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer.
 }