diff --git a/DESCRIPTION b/DESCRIPTION index 7d2e89c6..f7fea146 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -67,4 +67,4 @@ BugReports: https://gitlab.com/msberends/AMR/issues License: GPL-2 | file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index 55e1b8ee..cbb34207 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -119,7 +119,7 @@ export(portion_S) export(portion_SI) export(portion_df) export(ratio) -export(read_4D) +export(read.4D) export(resistance_predict) export(right_join_microorganisms) export(rsi) @@ -239,6 +239,6 @@ importFrom(stats,mad) importFrom(stats,pchisq) importFrom(stats,predict) importFrom(stats,sd) -importFrom(utils,View) importFrom(utils,browseVignettes) +importFrom(utils,installed.packages) importFrom(xml2,read_html) diff --git a/NEWS.md b/NEWS.md index 8900db76..c45d4e3f 100755 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ * Function `count_all` to get all available isolates (that like all `portion_*` and `count_*` functions also supports `summarise` and `group_by`), the old `n_rsi` is now an alias of `count_all` * Function `get_locale` to determine language for language-dependent output for some `mo_*` functions. This is now the default value for their `language` parameter, by which the system language will be used at default. * Data sets `microorganismsDT`, `microorganisms.prevDT`, `microorganisms.unprevDT` and `microorganisms.oldDT` to improve the speed of `as.mo`. They are for reference only, since they are primarily for internal use of `as.mo`. -* Function `read_4D` to read from the 4D database of the MMB department of the UMCG +* Function `read.4D` to read from the 4D database of the MMB department of the UMCG * Functions `mo_authors` and `mo_year` to get specific values about the scientific reference of a taxonomic entry #### Changed @@ -57,6 +57,7 @@ * Speed improvement for `is.rsi.eligible`, now 15-20 times faster * In `g.test`, when `sum(x)` is below 1000 or any of the expected values is below 5, Fisher's Exact Test will be suggested * `ab_name` will try to fall back on `as.atc` when no results are found +* Removed the addin to view data sets #### Other * New dependency on package `crayon`, to support formatted text in the console diff --git a/R/get_locale.R b/R/get_locale.R index 0ea567e6..18a4e6db 100644 --- a/R/get_locale.R +++ b/R/get_locale.R @@ -47,7 +47,7 @@ get_locale <- function() { } else if (grepl("^(Portuguese|Portugu.s|pt_|PT_)", lang, ignore.case = FALSE)) { "pt" } else { - # other language, set to English + # other language -> set to English "en" } } diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index 38bd527b..e5fa46e6 100644 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -49,6 +49,7 @@ #' #' \code{ggplot_rsi} is a wrapper around all above functions that uses data as first input. This makes it possible to use this function after a pipe (\code{\%>\%}). See Examples. #' @rdname ggplot_rsi +#' @importFrom utils installed.packages #' @export #' @examples #' library(dplyr) diff --git a/R/misc.R b/R/misc.R index 83cfd2ef..ddd7d796 100755 --- a/R/misc.R +++ b/R/misc.R @@ -26,24 +26,6 @@ addin_insert_like <- function() { rstudioapi::insertText(" %like% ") } -# No export, no Rd -#' @importFrom utils View -addin_open_antibiotics <- function() { - View(antibiotics) -} - -# No export, no Rd -#' @importFrom utils View -addin_open_microorganisms <- function() { - View(microorganisms) -} - -# No export, no Rd -#' @importFrom utils View -addin_open_septic_patients <- function() { - View(septic_patients) -} - # No export, no Rd percent <- function(x, round = 1, force_zero = FALSE, ...) { val <- base::round(x * 100, digits = round) diff --git a/R/mo.R b/R/mo.R index 30075e6d..a27b0e9f 100644 --- a/R/mo.R +++ b/R/mo.R @@ -181,7 +181,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = x[is.null(x)] <- NA # support tidyverse selection like: df %>% select(colA) - if (!is.vector(x)) { + if (!is.vector(x) & !is.null(dim(x))) { x <- pull(x, 1) } } @@ -360,9 +360,15 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = # next } if (x_backup[i] %in% AMR::microorganisms.umcg[, 1]) { - ref_certe <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2] - ref_mo <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == ref_certe, 2] - x[i] <- microorganismsDT[mo == ref_mo, ..property][[1]][1L] + mo_umcg <- AMR::microorganisms.umcg[AMR::microorganisms.umcg[, 1] == x_backup[i], 2] + mo_found <- AMR::microorganisms.certe[AMR::microorganisms.certe[, 1] == mo_umcg, 2] + if (length(mo_found) == 0) { + # not found + x[i] <- NA_character_ + failures <- c(failures, x_backup[i]) + } else { + x[i] <- microorganismsDT[mo == mo_found, ..property][[1]][1L] + } next } if (x_backup[i] %in% reference_df[, 1]) { @@ -575,7 +581,7 @@ exec_as.mo <- function(x, Becker = FALSE, Lancefield = FALSE, allow_uncertain = failures <- failures[!failures %in% c(NA, NULL, NaN)] if (length(failures) > 0) { - warning("These ", length(failures) , " values could not be coerced: ", + warning("These ", length(failures) , " values could not be coerced to a valid MO code: ", paste('"', unique(failures), '"', sep = "", collapse = ', '), ".", call. = FALSE) diff --git a/R/read_4d.R b/R/read.4d.R similarity index 77% rename from R/read_4d.R rename to R/read.4d.R index bdf1bf2b..a1b88ba9 100644 --- a/R/read_4d.R +++ b/R/read.4d.R @@ -18,11 +18,15 @@ #' Read data from 4D database #' -#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}. +#' This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}. #' @inheritParams utils::read.table +#' @param info a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions +#' @details Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer. #' @export -read_4D <- function(file, +read.4D <- function(file, + info = interactive(), header = TRUE, + row.names = NULL, sep = "\t", quote = "\"'", dec = ",", @@ -36,7 +40,11 @@ read_4D <- function(file, fileEncoding = "UTF-8", encoding = "UTF-8") { + if (info == TRUE) { + message("Importing data... ", appendLF = FALSE) + } data_4D <- utils::read.table(file = file, + row.names = row.names, header = header, sep = sep, quote = quote, @@ -57,7 +65,7 @@ read_4D <- function(file, posixlt <- as.POSIXlt(date_regular) # born after today will be born 100 years ago # based on https://stackoverflow.com/a/3312971/4575331 - posixlt[date_regular > Sys.Date()]$year <- posixlt[date_regular > Sys.Date()]$year - 100 + posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year <- posixlt[date_regular > Sys.Date() & !is.na(posixlt)]$year - 100 as.Date(posixlt) } to_age_4D <- function(from, to) { @@ -71,6 +79,15 @@ read_4D <- function(file, age - 1, age) } + if (info == TRUE) { + message("OK\nTransforming column names... ", appendLF = FALSE) + } + if ("row.names" %in% colnames(data_4D) & all(is.na(data_4D[, ncol(data_4D)]))) { + # remove first column name "row.names" and remove last empty column + colnames(data_4D) <- c(colnames(data_4D)[2:ncol(data_4D)], "_skip_last") + data_4D <- data_4D[, -ncol(data_4D)] + } + colnames(data_4D) <- tolower(colnames(data_4D)) if (all(c("afnamedat", "gebdatum") %in% colnames(data_4D))) { # add age @@ -91,6 +108,10 @@ read_4D <- function(file, # order of columns data_4D <- data_4D[, cols_wanted] + # backup original column names + colnames.bak <- toupper(colnames(data_4D)) + colnames.bak[colnames.bak == "AGE"] <- NULL + # rename of columns colnames(data_4D) <- gsub("patientnr", "patient_id", colnames(data_4D), fixed = TRUE) colnames(data_4D) <- gsub("gebdatum", "date_birth", colnames(data_4D), fixed = TRUE) @@ -104,6 +125,9 @@ read_4D <- function(file, colnames(data_4D) <- gsub("mat", "specimen_group", colnames(data_4D), fixed = TRUE) colnames(data_4D) <- gsub("mocode", "mo", colnames(data_4D), fixed = TRUE) + if (info == TRUE) { + message("OK\nTransforming dates and age... ", appendLF = FALSE) + } if ("date_birth" %in% colnames(data_4D)) { data_4D$date_birth <- to_date_4D(data_4D$date_birth) @@ -117,6 +141,10 @@ read_4D <- function(file, if ("gender" %in% colnames(data_4D)) { data_4D$gender[data_4D$gender == "V"] <- "F" } + + if (info == TRUE) { + message("OK\nTransforming MO codes... ", appendLF = FALSE) + } if ("mo" %in% colnames(data_4D)) { data_4D$mo <- as.mo(data_4D$mo) # column right of mo is: @@ -129,6 +157,18 @@ read_4D <- function(file, } } + # set original column names as label (can be seen in RStudio Viewer) + if (info == TRUE) { + message("OK\nSetting original column names as label... ", appendLF = FALSE) + } + for (i in 1:ncol(data_4D)) { + attr(data_4D[, i], "label") <- colnames.bak[i] + } + + if (info == TRUE) { + message("OK") + } + data_4D } diff --git a/inst/rstudio/addins.dcf b/inst/rstudio/addins.dcf index 594d10b2..d43c060a 100644 --- a/inst/rstudio/addins.dcf +++ b/inst/rstudio/addins.dcf @@ -5,15 +5,3 @@ Interactive: false Name: Insert %like% Binding: addin_insert_like Interactive: false - -Name: View 'antibiotics' data set -Binding: addin_open_antibiotics -Interactive: false - -Name: View 'microorganisms' data set -Binding: addin_open_microorganisms -Interactive: false - -Name: View 'septic_patients' data set -Binding: addin_open_septic_patients -Interactive: false diff --git a/man/read_4D.Rd b/man/read.4D.Rd similarity index 75% rename from man/read_4D.Rd rename to man/read.4D.Rd index 03e64d56..e85e8784 100644 --- a/man/read_4D.Rd +++ b/man/read.4D.Rd @@ -1,13 +1,13 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_4d.R -\name{read_4D} -\alias{read_4D} +% Please edit documentation in R/read.4d.R +\name{read.4D} +\alias{read.4D} \title{Read data from 4D database} \usage{ -read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'", - dec = ",", na.strings = c("NA", "", "."), skip = 2, - check.names = TRUE, strip.white = TRUE, fill = TRUE, - blank.lines.skip = TRUE, stringsAsFactors = FALSE, +read.4D(file, info = interactive(), header = TRUE, row.names = NULL, + sep = "\\t", quote = "\\"'", dec = ",", na.strings = c("NA", "", + "."), skip = 2, check.names = TRUE, strip.white = TRUE, + fill = TRUE, blank.lines.skip = TRUE, stringsAsFactors = FALSE, fileEncoding = "UTF-8", encoding = "UTF-8") } \arguments{ @@ -32,12 +32,29 @@ read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'", \code{\link{url}}.) } +\item{info}{a logical to indicate whether info about the import should be printed, defaults to \code{TRUE} in interactive sessions} + \item{header}{a logical value indicating whether the file contains the names of the variables as its first line. If missing, the value is determined from the file format: \code{header} is set to \code{TRUE} if and only if the first row contains one fewer field than the number of columns.} +\item{row.names}{a vector of row names. This can be a vector giving + the actual row names, or a single number giving the column of the + table which contains the row names, or character string giving the + name of the table column containing the row names. + + If there is a header and the first row contains one fewer field than + the number of columns, the first column in the input is used for the + row names. Otherwise if \code{row.names} is missing, the rows are + numbered. + + Using \code{row.names = NULL} forces row numbering. Missing or + \code{NULL} \code{row.names} generate row names that are considered + to be \sQuote{automatic} (and not preserved by \code{\link{as.matrix}}). + } + \item{sep}{the field separator character. Values on each line of the file are separated by this character. If \code{sep = ""} (the default for \code{read.table}) the separator is \sQuote{white space}, @@ -100,5 +117,8 @@ read_4D(file, header = TRUE, sep = "\\t", quote = "\\"'", } } \description{ -This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the data set to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}. +This function is only useful for the MMB department of the UMCG. Use this function to \strong{import data by just defining the \code{file} parameter}. It will automatically transform birth dates and calculate patients age, translate the column names to English, transform the \code{mo} with \code{\link{as.mo}} and transform all antimicrobial columns with \code{\link{as.rsi}}. +} +\details{ +Column names will be transformed, but the original column names are set as a "label" attribute and can be seen in e.g. RStudio Viewer. }