MDRO, freq tables, new print format for tibbles

2025-12-04 01:22:01 +01:00 · 2018-04-18 12:24:54 +02:00
parent 3165c50d06
commit 2509e2413d
23 changed files with 1453 additions and 85 deletions
--- a/9
+++ b/9
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.1.2
+Version: 0.2.0
-Date: 2018-04-02
+Date: 2018-04-18
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
    person(
@@ -28,9 +28,12 @@ Depends:
    R (>= 3.2.0)
 Imports:
    dplyr (>= 0.7.0),
    data.table (>= 1.10.0),
    reshape2 (>= 1.4.0),
    xml2 (>= 1.0.0),
-    rvest (>= 0.3.2)
+    knitr (>= 1.0.0),
    rvest (>= 0.3.2),
    tibble
 Suggests:
    testthat (>= 1.0.2),
    covr (>= 3.0.1)
--- a/20
+++ b/20
@@ -7,11 +7,18 @@ S3method(barplot,mic)
 S3method(barplot,rsi)
 S3method(plot,mic)
 S3method(plot,rsi)
 S3method(print,data.table)
 S3method(print,mic)
 S3method(print,rsi)
 S3method(print,tbl)
 S3method(print,tbl_df)
 S3method(summary,mic)
 S3method(summary,rsi)
 export("%like%")
 export(BRMO)
 export(EUCAST_rules)
 export(MDRO)
 export(MRGN)
 export(abname)
 export(anti_join_microorganisms)
 export(as.mic)
@@ -20,6 +27,8 @@ export(atc_property)
 export(clipboard_export)
 export(clipboard_import)
 export(first_isolate)
 export(freq)
 export(frequency_tbl)
 export(full_join_microorganisms)
 export(guess_bactid)
 export(inner_join_microorganisms)
@@ -41,8 +50,11 @@ exportMethods(barplot.mic)
 exportMethods(barplot.rsi)
 exportMethods(plot.mic)
 exportMethods(plot.rsi)
 exportMethods(print.data.table)
 exportMethods(print.mic)
 exportMethods(print.rsi)
 exportMethods(print.tbl)
 exportMethods(print.tbl_df)
 exportMethods(summary.mic)
 exportMethods(summary.rsi)
 importFrom(dplyr,"%>%")
@@ -52,15 +64,19 @@ importFrom(dplyr,arrange)
 importFrom(dplyr,arrange_at)
 importFrom(dplyr,as_tibble)
 importFrom(dplyr,between)
 importFrom(dplyr,desc)
 importFrom(dplyr,filter)
 importFrom(dplyr,filter_at)
 importFrom(dplyr,group_by)
 importFrom(dplyr,group_by_at)
 importFrom(dplyr,group_size)
 importFrom(dplyr,group_vars)
 importFrom(dplyr,if_else)
 importFrom(dplyr,lag)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n_distinct)
 importFrom(dplyr,n_groups)
 importFrom(dplyr,progress_estimated)
 importFrom(dplyr,pull)
 importFrom(dplyr,row_number)
@@ -69,6 +85,7 @@ importFrom(dplyr,slice)
 importFrom(dplyr,summarise)
 importFrom(dplyr,tibble)
 importFrom(dplyr,vars)
 importFrom(grDevices,boxplot.stats)
 importFrom(graphics,axis)
 importFrom(graphics,barplot)
 importFrom(graphics,plot)
@@ -76,6 +93,9 @@ importFrom(graphics,text)
 importFrom(reshape2,dcast)
 importFrom(rvest,html_nodes)
 importFrom(rvest,html_table)
 importFrom(stats,fivenum)
 importFrom(stats,quantile)
 importFrom(stats,sd)
 importFrom(utils,object.size)
 importFrom(utils,packageDescription)
 importFrom(utils,read.delim)
--- a/28
+++ b/28
@@ -1,28 +0,0 @@
 ## 0.1.2
 - Added full support for Windows, Linux and macOS
 - New function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
 - New functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
 - New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate`
 - Renamed dataset `ablist` to `antibiotics`
 - Renamed dataset `bactlist` to `microorganisms`
 - Added more microorganisms to `bactlist`
 - Added analysis examples on help page of dataset `septic_patients`
 - Added support for character vector in join functions
 - Added warnings when applying a join results in more rows after than before the join
 - Altered `%like%` to make it case insensitive
 - For parameters of functions `first_isolate`, `EUCAST_rules` the column names are now case-insensitive
 - Functions `as.rsi` and `as.mic` now add the package name and version as attribute
 - Expanded README.md
 - Added unit testing with the `testthat` package
 - Added build tests for Linux and macOS using Travis CI (https://travis-ci.org/msberends/AMR)
 - Added Line coverage checking using CodeCov (https://codecov.io/gh/msberends/AMR/tree/master/R)
 ## 0.1.1
 - `EUCAST_rules` applies for amoxicillin even if ampicillin is missing
 - Edited column names to comply with GLIMS, the laboratory information system
 - Added more valid MIC values
 - Renamed 'Daily Defined Dose' to 'Defined Daily Dose'
 - Added barplots for `rsi` and `mic` classes
 ## 0.1.0
 - First submission to CRAN.
--- a/NEWS.md
+++ b/NEWS.md
@@ -0,0 +1,37 @@
 ## 0.2.0
 #### New
 - Full support for Windows, Linux and macOS
 - Function `guess_bactid` to determine the ID of a microorganism based on genus/species or known abbreviations like MRSA
 - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS
 - Function `MDRO` to determine Multi Drug Resistant Organisms (MDRO) with support for country-specific guidelines. Suggest your own via https://github.com/msberends/AMR/issues/new. Functions `BRMO` and `MRGN` are wrappers for Dutch and German guidelines, respectively
 - Function `freq` to create frequency tables, with additional info in a header
 - New algorithm to determine weighted isolates, can now be `"points"` or `"keyantibiotics"`, see `?first_isolate`
 - New print format for tibbles and data.tables
 #### Changed
 - Renamed dataset `ablist` to `antibiotics`
 - Renamed dataset `bactlist` to `microorganisms`
 - Added more microorganisms to `bactlist`
 - Added analysis examples on help page of dataset `septic_patients`
 - Added support for character vector in `join` functions
 - Added warnings when a join results in more rows after than before the join
 - Altered `%like%` to make it case insensitive
 - For parameters of functions `first_isolate` and `EUCAST_rules` the column names are now case-insensitive
 - Functions `as.rsi` and `as.mic` now add the package name and version as attribute
 #### Other
 - Expanded README.md with more examples
 - Added ORC IDs of authors to DESCRIPTION file
 - Added unit testing with the `testthat` package
 - Added build tests for Linux and macOS using Travis CI (https://travis-ci.org/msberends/AMR)
 - Added Line coverage checking using CodeCov (https://codecov.io/gh/msberends/AMR/tree/master/R)
 ## 0.1.1
 - `EUCAST_rules` applies for amoxicillin even if ampicillin is missing
 - Edited column names to comply with GLIMS, the laboratory information system
 - Added more valid MIC values
 - Renamed 'Daily Defined Dose' to 'Defined Daily Dose'
 - Added barplots for `rsi` and `mic` classes
 ## 0.1.0
 - First submission to CRAN.
--- a/R/eucast.R
+++ b/R/eucast.R
--- a/R/first_isolates.R
+++ b/R/first_isolates.R
@@ -439,45 +439,26 @@ key_antibiotics <- function(tbl,
  col.list <- c(amox, cfot, cfta, cftr, cfur, cipr, clar,
                clin, clox, doxy, gent, line, mero, peni,
                pita, rifa, teic, trsu, vanc)
-  col.list <- col.list[!is.na(col.list)]
+  col.list <- check_available_columns(tbl = tbl, col.list = col.list, info = info)
-  col.list.bak <- col.list
+  amox <- col.list[amox]
-  # are they available as upper case or lower case then?
+  cfot <- col.list[cfot]
-  for (i in 1:length(col.list)) {
+  cfta <- col.list[cfta]
-    if (toupper(col.list[i]) %in% colnames(tbl)) {
+  cftr <- col.list[cftr]
-      col.list[i] <- toupper(col.list[i])
+  cfur <- col.list[cfur]
-    } else if (tolower(col.list[i]) %in% colnames(tbl)) {
+  cipr <- col.list[cipr]
-      col.list[i] <- tolower(col.list[i])
+  clar <- col.list[clar]
-    } else if (!col.list[i] %in% colnames(tbl)) {
+  clin <- col.list[clin]
-      col.list[i] <- NA
+  clox <- col.list[clox]
-    }
+  doxy <- col.list[doxy]
-  }
+  gent <- col.list[gent]
-  if (!all(col.list %in% colnames(tbl))) {
+  line <- col.list[line]
-    if (info == TRUE) {
+  mero <- col.list[mero]
-      warning('These columns do not exist and will be ignored: ',
+  peni <- col.list[peni]
-              col.list.bak[!(col.list %in% colnames(tbl))] %>% toString(),
+  pita <- col.list[pita]
-              immediate. = TRUE,
+  rifa <- col.list[rifa]
-              call. = FALSE)
+  teic <- col.list[teic]
-    }
+  trsu <- col.list[trsu]
-  }
+  vanc <- col.list[vanc]
  amox <- col.list[1]
  cfot <- col.list[2]
  cfta <- col.list[3]
  cftr <- col.list[4]
  cfur <- col.list[5]
  cipr <- col.list[6]
  clar <- col.list[7]
  clin <- col.list[8]
  clox <- col.list[9]
  doxy <- col.list[10]
  gent <- col.list[11]
  line <- col.list[12]
  mero <- col.list[13]
  peni <- col.list[14]
  pita <- col.list[15]
  rifa <- col.list[16]
  teic <- col.list[17]
  trsu <- col.list[18]
  vanc <- col.list[19]
  # join microorganisms
  tbl <- tbl %>% left_join_microorganisms(col_bactid)
--- a/R/freq.R
+++ b/R/freq.R
@@ -0,0 +1,361 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Frequency table
 #'
 #' Create a frequency table of a vector of data, a single column or a maximum of 9 columns of a data frame. Supports markdown for reports.
 #' @param x data
 #' @param sort.count Sort on count. Use \code{FALSE} to sort alphabetically on item.
 #' @param nmax number of row to print. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.
 #' @param na.rm a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of\code{NA}s.
 #' @param markdown print table in markdown format (this forces \code{nmax = NA})
 #' @param toConsole Print table to the console. Use \code{FALSE} to assign the table to an object.
 #' @param digits how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})
 #' @param sep a character string to separate the terms when selecting multiple columns
 #' @details For numeric values, the next values will be calculated and shown into the header:
 #' \itemize{
 #'   \item{Mean, using \code{\link[base]{mean}}}
 #'   \item{Standard deviation, using \code{\link[stats]{sd}}}
 #'   \item{Five numbers of Tukey (min, Q1, median, Q3, max), using \code{\link[stats]{fivenum}}}
 #'   \item{Outliers (count and list), using \code{\link{boxplot.stats}}}
 #'   \item{Coefficient of variation (CV), the standard deviation divided by the mean}
 #'   \item{Coefficient of quartile variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using \code{\link{quantile}} with \code{type = 6} as quantile algorithm to comply with SPSS standards}
 #' }
 #' @importFrom stats fivenum sd quantile
 #' @importFrom grDevices boxplot.stats
 #' @importFrom dplyr %>% select pull n_distinct group_by arrange desc mutate summarise
 #' @keywords summary summarise frequency freq
 #' @rdname freq
 #' @export
 #' @examples
 #' library(dplyr)
 #'
 #' freq(septic_patients$hospital_id)
 #'
 #' septic_patients %>%
 #'   filter(hospital_id == "A") %>%
 #'   select(bactid) %>%
 #'   freq()
 #'
 #' # select multiple columns; they will be pasted together
 #' septic_patients %>%
 #'   left_join_microorganisms %>%
 #'   filter(hospital_id == "A") %>%
 #'   select(genus, species) %>%
 #'   freq()
 #'
 #' # save frequency table to an object
 #' years <- septic_patients %>%
 #'   mutate(year = format(date, "%Y")) %>%
 #'   select(year) %>%
 #'   freq(toConsole = FALSE)
 freq <- function(x,
                 sort.count = TRUE,
                 nmax = 15,
                 na.rm = TRUE,
                 markdown = FALSE,
                 toConsole = TRUE,
                 digits = 2,
                 sep = " ") {
  mult.columns <- 0
  if (NROW(x) == 0) {
    cat('\nNo observations.\n')
    return(invisible())
  }
  if (!is.null(ncol(x))) {
    if (ncol(x) == 1 & any(class(x) == 'data.frame')) {
      x <- x %>% pull(1)
    } else if (ncol(x) < 10) {
      mult.columns <- ncol(x)
      colnames(x) <- LETTERS[1:ncol(x)]
      if (ncol(x) == 2) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 3) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 4) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          x$D %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 5) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          x$D %>% as.character(),
                          x$E %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 6) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          x$D %>% as.character(),
                          x$E %>% as.character(),
                          x$F %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 7) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          x$D %>% as.character(),
                          x$E %>% as.character(),
                          x$F %>% as.character(),
                          x$G %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 8) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          x$D %>% as.character(),
                          x$E %>% as.character(),
                          x$F %>% as.character(),
                          x$G %>% as.character(),
                          x$H %>% as.character(),
                          sep = sep)
      } else if (ncol(x) == 9) {
        x$total <- paste(x$A %>% as.character(),
                          x$B %>% as.character(),
                          x$C %>% as.character(),
                          x$D %>% as.character(),
                          x$E %>% as.character(),
                          x$F %>% as.character(),
                          x$G %>% as.character(),
                          x$H %>% as.character(),
                          x$I %>% as.character(),
                          sep = sep)
      }
      x <- x$total
    } else {
      stop('A maximum of 9 columns can be analysed at the same time.', call. = FALSE)
    }
  }
  if (markdown == TRUE & toConsole == FALSE) {
    warning('`toConsole = FALSE` will be ignored when `markdown = TRUE`.')
  }
  if (mult.columns > 1) {
    NAs <- x[is.na(x) | x == trimws(strrep('NA ', mult.columns))]
  } else {
    NAs <- x[is.na(x)]
  }
  if (na.rm == TRUE) {
    x <- x[!x %in% NAs]
  }
  if (missing(sort.count) & any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single', 'factor'))) {
    # sort on item/level at default when x is numeric or a factor and sort.count is not set
    sort.count <- FALSE
  }
  header <- character(0)
  markdown_line <- ''
  if (markdown == TRUE) {
    markdown_line <- '\n'
  }
  x_align <- 'l'
  if (mult.columns > 0) {
    header <- header %>% paste0(markdown_line, 'Columns:   ', mult.columns)
  } else {
    header <- header %>% paste0(markdown_line, 'Class:     ', class(x) %>% rev() %>% paste(collapse = " > "))
  }
  if (is.list(x) | is.matrix(x) | is.environment(x) | is.function(x)) {
    cat(header, "\n")
    stop('`freq()` does not support lists, matrices, environments or functions.', call. = FALSE)
  }
  header <- header %>% paste0(markdown_line, '\nLength:    ', (NAs %>% length() + x %>% length()) %>% format(),
                              ' (of which NA: ', NAs %>% length() %>% format(),
                              ' = ', (NAs %>% length() / (NAs %>% length() + x %>% length())) %>% percent(force_zero = TRUE), ')')
  header <- header %>% paste0(markdown_line, '\nUnique:    ', x %>% n_distinct() %>% format())
  header.numbers.done <- FALSE
  if (any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
    # right align number
    x_align <- 'r'
    header <- header %>% paste0('\n')
    header <- header %>% paste(markdown_line, '\nMean:     ', x %>% base::mean(na.rm = TRUE) %>% format(digits = digits))
    header <- header %>% paste0(markdown_line, '\nStd. dev.: ', x %>% stats::sd(na.rm = TRUE) %>% format(digits = digits),
                                ' (CV: ', x %>% cv(na.rm = TRUE) %>% format(digits = digits), ')')
    header <- header %>% paste0(markdown_line, '\nFive-Num:  ', x %>% stats::fivenum(na.rm = TRUE) %>% format(digits = digits) %>% trimws() %>% paste(collapse = '  |  '),
                                ' (CQV: ', x %>% cqv(na.rm = TRUE) %>% format(digits = digits), ')')
    outlier_length <- length(boxplot.stats(x)$out)
    header <- header %>% paste0(markdown_line, '\nOutliers:  ', outlier_length)
    if (outlier_length > 0) {
      header <- header %>% paste0(' (unique: ', boxplot.stats(x)$out %>% unique() %>% length(), ')')
    }
  }
  formatdates <- "%e %B %Y" # = d mmmm yyyy
  if (any(class(x) == 'hms')) {
    x <- x %>% as.POSIXlt()
    formatdates <- "%H:%M:%S"
  }
  if (any(class(x) %in% c('Date', 'POSIXct', 'POSIXlt'))) {
    header <- header %>% paste0('\n')
    mindatum <- x %>% min()
    maxdatum <- x %>% max()
    header <- header %>% paste0(markdown_line, '\nOldest:    ', mindatum %>% format(formatdates) %>% trimws())
    header <- header %>% paste0(markdown_line, '\nNewest:    ', maxdatum %>% format(formatdates) %>% trimws(),
                                ' (+', difftime(maxdatum, mindatum, units = 'auto') %>% as.double() %>% format(), ')')
  }
  if (any(class(x) == 'POSIXlt')) {
    x <- x %>% format(formatdates)
  }
  if (toConsole == TRUE) {
    cat(header)
  }
  if (all(is.na(x))) {
    cat('\n\nNo observations.\n')
    return(invisible())
  }
  if (n_distinct(x) == length(x)) {
    warning('All observations are unique.', call. = FALSE)
  }
  if (nmax == 0 | is.na(nmax)) {
    nmax <- length(x)
  }
  nmax.1 <- min(length(x), nmax + 1)
  # create table with counts and percentages
  if (any(class(x) == 'factor')) {
    df <- tibble::tibble(Item = x,
                         Fctlvl = x %>% as.integer()) %>%
      group_by(Item, Fctlvl)
    column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent', '(Factor Level)')
    column_align <- c('l', 'r', 'r', 'r', 'r', 'r')
  } else {
    df <- tibble::tibble(Item = x) %>%
      group_by(Item)
    column_names <- c('Item', 'Count', 'Percent', 'Cum. Count', 'Cum. Percent')
    column_align <- c(x_align, 'r', 'r', 'r', 'r')
  }
  df <- df %>%
    summarise(Count = n(),
              Percent = (n() / length(x)) %>% percent(force_zero = TRUE))
  if (df$Item %>% paste(collapse = ',') %like% '\033') {
    df <- df %>%
      mutate(Item = Item %>%
               # remove escape char
               # see https://en.wikipedia.org/wiki/Escape_character#ASCII_escape_character
               gsub('\033', ' ', ., fixed = TRUE))
  }
  # sort according to setting
  if (sort.count == TRUE) {
    df <- df %>% arrange(desc(Count))
  } else {
    if (any(class(x) == 'factor')) {
      df <- df %>% arrange(Fctlvl)
    } else {
      df <- df %>% arrange(Item)
    }
  }
  # add cumulative values
  df$Cum <- cumsum(df$Count)
  df$CumTot <- (df$Cum / sum(df$Count, na.rm = TRUE)) %>% percent(force_zero = TRUE)
  df$Cum <- df$Cum %>% format()
  if (any(class(x) == 'factor')) {
    # put factor last
    df <- df %>% select(Item, Count, Percent, Cum, CumTot, Fctlvl)
  }
  if (markdown == TRUE) {
    tblformat <- 'markdown'
  } else {
    tblformat <- 'pandoc'
  }
  if (toConsole == FALSE) {
    # assign to object
    df[, 3] <- df[, 2] / sum(df[, 2], na.rm = TRUE)
    df[, 4] <- cumsum(df[, 2])
    df[, 5] <- df[, 4] / sum(df[, 2], na.rm = TRUE)
    return(df)
  } else {
    # save old NA setting for kable
    opt.old <- options()$knitr.kable.NA
    options(knitr.kable.NA = "<NA>")
    Count.rest <- sum(df[nmax.1:nrow(df), 'Count'], na.rm = TRUE)
    if (any(class(x) %in% c('double', 'integer', 'numeric', 'raw', 'single'))) {
      df <- df %>% mutate(Item = format(Item))
    }
    df <- df %>% mutate(Count = format(Count))
    if (nrow(df) > nmax.1 & markdown == FALSE) {
      df2 <- df[1:nmax,]
      print(
        knitr::kable(df2,
                     format = tblformat,
                     col.names = column_names,
                     align = column_align,
                     padding = 1)
      )
      cat('... and ',
          format(nrow(df) - nmax),
          ' more ',
          paste0('(n = ',
                 format(Count.rest),
                 '; ',
                 (Count.rest / length(x)) %>% percent(force_zero = TRUE),
                 ')'),
          '. Use `nmax` to show more rows.\n\n', sep = '')
    } else {
      print(
        knitr::kable(df,
                     format = tblformat,
                     col.names = column_names,
                     align = column_align,
                     padding = 1)
      )
    }
    cat('\n')
    # reset old kable setting
    options(knitr.kable.NA = opt.old)
    return(invisible())
  }
 }
 #' @rdname freq
 #' @export
 frequency_tbl <- freq
--- a/R/globals.R
+++ b/R/globals.R
@@ -16,16 +16,20 @@
 # GNU General Public License for more details.                         #
 # ==================================================================== #
-globalVariables(c('.',
+globalVariables(c('abname',
                  'abname',
                  'bactid',
                  'cnt',
                  'Count',
                  'Cum',
                  'CumTot',
                  'date_lab',
                  'days_diff',
                  'Fctlvl',
                  'first_isolate_row_index',
                  'fullname',
                  'genus',
                  'gramstain',
                  'Item',
                  'key_ab',
                  'key_ab_lag',
                  'key_ab_other',
@@ -34,6 +38,9 @@ globalVariables(c('.',
                  'n',
                  'other_pat_or_mo',
                  'patient_id',
                  'Percent',
                  'quantile',
                  'real_first_isolate',
                  'species',
-                  'y'))
+                  'y',
                  '.'))
--- a/R/mdro.R
+++ b/R/mdro.R
@@ -0,0 +1,203 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Determine multidrug-resistant organisms (MDRO)
 #'
 #' Determine which isolates are multidrug-resistant organisms (MDRO) according to country-specific guidelines.
 #' @param tbl table with antibiotic columns, like e.g. \code{amox} and \code{amcl}
 #' @param country country to determine guidelines. Should be a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).
 #' @param col_bactid column name of the bacteria ID in \code{tbl} - values of this column should be present in \code{microorganisms$bactid}, see \code{\link{microorganisms}}
 #' @param info print progress
 #' @param aminoglycosides,quinolones,carbapenems character vector with column names of antibiotics
 #' @param ceftazidime,piperacillin,trimethoprim_sulfa,penicillin,vancomycin column names of antibiotics
 #' @param ... parameters that are passed on to \code{MDR}
 #' @return Ordered factor with values \code{Positive}, \code{Unconfirmed}, \code{Negative}.
 #' @rdname MDRO
 #' @export
 MDRO <- function(tbl,
                 country,
                 col_bactid = 'bactid',
                 info = TRUE,
                 aminoglycosides = c('gent', 'tobr', 'kana'),
                 quinolones = c('cipr', 'norf'),
                 carbapenems = c('imip', 'mero', 'erta'),
                 ceftazidime = 'cfta',
                 piperacillin = 'pita',
                 trimethoprim_sulfa = 'trsu',
                 penicillin = 'peni',
                 vancomycin = 'vanc') {
  # strip whitespaces
  country <- trimws(country)
  if (length(country) > 1) {
    stop('`country` must be a length one character string.', call. = FALSE)
  }
  if (!country %like% '^[a-z]{2}$') {
    stop('This is not a valid ISO 3166-1 alpha-2 country code: "', country, '". Please see ?MDRO.', call. = FALSE)
  }
  # create list and make country code case-independent
  guideline <- list(country = list(code = tolower(country)))
  # support per country
  if (guideline$country$code == 'de') {
    guideline$country$name <- 'Germany'
    guideline$name <- ''
    guideline$version <- ''
    guideline$source <- ''
  } else if (guideline$country$code == 'nl') {
    guideline$country$name <- 'The Netherlands'
    guideline$name <- 'WIP-Richtlijn BRMO'
    guideline$version <- 'Revision of December 2017'
    guideline$source <- 'https://www.rivm.nl/Documenten_en_publicaties/Professioneel_Praktisch/Richtlijnen/Infectieziekten/WIP_Richtlijnen/WIP_Richtlijnen/Ziekenhuizen/WIP_richtlijn_BRMO_Bijzonder_Resistente_Micro_Organismen_ZKH'
  # add here more countries like this:
  # } else if (country$code == 'AA') {
  #   country$name <- 'country name'
  } else {
    stop('This country code is currently unsupported: ', guideline$country$code, call. = FALSE)
  }
  # Console colours
  # source: http://www.tldp.org/HOWTO/Bash-Prompt-HOWTO/x329.html
  ANSI_red <- "\033[31m"
  ANSI_blue <- "\033[34m"
  ANSI_reset <- "\033[0m"
  if (info == TRUE) {
    cat("Determining Highly Resistant Microorganisms (MDRO), according to:\n",
        "Guideline: ", ANSI_red, guideline$name, ", ", guideline$version, ANSI_reset, "\n",
        "Country  : ", ANSI_red, guideline$country$name, ANSI_reset, "\n",
        "Source   : ", ANSI_blue, guideline$source, ANSI_reset, "\n",
        "\n", sep = "")
  }
  # join microorganisms
  tbl <- tbl %>% left_join_microorganisms(col_bactid)
  tbl$MDRO <- 1
  if (guideline$country$code == 'nl') {
    # BRMO; Bijzonder Resistente Micro-Organismen
    aminoglycosides <- aminoglycosides[aminoglycosides %in% colnames(tbl)]
    quinolones <- quinolones[quinolones %in% colnames(tbl)]
    carbapenems <- carbapenems[carbapenems %in% colnames(tbl)]
    if (!ceftazidime %in% colnames(tbl)) { ceftazidime <- NA }
    if (!piperacillin %in% colnames(tbl)) { piperacillin <- NA }
    if (!trimethoprim_sulfa %in% colnames(tbl)) { trimethoprim_sulfa <- NA }
    if (!penicillin %in% colnames(tbl)) { penicillin <- NA }
    if (!vancomycin %in% colnames(tbl)) { vancomycin <- NA }
    # Table 1
    tbl[which(
      tbl$family == 'Enterobacteriaceae'
      & rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1
      & rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1
    ), 'MDRO'] <- 4
    tbl[which(
      tbl$family == 'Enterobacteriaceae'
      & rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1
    ), 'MDRO'] <- 3
    # rest is negative
    tbl[which(
      tbl$family == 'Enterobacteriaceae'
      & tbl$MDRO == 1
    ), 'MDRO'] <- 2
    # Table 2
    tbl[which(
      tbl$genus == 'Acinetobacter'
      & rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1
    ), 'MDRO'] <- 3
    tbl[which(
      tbl$genus == 'Acinetobacter'
      & rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1
      & rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1
    ), 'MDRO'] <- 4
    # rest of Acinetobacter is negative
    tbl[which(
      tbl$genus == 'Acinetobacter'
      & tbl$MDRO == 1
    ), 'MDRO'] <- 2
    tbl[which(
      tbl$fullname %like% 'Stenotrophomonas maltophilia'
      & tbl[, trimethoprim_sulfa] == 'R'
    ), 'MDRO'] <- 4
    # rest of Stenotrophomonas is negative
    tbl[which(
      tbl$fullname %like% 'Stenotrophomonas maltophilia'
      & tbl$MDRO == 1
    ), 'MDRO'] <- 2
    tbl[which(
      tbl$fullname %like% 'Pseudomonas aeruginosa'
      & sum(rowSums(tbl[, carbapenems] == 'R', na.rm = TRUE) >= 1,
           rowSums(tbl[, aminoglycosides] == 'R', na.rm = TRUE) >= 1,
           rowSums(tbl[, quinolones] == 'R', na.rm = TRUE) >= 1,
           tbl[, ceftazidime] == 'R',
           tbl[, piperacillin] == 'R') >= 3
    ), 'MDRO'] <- 4
    # rest of Pseudomonas is negative
    tbl[which(
      tbl$fullname %like% 'Pseudomonas aeruginosa'
      & tbl$MDRO == 1
    ), 'MDRO'] <- 2
    # Table 3
    tbl[which(
      tbl$fullname %like% 'Streptococcus pneumoniae'
      & tbl[, penicillin] == 'R'
    ), 'MDRO'] <- 4
    tbl[which(
      tbl$fullname %like% 'Streptococcus pneumoniae'
      & tbl[, vancomycin] == 'R'
    ), 'MDRO'] <- 4
    # rest of Streptococcus pneumoniae is negative
    tbl[which(
      tbl$fullname %like% 'Streptococcus pneumoniae'
      & tbl$MDRO == 1
    ), 'MDRO'] <- 2
    tbl[which(
      tbl$fullname %like% 'Enterococcus faecium'
      &  rowSums(tbl[, c(penicillin, vancomycin)] == 'R', na.rm = TRUE) >= 1
    ), 'MDRO'] <- 4
    # rest of Enterococcus faecium is negative
    tbl[which(
      tbl$fullname %like% 'Enterococcus faecium'
      & tbl$MDRO == 1
    ), 'MDRO'] <- 2
  }
  factor(x =  tbl$MDRO,
         levels = c(1:4),
         labels = c('Unknown', 'Negative', 'Unconfirmed', 'Positive'),
         ordered = TRUE)
 }
 #' @rdname MDRO
 #' @export
 BRMO <- function(tbl, country = "nl", ...) {
  MDRO(tbl = tbl, country = country, ...)
 }
 #' @rdname MDRO
 #' @export
 MRGN <- function(tbl, country = "de", ...) {
  MDRO(tbl = tbl, country = country, ...)
 }
--- a/R/misc.R
+++ b/R/misc.R
@@ -16,18 +16,98 @@
 # GNU General Public License for more details.                         #
 # ==================================================================== #
-# No export, no Rd
+#' Pattern Matching
-"%like%" <- function(vector, pattern) {
+#'
-  # Source: https://github.com/Rdatatable/data.table/blob/master/R/like.R
+#' Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
-  # But made it case insensitive
+#' @inheritParams base::grep
-  if (is.factor(vector)) {
+#' @return A \code{logical} vector
-    as.integer(vector) %in% grep(pattern, levels(vector), ignore.case = TRUE)
+#' @name like
 #' @rdname like
 #' @export
 #' @source Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
 #' @examples
 #' library(dplyr)
 #' # get unique occurences of bacteria whose name start with 'Ent'
 #' septic_patients %>%
 #'   left_join_microorganisms() %>%
 #'   filter(fullname %like% '^Ent') %>%
 #'   pull(fullname) %>%
 #'   unique()
 "%like%" <- function(x, pattern) {
  if (length(pattern) > 1) {
    pattern <- pattern[1]
    warning('only the first element of argument `pattern` used for `%like%`', call. = FALSE)
  }
  if (is.factor(x)) {
    as.integer(x) %in% base::grep(pattern, levels(x), ignore.case = TRUE)
  } else {
-    grepl(pattern, vector, ignore.case = TRUE)
+    base::grepl(pattern, x, ignore.case = TRUE)
  }
 }
 # No export, no Rd
-percent <- function(x, round = 1, ...) {
+percent <- function(x, round = 1, force_zero = FALSE, ...) {
-  base::paste0(base::round(x * 100, digits = round), "%")
+  val <- base::round(x * 100, digits = round)
  if (force_zero & any(val == as.integer(val))) {
    val[val == as.integer(val)] <- paste0(val[val == as.integer(val)], ".", base::strrep(0, round))
  }
  base::paste0(val, "%")
 }
 check_available_columns <- function(tbl, col.list, info = TRUE) {
  # check columns
  col.list <- col.list[!is.na(col.list)]
  names(col.list) <- col.list
  col.list.bak <- col.list
  # are they available as upper case or lower case then?
  for (i in 1:length(col.list)) {
    if (toupper(col.list[i]) %in% colnames(tbl)) {
      col.list[i] <- toupper(col.list[i])
    } else if (tolower(col.list[i]) %in% colnames(tbl)) {
      col.list[i] <- tolower(col.list[i])
    } else if (!col.list[i] %in% colnames(tbl)) {
      col.list[i] <- NA
    }
  }
  if (!all(col.list %in% colnames(tbl))) {
    if (info == TRUE) {
      warning('These columns do not exist and will be ignored: ',
              col.list.bak[!(col.list %in% colnames(tbl))] %>% toString(),
              immediate. = TRUE,
              call. = FALSE)
    }
  }
  col.list
 }
 # Coefficient of variation (CV)
 cv <- function(x, na.rm = TRUE) {
  cv.x <- sd(x, na.rm = na.rm) / abs(mean(x, na.rm = na.rm))
  cv.x
 }
 # Coefficient of dispersion, or coefficient of quartile variation (CQV).
 # (Bonett et al., 2006: Confidence interval for a coefficient of quartile variation).
 cqv <- function(x, na.rm = TRUE) {
  cqv.x <-
    (quantile(x, 0.75, na.rm = na.rm, type = 6) - quantile(x, 0.25, na.rm = na.rm, type = 6)) /
    (quantile(x, 0.75, na.rm = na.rm, type = 6) + quantile(x, 0.25, na.rm = na.rm, type = 6))
  unname(cqv.x)
 }
 # show bytes as kB/MB/GB
 # size_humanreadable(123456) # 121 kB
 # size_humanreadable(12345678) # 11.8 MB
 size_humanreadable <- function(bytes, decimals = 1) {
  bytes <- bytes %>% as.double()
  # Adapted from:
  # http://jeffreysambells.com/2012/10/25/human-readable-filesize-php
  size <- c('B','kB','MB','GB','TB','PB','EB','ZB','YB')
  factor <- floor((nchar(bytes) - 1) / 3)
  # added slight improvement; no decimals for B and kB:
  decimals <- rep(decimals, length(bytes))
  decimals[size[factor + 1] %in% c('B', 'kB')] <- 0
  out <- paste(sprintf(paste0("%.", decimals, "f"), bytes / (1024 ^ factor)), size[factor + 1])
  out
 }
--- a/R/print.R
+++ b/R/print.R
@@ -0,0 +1,349 @@
 # ==================================================================== #
 # TITLE                                                                #
 # Antimicrobial Resistance (AMR) Analysis                              #
 #                                                                      #
 # AUTHORS                                                              #
 # Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl)           #
 #                                                                      #
 # LICENCE                                                              #
 # This program is free software; you can redistribute it and/or modify #
 # it under the terms of the GNU General Public License version 2.0,    #
 # as published by the Free Software Foundation.                        #
 #                                                                      #
 # This program is distributed in the hope that it will be useful,      #
 # but WITHOUT ANY WARRANTY; without even the implied warranty of       #
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
 # GNU General Public License for more details.                         #
 # ==================================================================== #
 #' Printing Data Tables and Tibbles
 #'
 #' Print a data table or tibble. It prints: \cr- The \strong{first and last rows} like \code{data.table}s are printed by the \code{data.table} package,\cr- A \strong{header} and \strong{left aligned text} like \code{tibble}s are printed by the \code{tibble} package with info about grouped variables,\cr- \strong{Unchanged values} and \strong{support for row names} like \code{data.frame}s are printed by the \code{base} package.
 #' @inheritParams base::print.data.frame
 #' @param nmax amount of rows to print in total. When the total amount of rows exceeds this limit, the first and last \code{nmax / 2} rows will be printed. Use \code{nmax = NA} to print all rows.
 #' @param header print header with information about data size and tibble grouping
 #' @param print.keys print keys for \code{data.table}
 #' @param na value to print instead of NA
 #' @param width amount of white spaces to keep between columns, must be at least 1
 #' @rdname print
 #' @name print
 #' @importFrom dplyr %>% n_groups group_vars group_size filter pull select
 #' @exportMethod print.tbl_df
 #' @export
 #' @examples
 #' # still showing all values unchanged:
 #' library(dplyr)
 #' starwars
 #' print(starwars, width = 3)
 #'
 #' # compare the significance notation of this
 #' starwars %>% select(birth_year)
 #' # with this
 #' tibble:::print.tbl_df(starwars %>% select(birth_year))
 #'
 #' # supports info about groups (look at header)
 #' starwars %>% group_by(homeworld, gender)
 print.tbl_df <- function(x,
                         nmax = 10,
                         header = TRUE,
                         row.names = TRUE,
                         right = FALSE,
                         width = 1,
                         na = "<NA>",
                         ...) {
  prettyprint_df(x = x,
                 nmax = nmax,
                 header = header,
                 row.names = row.names,
                 print.keys = FALSE,
                 right = right,
                 width = width,
                 na = na,
                 ...)
 }
 #' @rdname print
 #' @exportMethod print.tbl
 #' @export
 print.tbl <- function(x, ...) {
  prettyprint_df(x, ...)
 }
 #' @rdname print
 #' @exportMethod print.data.table
 #' @export
 print.data.table <- function(x,
                             nmax = 10,
                             header = TRUE,
                             row.names = TRUE,
                             print.keys = FALSE,
                             right = FALSE,
                             width = 1,
                             na = "<NA>",
                             ...) {
  prettyprint_df(x = x,
                 nmax = nmax,
                 header = header,
                 row.names = row.names,
                 print.keys = print.keys,
                 right = right,
                 width = width,
                 na = na,
                 ...)
 }
 printDT <- data.table:::print.data.table
 prettyprint_df <- function(x,
                           nmax = 10,
                           header = TRUE,
                           row.names = TRUE,
                           print.keys = FALSE,
                           right = FALSE,
                           width = 1,
                           na = "<NA>",
                           ...) {
  ansi_reset <- "\u001B[0m"
  ansi_black <- "\u001B[30m"
  ansi_red <- "\u001B[31m"
  ansi_green <- "\u001B[32m"
  ansi_yellow <- "\u001B[33m"
  ansi_blue <- "\u001B[34m"
  ansi_purple <- "\u001B[35m"
  ansi_cyan <- "\u001B[36m"
  ansi_white <- "\u001B[37m"
  ansi_gray <- "\u001B[38;5;246m"
  if (width < 1) {
    stop('`width` must be at least 1.', call. = FALSE)
  }
  if (is.na(nmax)) {
    nmax <- NROW(x)
  }
  n <- nmax
  if (n %% 2 == 1) {
    # odd number; add 1
    n <- n + 1
  }
  width <- width - 1
  if ('tbl_df' %in% class(x)) {
    type <- 'tibble'
  } else if ('data.table' %in% class(x)) {
    type <- 'data.table'
  } else {
    type <- 'data.frame'
  }
  if (header == TRUE) {
    if (NCOL(x) == 1) {
      vars <- 'variable'
    } else {
      vars <- 'variables'
    }
    size <- object.size(x) %>% as.double() %>% size_humanreadable()
    cat(paste0("A ", type,": ",
               format(NROW(x)),
               " obs. of ",
               format(NCOL(x)),
               " ", vars,
               ansi_gray, " (", size, ")\n", ansi_reset))
    if ('grouped_df' %in% class(x) & n_groups(x) > 0) {
      cat(paste0("Grouped by ",
                 x %>% group_vars() %>% paste0(ansi_red, ., ansi_reset) %>% paste0(collapse = " and "),
                 ansi_gray,
                 " (",
                 x %>% n_groups(),
                 " groups with sizes between ",
                 x %>% group_size() %>% min(),
                 " and ",
                 x %>% group_size() %>% max(),
                 ")\n",
                 ansi_reset))
    }
    if (!is.null(attributes(x)$qry)) {
      cat(paste0(ansi_gray, "This data contains a query. Use qry() to view it.\n", ansi_reset))
    }
    cat("\n")
  }
  # data.table where keys should be printed
  if (print.keys == TRUE) {
    printDT(x,
            class = header,
            row.names = row.names,
            print.keys = TRUE,
            right = right,
            ...
    )
    return(invisible())
  }
  # tibbles give warning when setting column names
  x <- x %>% base::as.data.frame(stringsAsFactors = FALSE)
  # extra space of 3 chars, right to row name or number
  if (NROW(x) > 0) {
    maxrowchars <- rownames(x) %>% nchar() %>% max() + 3
    rownames(x) <- paste0(rownames(x), strrep(" ", maxrowchars - nchar(rownames(x))))
  } else {
    maxrowchars <- 0
  }
  x.bak <- x
  if (n + 1 < nrow(x)) {
    # remove in between part, 1 extra for ~~~~ between first and last part
    rows_list <- c(1:(n / 2 + 1), (nrow(x) - (n / 2) + 1):nrow(x))
    x <- x %>% filter(row_number() %in% rows_list)
    rownames(x) <- rownames(x.bak)[rows_list]
    # set inbetweener between parts
    rownames(x)[n / 2 + 1] <- strrep("~", maxrowchars)
  }
  if (header == TRUE) {
    # add 1 row for classes
    # class will be marked up per column
    if (NROW(x.bak) > 0) {
      rownames.x <- rownames(x)
      x <- x %>% filter(row_number() == 1) %>% rbind(x, stringsAsFactors = FALSE)
      rownames(x) <- c('*', rownames.x)
    }
    # select 1st class per column and abbreviate
    classes <- x.bak %>%
      sapply(class) %>%
      lapply(
        function(c) {
          # do print all POSIX classes like "POSct/t"
          if ('POSIXct' %in% c) {
            paste0('POS',
                   c %>%
                     gsub('POSIX', '', .) %>%
                     paste0(collapse = '/'))
          } else {
            c[[1]]
          }
        }) %>%
      unlist() %>%
      gsub("character",  "chr", ., fixed = TRUE) %>%
      gsub("complex",    "cplx", ., fixed = TRUE) %>%
      gsub("Date",       "Date", ., fixed = TRUE) %>%
      gsub("double",     "dbl", ., fixed = TRUE) %>%
      gsub("expression", "expr", ., fixed = TRUE) %>%
      gsub("factor",     "fct", ., fixed = TRUE) %>%
      gsub("IDate",      "IDat", ., fixed = TRUE) %>%
      gsub("integer",    "int", ., fixed = TRUE) %>%
      gsub("integer64",  "i64", ., fixed = TRUE) %>%
      gsub("list",       "list", ., fixed = TRUE) %>%
      gsub("logical",    "lgl", ., fixed = TRUE) %>%
      gsub("numeric",    "dbl", ., fixed = TRUE) %>%
      gsub("ordered",    "ord", ., fixed = TRUE) %>%
      gsub("percent",    "pct", ., fixed = TRUE) %>%
      gsub("single",     "sgl", ., fixed = TRUE) %>%
      paste0("<", ., ">")
  }
  # markup cols
  for (i in 1:ncol(x)) {
    if (all(!class(x[, i]) %in% class(x.bak[, i]))) {
      class(x[, i]) <- class(x.bak[, i])
    }
    try(x[, i] <- format(x %>% pull(i)), silent = TRUE)
    # replace NAs
    if (nchar(na) < 2) {
      # make as long as the text "NA"
      na <- paste0(na, strrep(" ", 2 - nchar(na)))
    }
    try(x[, i] <- gsub("^NA$", na, trimws(x[, i], 'both')), silent = TRUE)
    # place class into 1st row
    if (header == TRUE) {
      x[1, i] <- classes[i]
    }
    # dashes between two parts when exceeding nmax
    maxvalchars <- max(colnames(x)[i] %>% nchar(), x[, i] %>% nchar() %>% max())
    if (n + 1 < nrow(x.bak)) {
      x[n / 2 + if_else(header == TRUE, 2, 1), i] <- strrep("~", maxvalchars)
    }
    # align according to `right` parameter, but only factors and text, but not MICs
    if (any(x.bak %>% pull(i) %>% class() %in% c('factor', 'character'))
        & !("mic" %in% (x.bak %>% pull(i) %>% class()))) {
      vals <- x %>% pull(i) %>% trimws('both')
      colname <- colnames(x)[i] %>% trimws('both')
      if (right == FALSE) {
        vals <- paste0(vals, strrep(" ", maxvalchars - nchar(vals)))
        colname <- paste0(colname, strrep(" ", maxvalchars - nchar(colname)))
      } else {
        vals <- paste0(strrep(" ", maxvalchars - nchar(vals)), vals)
        colname <- paste0(strrep(" ", maxvalchars - nchar(colname)), colname)
      }
      x[, i] <- vals
      colnames(x)[i] <- colname
    }
    # add left padding according to `width` parameter
    # but not in 1st col when row names are off
    if (row.names == TRUE | i > 1) {
      x[, i] <- paste0(strrep(" ", width), x[, i])
      colnames(x)[i] <- paste0(strrep(" ", width), colnames(x)[i])
    }
    # strip columns that do not fit (3 chars as margin)
    width_console <- options()$width
    width_until_col <- x %>%
      select(1:i) %>%
      apply(1, paste, collapse = strrep(" ", width + 1)) %>%
      nchar() %>%
      max()
    width_until_col_before <- x %>%
      select(1:(max(i, 2) - 1)) %>%
      apply(1, paste, collapse = strrep(" ", width + 1)) %>%
      nchar() %>%
      max()
    extraspace <- maxrowchars + nchar(rownames(x)[length(rownames(x))])
    width_until_colnames <- colnames(x)[1:i] %>% paste0(collapse = strrep(" ", width + 1)) %>% nchar() + extraspace
    width_until_colnames_before <- colnames(x)[1:(max(i, 2) - 1)] %>% paste0(collapse = strrep(" ", width + 1)) %>% nchar() + extraspace
    if (i > 1 &
        (width_until_col > width_console
         | width_until_colnames > width_console)) {
      if (width_until_col_before > width_console
          | width_until_colnames_before > width_console) {
        x <- x[, 1:(i - 2)]
      } else {
        x <- x[, 1:(i - 1)]
      }
      break
    }
  }
  # empty table, row name of header should be "*"
  if (NROW(x.bak) == 0) {
    rownames(x) <- '* '
  }
  # and here it is...
  suppressWarnings(
    base::print.data.frame(x, row.names = row.names, ...)
  )
  # print rest of col names when they were stripped
  if (ncol(x) < ncol(x.bak)) {
    x.notshown <- x.bak %>% select((ncol(x) + 1):ncol(x.bak))
    if (ncol(x.notshown) == 1) {
      cat('...and 1 more column: ')
    } else {
      cat('...and', ncol(x.notshown), 'more columns: ')
    }
    cat(x.notshown %>%
          colnames() %>%
          paste0(' ', ansi_gray, classes[(ncol(x) + 1):ncol(x.bak)], ansi_reset) %>%
          paste0(collapse = ", "), '\n')
  }
 }
--- a/README.md
+++ b/README.md
@@ -90,6 +90,104 @@ after
 # 5 PSEAER    R    R    -    -    R
 ```
 ### Frequency tables
 Base R lacks a simple function to create frequency tables. We created such a function that works with almost all data types: `freq()` (or `frequency_tbl()`).
 ```r
 ## Factors sort on item by default:
 freq(septic_patients$hospital_id)
 # Class:     factor
 # Length:    2000 (of which NA: 0 = 0.0%)
 # Unique:    5
 # 
 # Item    Count   Percent   Cum. Count   Cum. Percent   (Factor Level)
 # -----  ------  --------  -----------  -------------  ---------------
 # A         233     11.7%          233          11.7%                1
 # B         583     29.1%          816          40.8%                2
 # C         221     11.1%         1037          51.8%                3
 # D         650     32.5%         1687          84.4%                4
 # E         313     15.7%         2000         100.0%                5
 ## This can be changed with the `sort.count` parameter:
 freq(septic_patients$hospital_id, sort.count = TRUE)
 # Class:     factor
 # Length:    2000 (of which NA: 0 = 0.0%)
 # Unique:    5
 # 
 # Item    Count   Percent   Cum. Count   Cum. Percent   (Factor Level)
 # -----  ------  --------  -----------  -------------  ---------------
 # D         650     32.5%          650          32.5%                4
 # B         583     29.1%         1233          61.7%                2
 # E         313     15.7%         1546          77.3%                5
 # A         233     11.7%         1779          88.9%                1
 # C         221     11.1%         2000         100.0%                3
 ## Other types, like numbers or dates, sort on count by default:
 > freq(septic_patients$date)
 # Class:     Date
 # Length:    2000 (of which NA: 0 = 0.0%)
 # Unique:    1662
 # 
 # Oldest:    2 January 2001
 # Newest:    18 October 2017 (+6133)
 # 
 # Item          Count   Percent   Cum. Count   Cum. Percent
 # -----------  ------  --------  -----------  -------------
 # 2008-12-24        5      0.2%            5           0.2%
 # 2010-12-10        4      0.2%            9           0.4%
 # 2011-03-03        4      0.2%           13           0.6%
 # 2013-06-24        4      0.2%           17           0.8%
 # 2017-09-01        4      0.2%           21           1.1%
 # 2002-09-02        3      0.2%           24           1.2%
 # 2003-10-14        3      0.2%           27           1.4%
 # 2004-06-25        3      0.2%           30           1.5%
 # 2004-06-27        3      0.2%           33           1.7%
 # 2004-10-29        3      0.2%           36           1.8%
 # 2005-09-27        3      0.2%           39           2.0%
 # 2006-08-01        3      0.2%           42           2.1%
 # 2006-10-10        3      0.2%           45           2.2%
 # 2007-11-16        3      0.2%           48           2.4%
 # 2008-03-09        3      0.2%           51           2.5%
 # ... and 1647 more (n = 1949; 97.5%). Use `nmax` to show more rows.
 ## For numeric values, some extra descriptive statistics will be calculated:
 > freq(runif(n = 10, min = 1, max = 5))
 # Class:     numeric
 # Length:    10 (of which NA: 0 = 0.0%)
 # Unique:    10
 #   
 # Mean:      3
 # Std. dev.: 0.93 (CV: 0.31)
 # Five-Num:  1.1  |  2.3  |  3.1  |  3.8  |  4.0 (CQV: 0.25)
 # Outliers:  0
 # 
 #      Item   Count   Percent   Cum. Count   Cum. Percent
 # ---------  ------  --------  -----------  -------------
 #  1.132033       1     10.0%            1          10.0%
 #  2.226903       1     10.0%            2          20.0%
 #  2.280779       1     10.0%            3          30.0%
 #  2.640898       1     10.0%            4          40.0%
 #  2.913462       1     10.0%            5          50.0%
 #  3.364201       1     10.0%            6          60.0%
 #  3.771975       1     10.0%            7          70.0%
 #  3.802861       1     10.0%            8          80.0%
 #  3.803547       1     10.0%            9          90.0%
 #  3.985691       1     10.0%           10         100.0%
 # 
 # Warning message:
 # All observations are unique. 
 ```
 Learn more about this function with:
 ```r
 ?freq
 ```
 ### New classes
 This package contains two new S3 classes: `mic` for MIC values (e.g. from Vitek or Phoenix) and `rsi` for antimicrobial drug interpretations (i.e. S, I and R). Both are actually ordered factors under the hood (an MIC of `2` being higher than `<=1` but lower than `>=32`, and for class `rsi` factors are ordered as `S < I < R`). 
 Both classes have extensions for existing generic functions like `print`, `summary` and `plot`.
--- a/man/EUCAST.Rd
+++ b/man/EUCAST.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/EUCAST.R
+% Please edit documentation in R/eucast.R
 \name{EUCAST_rules}
 \alias{EUCAST_rules}
 \alias{interpretive_reading}
--- a/man/MDRO.Rd
+++ b/man/MDRO.Rd
@@ -0,0 +1,39 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/mdro.R
 \name{MDRO}
 \alias{MDRO}
 \alias{BRMO}
 \alias{MRGN}
 \title{Determine multidrug-resistant organisms (MDRO)}
 \usage{
 MDRO(tbl, country, col_bactid = "bactid", info = TRUE,
  aminoglycosides = c("gent", "tobr", "kana"), quinolones = c("cipr",
  "norf"), carbapenems = c("imip", "mero", "erta"), ceftazidime = "cfta",
  piperacillin = "pita", trimethoprim_sulfa = "trsu", penicillin = "peni",
  vancomycin = "vanc")
 BRMO(tbl, country = "nl", ...)
 MRGN(tbl, country = "de", ...)
 }
 \arguments{
 \item{tbl}{table with antibiotic columns, like e.g. \code{amox} and \code{amcl}}
 \item{country}{country to determine guidelines. Should be a code from the \href{https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements}{list of ISO 3166-1 alpha-2 country codes}. Case-insensitive. Currently supported are \code{de} (Germany) and \code{nl} (the Netherlands).}
 \item{col_bactid}{column name of the bacteria ID in \code{tbl} - values of this column should be present in \code{microorganisms$bactid}, see \code{\link{microorganisms}}}
 \item{info}{print progress}
 \item{aminoglycosides, quinolones, carbapenems}{character vector with column names of antibiotics}
 \item{ceftazidime, piperacillin, trimethoprim_sulfa, penicillin, vancomycin}{column names of antibiotics}
 \item{...}{parameters that are passed on to \code{MDR}}
 }
 \value{
 Ordered factor with values \code{Positive}, \code{Unconfirmed}, \code{Negative}.
 }
 \description{
 Determine which isolates are multidrug-resistant organisms (MDRO) according to country-specific guidelines.
 }
--- a/man/freq.Rd
+++ b/man/freq.Rd
@@ -0,0 +1,71 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/freq.R
 \name{freq}
 \alias{freq}
 \alias{frequency_tbl}
 \title{Frequency table}
 \usage{
 freq(x, sort.count = TRUE, nmax = 15, na.rm = TRUE, markdown = FALSE,
  toConsole = TRUE, digits = 2, sep = " ")
 frequency_tbl(x, sort.count = TRUE, nmax = 15, na.rm = TRUE,
  markdown = FALSE, toConsole = TRUE, digits = 2, sep = " ")
 }
 \arguments{
 \item{x}{data}
 \item{sort.count}{Sort on count. Use \code{FALSE} to sort alphabetically on item.}
 \item{nmax}{number of row to print. Use \code{nmax = 0} or \code{nmax = NA} to print all rows.}
 \item{na.rm}{a logical value indicating whether NA values should be removed from the frequency table. The header will always print the amount of\code{NA}s.}
 \item{markdown}{print table in markdown format (this forces \code{nmax = NA})}
 \item{toConsole}{Print table to the console. Use \code{FALSE} to assign the table to an object.}
 \item{digits}{how many significant digits are to be used for numeric values (not for the items themselves, that depends on \code{\link{getOption}("digits")})}
 \item{sep}{a character string to separate the terms when selecting multiple columns}
 }
 \description{
 Create a frequency table of a vector of data, a single column or a maximum of 9 columns of a data frame. Supports markdown for reports.
 }
 \details{
 For numeric values, the next values will be calculated and shown into the header:
 \itemize{
  \item{Mean, using \code{\link[base]{mean}}}
  \item{Standard deviation, using \code{\link[stats]{sd}}}
  \item{Five numbers of Tukey (min, Q1, median, Q3, max), using \code{\link[stats]{fivenum}}}
  \item{Outliers (count and list), using \code{\link{boxplot.stats}}}
  \item{Coefficient of variation (CV), the standard deviation divided by the mean}
  \item{Coefficient of quartile variation (CQV, sometimes called coefficient of dispersion), calculated as \code{(Q3 - Q1) / (Q3 + Q1)} using \code{\link{quantile}} with \code{type = 6} as quantile algorithm to comply with SPSS standards}
 }
 }
 \examples{
 library(dplyr)
 freq(septic_patients$hospital_id)
 septic_patients \%>\%
  filter(hospital_id == "A") \%>\%
  select(bactid) \%>\%
  freq()
 # select multiple columns; they will be pasted together
 septic_patients \%>\%
  left_join_microorganisms \%>\%
  filter(hospital_id == "A") \%>\%
  select(genus, species) \%>\%
  freq()
 # save frequency table to an object
 years <- septic_patients \%>\%
  mutate(year = format(date, "\%Y")) \%>\%
  select(year) \%>\%
  freq(toConsole = FALSE)
 }
 \keyword{freq}
 \keyword{frequency}
 \keyword{summarise}
 \keyword{summary}
--- a/man/like.Rd
+++ b/man/like.Rd
@@ -0,0 +1,40 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/misc.R
 \name{like}
 \alias{like}
 \alias{\%like\%}
 \title{Pattern Matching}
 \source{
 Inherited from the \href{https://github.com/Rdatatable/data.table/blob/master/R/like.R}{\code{like} function from the \code{data.table} package}, but made it case insensitive at default.
 }
 \usage{
 x \%like\% pattern
 }
 \arguments{
 \item{x}{a character vector where matches are sought, or an
    object which can be coerced by \code{as.character} to a character
    vector.  \link{Long vectors} are supported.}
 \item{pattern}{character string containing a \link{regular expression}
    (or character string for \code{fixed = TRUE}) to be matched
    in the given character vector.  Coerced by
    \code{\link{as.character}} to a character string if possible.  If a
    character vector of length 2 or more is supplied, the first element
    is used with a warning.  Missing values are allowed except for
    \code{regexpr} and \code{gregexpr}.}
 }
 \value{
 A \code{logical} vector
 }
 \description{
 Convenience function to compare a vector with a pattern, like \code{\link[base]{grep}}. It always returns a \code{logical} vector and is always case-insensitive.
 }
 \examples{
 library(dplyr)
 # get unique occurences of bacteria whose name start with 'Ent'
 septic_patients \%>\%
  left_join_microorganisms() \%>\%
  filter(fullname \%like\% '^Ent') \%>\%
  pull(fullname) \%>\%
  unique()
 }
--- a/man/mo_property.Rd
+++ b/man/mo_property.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/EUCAST.R
+% Please edit documentation in R/eucast.R
 \name{mo_property}
 \alias{mo_property}
 \title{Poperties of a microorganism}
--- a/man/print.Rd
+++ b/man/print.Rd
@@ -0,0 +1,55 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/print.R
 \name{print}
 \alias{print}
 \alias{print.tbl_df}
 \alias{print.tbl}
 \alias{print.data.table}
 \title{Printing Data Tables and Tibbles}
 \usage{
 \method{print}{tbl_df}(x, nmax = 10, header = TRUE, row.names = TRUE,
  right = FALSE, width = 1, na = "<NA>", ...)
 \method{print}{tbl}(x, ...)
 \method{print}{data.table}(x, nmax = 10, header = TRUE, row.names = TRUE,
  print.keys = FALSE, right = FALSE, width = 1, na = "<NA>", ...)
 }
 \arguments{
 \item{x}{object of class \code{data.frame}.}
 \item{nmax}{amount of rows to print in total. When the total amount of rows exceeds this limit, the first and last \code{nmax / 2} rows will be printed. Use \code{nmax = NA} to print all rows.}
 \item{header}{print header with information about data size and tibble grouping}
 \item{row.names}{logical (or character vector), indicating whether (or
    what) row names should be printed.}
 \item{right}{logical, indicating whether or not strings should be
    right-aligned.  The default is right-alignment.}
 \item{width}{amount of white spaces to keep between columns, must be at least 1}
 \item{na}{value to print instead of NA}
 \item{...}{optional arguments to \code{print} or \code{plot} methods.}
 \item{print.keys}{print keys for \code{data.table}}
 }
 \description{
 Print a data table or tibble. It prints: \cr- The \strong{first and last rows} like \code{data.table}s are printed by the \code{data.table} package,\cr- A \strong{header} and \strong{left aligned text} like \code{tibble}s are printed by the \code{tibble} package with info about grouped variables,\cr- \strong{Unchanged values} and \strong{support for row names} like \code{data.frame}s are printed by the \code{base} package.
 }
 \examples{
 # still showing all values unchanged:
 library(dplyr)
 starwars
 print(starwars, width = 3)
 # compare the significance notation of this
 starwars \%>\% select(birth_year)
 # with this
 tibble:::print.tbl_df(starwars \%>\% select(birth_year))
 # supports info about groups (look at header)
 starwars \%>\% group_by(homeworld, gender)
 }
--- a/tests/testthat/test-eucast.R
+++ b/tests/testthat/test-eucast.R
@@ -1,4 +1,4 @@
-context("EUCAST.R")
+context("eucast.R")
 test_that("EUCAST rules work", {
  a <- data.frame(bactid = c("KLEPNE",  # Klebsiella pneumoniae
--- a/tests/testthat/test-freq.R
+++ b/tests/testthat/test-freq.R
@@ -0,0 +1,26 @@
 context("freq.R")
 test_that("frequency table works", {
  expect_equal(nrow(freq(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), toConsole = FALSE)), 5)
  expect_equal(nrow(frequency_tbl(c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), toConsole = FALSE)), 5)
  # date column of septic_patients should contain 1662 unique dates
  expect_equal(nrow(freq(septic_patients$date, toConsole = FALSE)), 1662)
  expect_equal(nrow(freq(septic_patients$date, toConsole = FALSE)),
               length(unique(septic_patients$date)))
  expect_output(freq(septic_patients$age))
  expect_output(freq(septic_patients$date))
  expect_output(freq(septic_patients$hospital_id))
  library(dplyr)
  expect_output(septic_patients %>% select(1:2) %>% freq())
  expect_output(septic_patients %>% select(1:3) %>% freq())
  expect_output(septic_patients %>% select(1:4) %>% freq())
  expect_output(septic_patients %>% select(1:5) %>% freq())
  expect_output(septic_patients %>% select(1:6) %>% freq())
  expect_output(septic_patients %>% select(1:7) %>% freq())
  expect_output(septic_patients %>% select(1:8) %>% freq())
  expect_output(septic_patients %>% select(1:9) %>% freq())
 })
--- a/tests/testthat/test-mdro.R
+++ b/tests/testthat/test-mdro.R
@@ -0,0 +1,16 @@
 context("mdro.R")
 test_that("MDRO works", {
  library(dplyr)
  outcome <- MDRO(septic_patients, "nl", info = FALSE)
  # check class
  expect_equal(outcome %>% class(), c('ordered', 'factor'))
  # septic_patients should have these finding using Dutch guidelines
  expect_equal(outcome %>% freq(toConsole = FALSE) %>% pull(Count), c(1152, 824, 3, 21))
  expect_equal(BRMO(septic_patients, info = FALSE), MDRO(septic_patients, "nl", info = FALSE))
 })
--- a/tests/testthat/test-misc.R
+++ b/tests/testthat/test-misc.R
@@ -10,5 +10,7 @@ test_that("`like` works", {
 test_that("percentages works", {
  expect_equal(percent(0.25), "25%")
  expect_equal(percent(0.5), "50%")
  expect_equal(percent(0.500, force_zero = TRUE), "50.0%")
  expect_equal(percent(0.1234), "12.3%")
 })
--- a/tests/testthat/test-print.R
+++ b/tests/testthat/test-print.R
@@ -0,0 +1,8 @@
 context("print.R")
 test_that("tibble printing works", {
  library(dplyr)
  expect_output(print(starwars))
  expect_output(print(septic_patients))
 })