From 81af7be36e657b88019afa6bf80b8eaabf31c243 Mon Sep 17 00:00:00 2001
From: "Matthijs S. Berends" <m.s.berends@umcg.nl>
Date: Fri, 14 Dec 2018 07:23:25 +0100
Subject: [PATCH] note about portions

---
 DESCRIPTION          |  4 ++--
 NEWS.md              |  1 +
 R/first_isolate.R    | 10 +++++++---
 R/portion.R          |  2 +-
 man/first_isolate.Rd |  4 ++--
 man/portion.Rd       |  2 +-
 6 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 222717b5d..19ed5388a 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: AMR
-Version: 0.5.0.9003
-Date: 2018-12-10
+Version: 0.5.0.9004
+Date: 2018-12-14
 Title: Antimicrobial Resistance Analysis
 Authors@R: c(
     person(
diff --git a/NEWS.md b/NEWS.md
index e913b50b1..0e2d7d7cd 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -15,6 +15,7 @@
 * Function `first_isolate`:
   * Will now use a column named like "patid" for the patient ID (parameter `col_patientid`), when this parameter was left blank
   * Will now use a column named like "key(...)ab" or "key(...)antibiotics" for the key antibiotics (parameter `col_keyantibiotics`), when this parameter was left blank
+* A note to the manual pages of the `portion` functions, that low counts can influence the outcome and that the `portion` functions may camouflage this, since they only return the portion (albeit being dependent on the `minimum` parameter)
 * Function `mo_taxonomy` now contains the kingdom too
 * Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank
 * Reduce false positives for `is.rsi.eligible`
diff --git a/R/first_isolate.R b/R/first_isolate.R
index 415b249c7..59d91f69d 100755
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@@ -23,10 +23,10 @@
 #' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column of class \code{Date}
 #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
 #' @param col_mo column name of the unique IDs of the microorganisms (see \code{\link{mo}}), defaults to the first column of class \code{mo}. Values will be coerced using \code{\link{as.mo}}.
-#' @param col_testcode column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.
+#' @param col_testcode column name of the test codes. Use \code{col_testcode = NULL} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored.
 #' @param col_specimen column name of the specimen type or group
 #' @param col_icu column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)
-#' @param col_keyantibiotics column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Supports tidyverse-like quotation. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this.
+#' @param col_keyantibiotics column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this.
 #' @param episode_days episode in days after which a genus/species combination will be determined as 'first isolate' again
 #' @param testcodes_exclude character vector with test codes that should be excluded (case-insensitive)
 #' @param icu_exclude logical whether ICU isolates should be excluded
@@ -187,7 +187,7 @@ first_isolate <- function(tbl,
   # -- key antibiotics
   if (is.null(col_keyantibiotics) & any(colnames(tbl) %like% "^key.*(ab|antibiotics)")) {
     col_keyantibiotics <- colnames(tbl)[colnames(tbl) %like% "^key.*(ab|antibiotics)"][1]
-    message("NOTE: Using column `", col_keyantibiotics, "` as input for `col_keyantibiotics`.")
+    message("NOTE: Using column `", col_keyantibiotics, "` as input for `col_keyantibiotics`. Use col_keyantibiotics = FALSE to prevent this.")
   }
   if (isFALSE(col_keyantibiotics)) {
     col_keyantibiotics <- NULL
@@ -205,6 +205,10 @@ first_isolate <- function(tbl,
       stop('Please check tbl for existance.')
     }
 
+    if (is.na(column)) {
+      column <- NULL
+    }
+
     if (!is.null(column)) {
       if (!(column %in% colnames(tblname))) {
         stop('Column `', column, '` not found.')
diff --git a/R/portion.R b/R/portion.R
index cc89e09a8..f86defca3 100755
--- a/R/portion.R
+++ b/R/portion.R
@@ -30,7 +30,7 @@
 #' @param combine_IR a logical to indicate whether all values of I and R must be merged into one, so the output only consists of S vs. IR (susceptible vs. non-susceptible)
 #' @details \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 #'
-#' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. If a column has been transformed with \code{\link{as.rsi}}, just use e.g. \code{isolates[isolates == "R"]} to get the resistant ones. You could then calculate the \code{\link{length}} of it.
+#' These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. Use the \code{\link[AMR]{count}} functions to count isolates. \emph{Low counts can influence the outcome - these \code{portion} functions may camouflage this, since they only return the portion albeit being dependent on the \code{minimum} parameter.}
 #'
 #' \code{portion_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and calculates the portions R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}.
 #'
diff --git a/man/first_isolate.Rd b/man/first_isolate.Rd
index afc0e3706..68239b91d 100755
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@@ -25,13 +25,13 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
 
 \item{col_mo}{column name of the unique IDs of the microorganisms (see \code{\link{mo}}), defaults to the first column of class \code{mo}. Values will be coerced using \code{\link{as.mo}}.}
 
-\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}
+\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NULL} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored.}
 
 \item{col_specimen}{column name of the specimen type or group}
 
 \item{col_icu}{column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)}
 
-\item{col_keyantibiotics}{column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Supports tidyverse-like quotation. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this.}
+\item{col_keyantibiotics}{column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Defaults to the first column that starts with 'key' followed by 'ab' or 'antibiotics' (case insensitive). Use \code{col_keyantibiotics = FALSE} to prevent this.}
 
 \item{episode_days}{episode in days after which a genus/species combination will be determined as 'first isolate' again}
 
diff --git a/man/portion.Rd b/man/portion.Rd
index 3b8820dd9..590f9505f 100644
--- a/man/portion.Rd
+++ b/man/portion.Rd
@@ -59,7 +59,7 @@ These functions can be used to calculate the (co-)resistance of microbial isolat
 \details{
 \strong{Remember that you should filter your table to let it contain only first isolates!} Use \code{\link{first_isolate}} to determine them in your data set.
 
-These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. If a column has been transformed with \code{\link{as.rsi}}, just use e.g. \code{isolates[isolates == "R"]} to get the resistant ones. You could then calculate the \code{\link{length}} of it.
+These functions are not meant to count isolates, but to calculate the portion of resistance/susceptibility. Use the \code{\link[AMR]{count}} functions to count isolates. \emph{Low counts can influence the outcome - these \code{portion} functions may camouflage this, since they only return the portion albeit being dependent on the \code{minimum} parameter.}
 
 \code{portion_df} takes any variable from \code{data} that has an \code{"rsi"} class (created with \code{\link{as.rsi}}) and calculates the portions R, I and S. The resulting \emph{tidy data} (see Source) \code{data.frame} will have three rows (S/I/R) and a column for each variable with class \code{"rsi"}.