fix first isolate

2025-07-08 14:01:55 +02:00 · 2023-02-10 13:13:17 +01:00
parent 1a0dc4bf46
commit 70a7ba0206
13 changed files with 222 additions and 56 deletions
--- a/R/first_isolate.R
+++ b/R/first_isolate.R
@ -33,7 +33,7 @@
 #' @param x a [data.frame] containing isolates. Can be left blank for automatic determination, see *Examples*.
 #' @param col_date column name of the result date (or date that is was received on the lab), defaults to the first column with a date class
 #' @param col_patient_id column name of the unique IDs of the patients, defaults to the first column that starts with 'patient' or 'patid' (case insensitive)
-#' @param col_mo column name of the IDs of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
+#' @param col_mo column name of the names or codes of the microorganisms (see [as.mo()]), defaults to the first column of class [`mo`]. Values will be coerced using [as.mo()].
 #' @param col_testcode column name of the test codes. Use `col_testcode = NULL` to **not** exclude certain test codes (such as test codes for screening). In that case `testcodes_exclude` will be ignored.
 #' @param col_specimen column name of the specimen type or group
 #' @param col_icu column name of the logicals (`TRUE`/`FALSE`) whether a ward or department is an Intensive Care Unit (ICU). This can also be a [logical] vector with the same length as rows in `x`.
@ -133,7 +133,7 @@
 #' # `example_isolates` is a data set available in the AMR package.
 #' # See ?example_isolates.
 #'
-#' example_isolates[first_isolate(), ]
+#' example_isolates[first_isolate(info = TRUE), ]
 #' \donttest{
 #' # get all first Gram-negatives
 #' example_isolates[which(first_isolate(info = FALSE) & mo_is_gram_negative()), ]
@ -141,7 +141,7 @@
 #' if (require("dplyr")) {
 #'   # filter on first isolates using dplyr:
 #'   example_isolates %>%
-#'     filter(first_isolate())
+#'     filter(first_isolate(info = TRUE))
 #' }
 #' if (require("dplyr")) {
 #'   # short-hand version:
@ -152,7 +152,7 @@
 #'   # flag the first isolates per group:
 #'   example_isolates %>%
 #'     group_by(ward) %>%
-#'     mutate(first = first_isolate()) %>%
+#'     mutate(first = first_isolate(info = TRUE)) %>%
 #'     select(ward, date, patient, mo, first)
 #' }
 #' }
@ -468,19 +468,19 @@ first_isolate <- function(x = NULL,
  x$other_pat_or_mo <- !(x$newvar_patient_id == pm_lag(x$newvar_patient_id) & x$newvar_genus_species == pm_lag(x$newvar_genus_species))

  x$episode_group <- paste(x$newvar_patient_id, x$newvar_genus_species)
+
  x$more_than_episode_ago <- unlist(
    lapply(
      split(
        x$newvar_date,
        x$episode_group
      ),
-      exec_episode, # this will skip meet_criteria() in is_new_episode(), saving time
-      type = "logical",
+      is_new_episode,
      episode_days = episode_days
    ),
    use.names = FALSE
  )
-
+  
  if (!is.null(col_keyantimicrobials)) {
    # with key antibiotics
    x$other_key_ab <- !antimicrobials_equal(
@ -490,40 +490,35 @@ first_isolate <- function(x = NULL,
      ignore_I = ignore_I,
      points_threshold = points_threshold
    )
-    x$newvar_first_isolate <- pm_if_else(
-      x$newvar_row_index_sorted >= row.start &
-        x$newvar_row_index_sorted <= row.end &
-        x$newvar_genus_species != "" &
-        (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab),
-      TRUE,
-      FALSE
-    )
+    x$newvar_first_isolate <- x$newvar_row_index_sorted >= row.start &
+      x$newvar_row_index_sorted <= row.end &
+      x$newvar_genus_species != "" &
+      (x$other_pat_or_mo | x$more_than_episode_ago | x$other_key_ab)
  } else {
    # no key antibiotics
-    x$newvar_first_isolate <- pm_if_else(
-      x$newvar_row_index_sorted >= row.start &
-        x$newvar_row_index_sorted <= row.end &
-        x$newvar_genus_species != "" &
-        (x$other_pat_or_mo | x$more_than_episode_ago),
-      TRUE,
-      FALSE
-    )
+    x$newvar_first_isolate <- x$newvar_row_index_sorted >= row.start &
+      x$newvar_row_index_sorted <= row.end &
+      x$newvar_genus_species != "" &
+      (x$other_pat_or_mo | x$more_than_episode_ago)
  }
-
+  
  # first one as TRUE
  x[row.start, "newvar_first_isolate"] <- TRUE
  # no tests that should be included, or ICU
  if (!is.null(col_testcode)) {
    x[which(x[, col_testcode] %in% tolower(testcodes_exclude)), "newvar_first_isolate"] <- FALSE
  }
+  
  if (!is.null(col_icu)) {
    if (icu_exclude == TRUE) {
-      message_("Excluding ", format(sum(col_icu, na.rm = TRUE), big.mark = ","), " isolates from ICU.",
-        add_fn = font_black,
-        as_note = FALSE
-      )
+      if (isTRUE(info)) {
+        message_("Excluding ", format(sum(col_icu, na.rm = TRUE), big.mark = ","), " isolates from ICU.",
+                 add_fn = font_black,
+                 as_note = FALSE
+        )
+      }
      x[which(col_icu), "newvar_first_isolate"] <- FALSE
-    } else {
+    } else if (isTRUE(info)) {
      message_("Including isolates from ICU.",
        add_fn = font_black,
        as_note = FALSE
@ -532,7 +527,7 @@ first_isolate <- function(x = NULL,
  }

  decimal.mark <- getOption("OutDec")
-  big.mark <- ifelse(decimal.mark != ",", ",", ".")
+  big.mark <- ifelse(decimal.mark != ",", ",", " ")

  if (isTRUE(info)) {
    # print group name if used in dplyr::group_by()
--- a/R/get_episode.R
+++ b/R/get_episode.R
@ -29,7 +29,7 @@

 #' Determine (New) Episodes for Patients
 #'
-#' These functions determine which items in a vector can be considered (the start of) a new episode, based on the argument `episode_days`. This can be used to determine clinical episodes for any epidemiological analysis. The [get_episode()] function returns the index number of the episode per group, while the [is_new_episode()] function returns values `TRUE`/`FALSE` for where [get_episode()] returns 1, and is thus equal to `get_episode(...) == 1`.
+#' These functions determine which items in a vector can be considered (the start of) a new episode, based on the argument `episode_days`. This can be used to determine clinical episodes for any epidemiological analysis. The [get_episode()] function returns the index number of the episode per group, while the [is_new_episode()] function returns `TRUE` for every new [get_episode()] index, and is thus equal to `!duplicated(get_episode(...))`.
 #' @param x vector of dates (class `Date` or `POSIXt`), will be sorted internally to determine episodes
 #' @param episode_days required episode length in days, can also be less than a day or `Inf`, see *Details*
 #' @param ... ignored, only in place to allow future extensions
@ -83,7 +83,8 @@
 #'       patient,
 #'       new_index = get_episode(date, 60),
 #'       new_logical = is_new_episode(date, 60)
-#'     )
+#'     ) %>% 
+#'     arrange(patient, ward, date)
 #' }
 #' 
 #' if (require("dplyr")) {
@ -109,12 +110,7 @@
 get_episode <- function(x, episode_days, ...) {
  meet_criteria(x, allow_class = c("Date", "POSIXt"), allow_NA = TRUE)
  meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = FALSE)
-  
-  exec_episode(
-    x = x,
-    episode_days = episode_days,
-    ... = ...
-  )
+  exec_episode(x, episode_days, ...)
 }

 #' @rdname get_episode
@ -122,10 +118,10 @@ get_episode <- function(x, episode_days, ...) {
 is_new_episode <- function(x, episode_days, ...) {
  meet_criteria(x, allow_class = c("Date", "POSIXt"), allow_NA = TRUE)
  meet_criteria(episode_days, allow_class = c("numeric", "integer"), has_length = 1, is_positive = TRUE, is_finite = FALSE)
-  get_episode(x, episode_days, ...) == 1
+  !duplicated(exec_episode(x, episode_days, ...))
 }

-exec_episode <- function(x, type, episode_days, ...) {
+exec_episode <- function(x, episode_days, ...) {
  x <- as.double(as.POSIXct(x)) # as.POSIXct() required for Date classes
  
  # since x is now in seconds, get seconds from episode_days as well