dplyr 0.8.0 support, fixes #7

2025-07-09 15:41:56 +02:00 · 2018-12-22 22:39:34 +01:00
parent b937662a97
commit 0b8084871d
29 changed files with 555 additions and 479 deletions
--- a/man/AMR-deprecated.Rd
+++ b/man/AMR-deprecated.Rd
@ -2,18 +2,9 @@
 % Please edit documentation in R/deprecated.R
 \name{AMR-deprecated}
 \alias{AMR-deprecated}
-\alias{as.bactid}
-\alias{is.bactid}
-\alias{guess_bactid}
 \alias{ratio}
 \title{Deprecated functions}
 \usage{
-as.bactid(...)
-
-is.bactid(...)
-
-guess_bactid(...)
-
 ratio(x, ratio)
 }
 \description{
--- a/man/age.Rd
+++ b/man/age.Rd
@ -4,12 +4,12 @@
 \alias{age}
 \title{Age in years of individuals}
 \usage{
-age(x, y = Sys.Date())
+age(x, reference = Sys.Date())
 }
 \arguments{
-\item{x}{date(s) - will be coerced with \code{\link{as.POSIXlt}}}
+\item{x}{date(s), will be coerced with \code{\link{as.POSIXlt}}}

-\item{y}{reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}}}
+\item{reference}{reference date(s) (defaults to today), will be coerced with \code{\link{as.POSIXlt}}}
 }
 \value{
 Integer (no decimals)
@ -18,5 +18,5 @@ Integer (no decimals)
 Calculates age in years based on a reference date, which is the system time by default.
 }
 \seealso{
-age_groups
+\code{\link{age_groups}} to split ages into groups
 }
--- a/man/age_groups.Rd
+++ b/man/age_groups.Rd
@ -9,13 +9,13 @@ age_groups(x, split_at = c(12, 25, 55, 75))
 \arguments{
 \item{x}{age, e.g. calculated with \code{\link{age}}}

-\item{split_at}{values to split \code{x}, defaults to 0-11, 12-24, 26-54, 55-74 and 75+. See Details.}
+\item{split_at}{values to split \code{x} at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See Details.}
 }
 \value{
 Ordered \code{\link{factor}}
 }
 \description{
-Splits ages into groups defined by the \code{split} parameter.
+Split ages into age groups defined by the \code{split_at} parameter. This allows for easier demographic (antimicrobial resistance) analysis.
 }
 \details{
 To split ages, the input can be:
@ -65,7 +65,7 @@ septic_patients \%>\%
  ggplot_rsi(x = "age_group")
 }
 \seealso{
-age
+\code{\link{age}} to determine ages based on one or more reference dates
 }
 \keyword{age}
 \keyword{age_group}
--- a/man/eucast_rules.Rd
+++ b/man/eucast_rules.Rd
@ -40,7 +40,7 @@ eucast_rules(tbl, col_mo = NULL, info = TRUE,
  pita = "pita", poly = "poly", pris = "pris", qida = "qida",
  rifa = "rifa", roxi = "roxi", siso = "siso", teic = "teic",
  tetr = "tetr", tica = "tica", tige = "tige", tobr = "tobr",
-  trim = "trim", trsu = "trsu", vanc = "vanc", col_bactid = NULL)
+  trim = "trim", trsu = "trsu", vanc = "vanc")

 EUCAST_rules(...)

@ -59,8 +59,6 @@ interpretive_reading(...)

 \item{amcl, amik, amox, ampi, azit, azlo, aztr, cefa, cfep, cfot, cfox, cfra, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, doxy, erta, eryt, fosf, fusi, gent, imip, kana, levo, linc, line, mero, mezl, mino, moxi, nali, neom, neti, nitr, norf, novo, oflo, oxac, peni, pipe, pita, poly, pris, qida, rifa, roxi, siso, teic, tetr, tica, tige, tobr, trim, trsu, vanc}{column name of an antibiotic, see Antibiotics}

-\item{col_bactid}{deprecated, use \code{col_mo} instead.}
-
 \item{...}{parameters that are passed on to \code{eucast_rules}}
 }
 \value{
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@ -2,6 +2,8 @@
 % Please edit documentation in R/first_isolate.R
 \name{first_isolate}
 \alias{first_isolate}
+\alias{filter_first_isolate}
+\alias{filter_first_weighted_isolate}
 \title{Determine first (weighted) isolates}
 \source{
 Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
@ -11,10 +13,15 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
  col_mo = NULL, col_testcode = NULL, col_specimen = NULL,
  col_icu = NULL, col_keyantibiotics = NULL, episode_days = 365,
  testcodes_exclude = NULL, icu_exclude = FALSE,
-  filter_specimen = NULL, output_logical = TRUE,
-  type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2,
-  info = TRUE, col_bactid = NULL, col_genus = NULL,
-  col_species = NULL)
+  specimen_group = NULL, type = "keyantibiotics", ignore_I = TRUE,
+  points_threshold = 2, info = TRUE, ...)
+
+filter_first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
+  col_mo = NULL, ...)
+
+filter_first_weighted_isolate(tbl, col_date = NULL,
+  col_patient_id = NULL, col_mo = NULL, col_keyantibiotics = NULL,
+  ...)
 }
 \arguments{
 \item{tbl}{a \code{data.frame} containing isolates.}
@ -37,11 +44,9 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,

 \item{testcodes_exclude}{character vector with test codes that should be excluded (case-insensitive)}

-\item{icu_exclude}{logical whether ICU isolates should be excluded}
+\item{icu_exclude}{logical whether ICU isolates should be excluded (rows with value \code{TRUE} in column \code{col_icu})}

-\item{filter_specimen}{specimen group or type that should be excluded}
-
-\item{output_logical}{return output as \code{logical} (will else be the values \code{0} or \code{1})}
+\item{specimen_group}{value in column \code{col_specimen} to filter on}

 \item{type}{type to determine weighted isolates; can be \code{"keyantibiotics"} or \code{"points"}, see Details}

@ -51,21 +56,34 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,

 \item{info}{print progress}

-\item{col_bactid}{(deprecated, use \code{col_mo} instead)}
-
-\item{col_genus}{(deprecated, use \code{col_mo} instead) column name of the genus of the microorganisms}
-
-\item{col_species}{(deprecated, use \code{col_mo} instead) column name of the species of the microorganisms}
+\item{...}{parameters passed on to the \code{first_isolate} function}
 }
 \value{
-A vector to add to table, see Examples.
+Logical vector
 }
 \description{
 Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
 }
 \details{
 \strong{WHY THIS IS SO IMPORTANT} \cr
-    To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
+To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
+
+The function \code{filter_first_isolate} is essentially equal to:
+\preformatted{
+ tbl \%>\%
+   mutate(only_firsts = first_isolate(tbl, ...)) \%>\%
+   filter(only_firsts == TRUE) \%>\%
+   select(-only_firsts)
+}
+The function \code{filter_first_weighted_isolate} is essentially equal to:
+\preformatted{
+ tbl \%>\%
+   mutate(keyab = key_antibiotics(.)) \%>\%
+   mutate(only_weighted_firsts = first_isolate(tbl,
+                                               col_keyantibiotics = "keyab", ...)) \%>\%
+   filter(only_weighted_firsts == TRUE) \%>\%
+   select(-only_weighted_firsts)
+}
 }
 \section{Key antibiotics}{

@ -83,20 +101,29 @@ Determine first (weighted) isolates of all microorganisms of every patient per e
 ?septic_patients

 library(dplyr)
-my_patients <- septic_patients \%>\%
+# Filter on first isolates:
+septic_patients \%>\%
  mutate(first_isolate = first_isolate(.,
                                       col_date = "date",
                                       col_patient_id = "patient_id",
-                                       col_mo = "mo"))
+                                       col_mo = "mo")) \%>\%
+  filter(first_isolate == TRUE)
+
+# Which can be shortened to:
+septic_patients \%>\%
+  filter_first_isolate()
+# or for first weighted isolates:
+septic_patients \%>\%
+  filter_first_weighted_isolate()

 # Now let's see if first isolates matter:
-A <- my_patients \%>\%
+A <- septic_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(count = n_rsi(gent),            # gentamicin availability
            resistance = portion_IR(gent))  # gentamicin resistance

-B <- my_patients \%>\%
-  filter(first_isolate == TRUE) \%>\%         # the 1st isolate filter
+B <- septic_patients \%>\%
+  filter_first_weighted_isolate() \%>\%       # the 1st isolate filter
  group_by(hospital_id) \%>\%
  summarise(count = n_rsi(gent),            # gentamicin availability
            resistance = portion_IR(gent))  # gentamicin resistance
@ -106,6 +133,7 @@ B <- my_patients \%>\%
 # Gentamicin resistance in hospital D appears to be 5.4\% higher than
 # when you (erroneously) would have used all isolates!

+
 ## OTHER EXAMPLES:

 \dontrun{
@ -122,29 +150,29 @@ tbl$first_isolate_weighed <-

 tbl$first_blood_isolate <-
  first_isolate(tbl,
-                filter_specimen = 'Blood')
+                specimen_group = 'Blood')

 tbl$first_blood_isolate_weighed <-
  first_isolate(tbl,
-                filter_specimen = 'Blood',
+                specimen_group = 'Blood',
                col_keyantibiotics = 'keyab')

 tbl$first_urine_isolate <-
  first_isolate(tbl,
-                filter_specimen = 'Urine')
+                specimen_group = 'Urine')

 tbl$first_urine_isolate_weighed <-
  first_isolate(tbl,
-                filter_specimen = 'Urine',
+                specimen_group = 'Urine',
                col_keyantibiotics = 'keyab')

 tbl$first_resp_isolate <-
  first_isolate(tbl,
-                filter_specimen = 'Respiratory')
+                specimen_group = 'Respiratory')

 tbl$first_resp_isolate_weighed <-
  first_isolate(tbl,
-                filter_specimen = 'Respiratory',
+                specimen_group = 'Respiratory',
                col_keyantibiotics = 'keyab')
 }
 }
--- a/man/freq.Rd
+++ b/man/freq.Rd
@ -10,15 +10,16 @@
 frequency_tbl(x, ..., sort.count = TRUE,
  nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE,
  markdown = !interactive(), digits = 2, quote = FALSE,
-  header = !markdown, title = NULL, na = "<NA>", sep = " ",
-  decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark !=
-  ",", ",", "."))
+  header = !markdown, title = NULL, na = "<NA>", droplevels = TRUE,
+  sep = " ", decimal.mark = getOption("OutDec"),
+  big.mark = ifelse(decimal.mark != ",", ",", "."))

 freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
  na.rm = TRUE, row.names = TRUE, markdown = !interactive(),
  digits = 2, quote = FALSE, header = !markdown, title = NULL,
-  na = "<NA>", sep = " ", decimal.mark = getOption("OutDec"),
-  big.mark = ifelse(decimal.mark != ",", ",", "."))
+  na = "<NA>", droplevels = TRUE, sep = " ",
+  decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark !=
+  ",", ",", "."))

 top_freq(f, n)

@ -52,6 +53,8 @@ top_freq(f, n)

 \item{na}{a character string that should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})}

+\item{droplevels}{a logical value indicating whether in factors empty levels should be dropped}
+
 \item{sep}{a character string to separate the terms when selecting multiple columns}

 \item{decimal.mark}{%
@ -94,6 +97,7 @@ For dates and times of any class, these additional values will be calculated wit
  \item{Median, using \code{\link[stats]{median}}, with percentage since oldest}
 }

+For factors, all factor levels that do not occur in the input data will be dropped (see the \code{droplevels} parameter).

 The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties.
 }
--- a/man/key_antibiotics.Rd
+++ b/man/key_antibiotics.Rd
@ -5,14 +5,13 @@
 \alias{key_antibiotics_equal}
 \title{Key antibiotics for first \emph{weighted} isolates}
 \usage{
-key_antibiotics(tbl, col_mo = "mo", universal_1 = "amox",
+key_antibiotics(tbl, col_mo = NULL, universal_1 = "amox",
  universal_2 = "amcl", universal_3 = "cfur", universal_4 = "pita",
  universal_5 = "cipr", universal_6 = "trsu", GramPos_1 = "vanc",
  GramPos_2 = "teic", GramPos_3 = "tetr", GramPos_4 = "eryt",
  GramPos_5 = "oxac", GramPos_6 = "rifa", GramNeg_1 = "gent",
  GramNeg_2 = "tobr", GramNeg_3 = "coli", GramNeg_4 = "cfot",
-  GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE,
-  col_bactid = "bactid")
+  GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE, ...)

 key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"),
  ignore_I = TRUE, points_threshold = 2, info = FALSE)
@ -30,7 +29,7 @@ key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"),

 \item{warnings}{give warning about missing antibiotic columns, they will anyway be ignored}

-\item{col_bactid}{(deprecated, use \code{col_mo} instead)}
+\item{...}{other parameters passed on to function}

 \item{x, y}{characters to compare}

@ -71,18 +70,15 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
 \examples{
 # septic_patients is a dataset available in the AMR package
 ?septic_patients
-my_patients <- septic_patients
-
 library(dplyr)
 # set key antibiotics to a new variable
-my_patients <- my_patients \%>\%
+my_patients <- septic_patients \%>\%
  mutate(keyab = key_antibiotics(.)) \%>\%
  mutate(
    # now calculate first isolates
-    first_regular = first_isolate(., "date", "patient_id", "mo"),
+    first_regular = first_isolate(., col_keyantibiotics = FALSE),
    # and first WEIGHTED isolates
-    first_weighted = first_isolate(., "date", "patient_id", "mo",
-                                   col_keyantibiotics = "keyab")
+    first_weighted = first_isolate(., col_keyantibiotics = "keyab")
  )

 # Check the difference, in this data set it results in 7\% more isolates:
@ -95,7 +91,7 @@ strainA <- "SSSRR.S.R..S"
 strainB <- "SSSIRSSSRSSS"

 key_antibiotics_equal(strainA, strainB)
-# TRUE, because I is ignored (as are missing values)
+# TRUE, because I is ignored (as well as missing values)

 key_antibiotics_equal(strainA, strainB, ignore_I = FALSE)
 # FALSE, because I is not ignored and so the 4th value differs
--- a/man/mdro.Rd
+++ b/man/mdro.Rd
@ -22,8 +22,7 @@ mdro(tbl, country = NULL, col_mo = NULL, info = TRUE,
  peni = "peni", pipe = "pipe", pita = "pita", poly = "poly",
  qida = "qida", rifa = "rifa", roxi = "roxi", siso = "siso",
  teic = "teic", tetr = "tetr", tica = "tica", tige = "tige",
-  tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc",
-  col_bactid = NULL)
+  tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc")

 brmo(..., country = "nl")

@ -160,8 +159,6 @@ eucast_exceptional_phenotypes(tbl, country = "EUCAST", ...)

 \item{vanc}{column name of an antibiotic, see Antibiotics}

-\item{col_bactid}{deprecated, use \code{col_mo} instead.}
-
 \item{...}{parameters that are passed on to methods}
 }
 \value{
--- a/man/resistance_predict.Rd
+++ b/man/resistance_predict.Rd
@ -89,11 +89,11 @@ septic_patients \%>\%
 if (!require(ggplot2)) {

  data <- septic_patients \%>\%
-    filter(mo == "ESCCOL") \%>\%
+    filter(mo == as.mo("E. coli")) \%>\%
    resistance_predict(col_ab = "amox",
-                      col_date = "date",
-                      info = FALSE,
-                      minimum = 15)
+                       col_date = "date",
+                       info = FALSE,
+                        minimum = 15)

  ggplot(data,
         aes(x = year)) +