update to septic_patients, speed improvements

2025-08-24 23:42:18 +02:00 · 2018-07-25 14:17:04 +02:00
parent 03a3cb397b
commit d9e204031d
26 changed files with 273 additions and 233 deletions
--- a/man/as.mic.Rd
+++ b/man/as.mic.Rd
@@ -15,19 +15,22 @@ is.mic(x)
 \item{na.rm}{a logical indicating whether missing values should be removed}
 }
 \value{
-Ordered factor with new class \code{mic} and new attributes \code{package} and \code{package.version}
+Ordered factor with new class \code{mic} and new attribute \code{package}
 }
 \description{
-This transforms a vector to a new class\code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
+This transforms a vector to a new class \code{mic}, which is an ordered factor with valid MIC values as levels. Invalid MIC values will be translated as \code{NA} with a warning.
 }
 \examples{
 mic_data <- as.mic(c(">=32", "1.0", "1", "1.00", 8, "<=0.128", "8", "16", "16"))
 is.mic(mic_data)

 # this can also coerce combined MIC/RSI values:
-as.mic("<=0.002; R") # will return <=0.002
+as.mic("<=0.002; S") # will return <=0.002

 plot(mic_data)
 barplot(mic_data)
 }
+\seealso{
+\code{\link{as.rsi}}
+}
 \keyword{mic}
--- a/man/as.rsi.Rd
+++ b/man/as.rsi.Rd
@@ -13,7 +13,7 @@ is.rsi(x)
 \item{x}{vector}
 }
 \value{
-Ordered factor with new class \code{rsi} and new attributes \code{package} and \code{package.version}
+Ordered factor with new class \code{rsi} and new attribute \code{package}
 }
 \description{
 This transforms a vector to a new class \code{rsi}, which is an ordered factor with levels \code{S < I < R}. Invalid antimicrobial interpretations will be translated as \code{NA} with a warning.
@@ -24,9 +24,12 @@ rsi_data <- as.rsi(c(rep("S", 474), rep("I", 36), rep("R", 370), "A", "B", "C"))
 is.rsi(rsi_data)

 # this can also coerce combined MIC/RSI values:
-as.rsi("<= 0.002; R") # will return R
+as.rsi("<= 0.002; S") # will return S

 plot(rsi_data)    # for percentages
 barplot(rsi_data) # for frequencies
 }
+\seealso{
+\code{\link{as.mic}}
+}
 \keyword{rsi}
--- a/man/first_isolate.Rd
+++ b/man/first_isolate.Rd
@@ -65,7 +65,7 @@ Determine first (weighted) isolates of all microorganisms of every patient per e
 }
 \section{Key antibiotics}{

-    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
+    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates, which will generally give the same results: \cr

    \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
    Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@@ -85,6 +85,24 @@ my_patients$first_isolate <- my_patients \%>\%
                col_patient_id = "patient_id",
                col_bactid = "bactid")

+# Now let's see if first isolates matter:
+A <- my_patients \%>\%
+  group_by(hospital_id) \%>\%
+  summarise(count = n_rsi(gent), # gentamicin
+            resistance = resistance(gent))
+
+B <- my_patients \%>\%
+  filter(first_isolate == TRUE) \%>\%
+  group_by(hospital_id) \%>\%
+  summarise(count = n_rsi(gent), # gentamicin
+            resistance = resistance(gent))
+
+# Have a look at A and B. B is more reliable because every isolate is
+# counted once. Gentamicin resistance in hospital D seems to be 5\%
+# higher than originally thought.
+
+## OTHER EXAMPLES:
+
 \dontrun{

 # set key antibiotics to a new variable
--- a/man/key_antibiotics.Rd
+++ b/man/key_antibiotics.Rd
@@ -56,7 +56,7 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
 }
 \section{Key antibiotics}{

-    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates: \cr
+    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates, which will generally give the same results: \cr

    \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
    Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr
@@ -66,19 +66,26 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
 }

 \examples{
-\dontrun{
+# septic_patients is a dataset available in the AMR package
+?septic_patients
+my_patients <- septic_patients
+
+library(dplyr)
 # set key antibiotics to a new variable
-tbl$keyab <- key_antibiotics(tbl)
+my_patients <- my_patients \%>\%
+  mutate(keyab = key_antibiotics(.)) \%>\%
+  mutate(
+    # now calculate first isolates
+    first_regular = first_isolate(., "date", "patient_id", "bactid"),
+    # and first WEIGHTED isolates
+    first_weighted = first_isolate(., "date", "patient_id", "bactid",
+                                   col_keyantibiotics = "keyab")
+  )

-# add regular first isolates
-tbl$first_isolate <-
-  first_isolate(tbl)
+# Check the difference, in this data set it results in 7\% more isolates:
+sum(my_patients$first_regular, na.rm = TRUE)
+sum(my_patients$first_weighted, na.rm = TRUE)

-# add first WEIGHTED isolates using key antibiotics
-tbl$first_isolate_weighed <-
-  first_isolate(tbl,
-                col_keyantibiotics = 'keyab')
-}

 # output of the `key_antibiotics` function could be like this:
 strainA <- "SSSRR.S.R..S"
--- a/man/microorganisms.Rd
+++ b/man/microorganisms.Rd
@@ -19,9 +19,6 @@
  \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
  \item{\code{gramstain_nl}}{Gram of microorganism in Dutch, like \code{"Negatieve staven"}}
 }}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl}
-}
 \usage{
 microorganisms
 }
--- a/man/microorganisms.umcg.Rd
+++ b/man/microorganisms.umcg.Rd
@@ -9,9 +9,6 @@
  \item{\code{mocode}}{Code of microorganism according to UMCG MMB}
  \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
 }}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
-}
 \usage{
 microorganisms.umcg
 }
--- a/man/septic_patients.Rd
+++ b/man/septic_patients.Rd
@@ -4,7 +4,7 @@
 \name{septic_patients}
 \alias{septic_patients}
 \title{Dataset with 2000 blood culture isolates of septic patients}
-\format{A data.frame with 2000 observations and 47 variables:
+\format{A data.frame with 2000 observations and 49 variables:
 \describe{
  \item{\code{date}}{date of receipt at the laboratory}
  \item{\code{hospital_id}}{ID of the hospital}
@@ -15,11 +15,8 @@
  \item{\code{sex}}{sex of the patient}
  \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
  \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
-  \item{\code{peni:mupi}}{38 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in \code{\link{antibiotics}} and can be translated with \code{\link{abname}}}
+  \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in the \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
 }}
-\source{
-MOLIS (LIS of Certe) - \url{https://www.certe.nl}
-}
 \usage{
 septic_patients
 }
@@ -45,15 +42,15 @@ my_data <- my_data \%>\%
 # ANALYSIS #
 # -------- #

-# 1. Get the amoxicillin resistance percentages
-#    of E. coli, divided by hospital:
+# 1. Get the amoxicillin resistance percentages (p)
+#     and numbers (n) of E. coli, divided by hospital:

 my_data \%>\%
-  filter(bactid == "ESCCOL",
+  filter(bactid == guess_bactid("E. coli"),
         first_isolates == TRUE) \%>\%
  group_by(hospital_id) \%>\%
-  summarise(n = n(),
-            amoxicillin_resistance = rsi(amox))
+  summarise(n = n_rsi(amox),
+            p = resistance(amox))


 # 2. Get the amoxicillin/clavulanic acid resistance
@@ -63,7 +60,7 @@ my_data \%>\%
  filter(bactid == guess_bactid("E. coli"),
         first_isolates == TRUE) \%>\%
  group_by(year = format(date, "\%Y")) \%>\%
-  summarise(n = n(),
-            amoxclav_resistance = rsi(amcl, minimum = 20))
+  summarise(n = n_rsi(amcl),
+            p = resistance(amcl, minimum = 20))
 }
 \keyword{datasets}