AMR/man/first_isolate.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/first_isolate.R
\name{first_isolate}
\alias{first_isolate}
\title{Determine first (weighted) isolates}
\source{
Methodology of this function is based on: "M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition", 2014, Clinical and Laboratory Standards Institute. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
}
\usage{
first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,
  col_testcode = NA, col_specimen = NA, col_icu = NA,
  col_keyantibiotics = NA, episode_days = 365, testcodes_exclude = "",
  icu_exclude = FALSE, filter_specimen = NA, output_logical = TRUE,
  type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2,
  info = TRUE, col_genus = NA, col_species = NA)
}
\arguments{
\item{tbl}{a \code{data.frame} containing isolates.}

\item{col_date}{column name of the result date (or the date that it was received at the lab)}

\item{col_patient_id}{column name of the unique IDs of the patients}

\item{col_bactid}{column name of the unique IDs of the microorganisms: \code{bactid}'s. If this column has another class than \code{"bactid"}, values will be coerced using \code{\link{as.bactid}}.}

\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}

\item{col_specimen}{column name of the specimen type or group}

\item{col_icu}{column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)}

\item{col_keyantibiotics}{column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Supports tidyverse-like quotation.}

\item{episode_days}{episode in days after which a genus/species combination will be determined as 'first isolate' again}

\item{testcodes_exclude}{character vector with test codes that should be excluded (case-insensitive)}

\item{icu_exclude}{logical whether ICU isolates should be excluded}

\item{filter_specimen}{specimen group or type to filter on; isolates of any other specimen group or type will be ignored, see Examples}

\item{output_logical}{return output as \code{logical} (will else be the values \code{0} or \code{1})}

\item{type}{type to determine weighted isolates; can be \code{"keyantibiotics"} or \code{"points"}, see the section \emph{Key antibiotics}}

\item{ignore_I}{logical to determine whether antibiotic interpretations with \code{"I"} will be ignored when \code{type = "keyantibiotics"}, see the section \emph{Key antibiotics}}

\item{points_threshold}{points until the comparison of key antibiotics will lead to inclusion of an isolate when \code{type = "points"}, see the section \emph{Key antibiotics}}

\item{info}{print progress}

\item{col_genus}{(deprecated, use \code{col_bactid} instead) column name of the genus of the microorganisms}

\item{col_species}{(deprecated, use \code{col_bactid} instead) column name of the species of the microorganisms}
}
\value{
A vector to add to table, see Examples.
}
\description{
Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
}
\details{
\strong{WHY THIS IS SO IMPORTANT} \cr
    To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you do not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. This would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
}
\section{Key antibiotics}{

    There are two ways to determine whether isolates can be included as first \emph{weighted} isolates, which will generally give the same results: \cr

    \strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr
    Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr

    \strong{2. Using} \code{type = "points"} \strong{and parameter} \code{points_threshold} \cr
    A difference from I to S|R (or vice versa) means 0.5 points, a difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate.
}

\examples{
# septic_patients is a dataset available in the AMR package
?septic_patients
my_patients <- septic_patients

library(dplyr)
my_patients$first_isolate <- my_patients \%>\%
  first_isolate(col_date = "date",
                col_patient_id = "patient_id",
                col_bactid = "bactid")

# Now let's see if first isolates matter:
A <- my_patients \%>\%
  group_by(hospital_id) \%>\%
  summarise(count = n_rsi(gent), # gentamicin
            resistance = resistance(gent))

B <- my_patients \%>\%
  filter(first_isolate == TRUE) \%>\%
  group_by(hospital_id) \%>\%
  summarise(count = n_rsi(gent), # gentamicin
            resistance = resistance(gent))

# Have a look at A and B. B is more reliable because every isolate is
# counted once. Gentamicin resistance in hospital D seems to be 5\%
# higher than originally thought.

## OTHER EXAMPLES:

\dontrun{

# set key antibiotics to a new variable
tbl$keyab <- key_antibiotics(tbl)

tbl$first_isolate <-
  first_isolate(tbl)

tbl$first_isolate_weighed <-
  first_isolate(tbl,
                col_keyantibiotics = 'keyab')

tbl$first_blood_isolate <-
  first_isolate(tbl,
                filter_specimen = 'Blood')

tbl$first_blood_isolate_weighed <-
  first_isolate(tbl,
                filter_specimen = 'Blood',
                col_keyantibiotics = 'keyab')

tbl$first_urine_isolate <-
  first_isolate(tbl,
                filter_specimen = 'Urine')

tbl$first_urine_isolate_weighed <-
  first_isolate(tbl,
                filter_specimen = 'Urine',
                col_keyantibiotics = 'keyab')

tbl$first_resp_isolate <-
  first_isolate(tbl,
                filter_specimen = 'Respiratory')

tbl$first_resp_isolate_weighed <-
  first_isolate(tbl,
                filter_specimen = 'Respiratory',
                col_keyantibiotics = 'keyab')
}
}
\seealso{
\code{\link{key_antibiotics}}
}
\keyword{first}
\keyword{isolate}
\keyword{isolates}
first commit 2018-02-21 11:52:31 +01:00			`% Generated by roxygen2: do not edit by hand`
new class bactid 2018-07-23 14:14:03 +02:00			`% Please edit documentation in R/first_isolate.R`
first commit 2018-02-21 11:52:31 +01:00			`\name{first_isolate}`
			`\alias{first_isolate}`
			`\title{Determine first (weighted) isolates}`
more unit tests 2018-04-20 13:45:34 +02:00			`\source{`
			`Methodology of this function is based on: "M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition", 2014, Clinical and Laboratory Standards Institute. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.`
			`}`
first commit 2018-02-21 11:52:31 +01:00			`\usage{`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`first_isolate(tbl, col_date, col_patient_id, col_bactid = NA,`
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`col_testcode = NA, col_specimen = NA, col_icu = NA,`
			`col_keyantibiotics = NA, episode_days = 365, testcodes_exclude = "",`
			`icu_exclude = FALSE, filter_specimen = NA, output_logical = TRUE,`
			`type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2,`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00			`info = TRUE, col_genus = NA, col_species = NA)`
first commit 2018-02-21 11:52:31 +01:00			`}`
			`\arguments{`
			`\item{tbl}{a \code{data.frame} containing isolates.}`

fix clipboard on linux 2018-04-02 11:11:21 +02:00			`\item{col_date}{column name of the result date (or date that is was received on the lab)}`
first commit 2018-02-21 11:52:31 +01:00
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`\item{col_patient_id}{column name of the unique IDs of the patients}`
first commit 2018-02-21 11:52:31 +01:00
new class bactid 2018-07-23 14:14:03 +02:00			`\item{col_bactid}{column name of the unique IDs of the microorganisms: \code{bactid}'s. If this column has another class than \code{"bactid"}, values will be coerced using \code{\link{as.bactid}}.}`
first commit 2018-02-21 11:52:31 +01:00
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`\item{col_testcode}{column name of the test codes. Use \code{col_testcode = NA} to \strong{not} exclude certain test codes (like test codes for screening). In that case \code{testcodes_exclude} will be ignored. Supports tidyverse-like quotation.}`
first commit 2018-02-21 11:52:31 +01:00
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`\item{col_specimen}{column name of the specimen type or group}`
first commit 2018-02-21 11:52:31 +01:00
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`\item{col_icu}{column name of the logicals (\code{TRUE}/\code{FALSE}) whether a ward or department is an Intensive Care Unit (ICU)}`
first commit 2018-02-21 11:52:31 +01:00
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`\item{col_keyantibiotics}{column name of the key antibiotics to determine first \emph{weighted} isolates, see \code{\link{key_antibiotics}}. Supports tidyverse-like quotation.}`
first commit 2018-02-21 11:52:31 +01:00
			`\item{episode_days}{episode in days after which a genus/species combination will be determined as 'first isolate' again}`

- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`\item{testcodes_exclude}{character vector with test codes that should be excluded (case-insensitive)}`
first commit 2018-02-21 11:52:31 +01:00
			`\item{icu_exclude}{logical whether ICU isolates should be excluded}`

			`\item{filter_specimen}{specimen group or type that should be excluded}`

- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`\item{output_logical}{return output as \code{logical} (will else be the values \code{0} or \code{1})}`
first commit 2018-02-21 11:52:31 +01:00
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`\item{type}{type to determine weighed isolates; can be \code{"keyantibiotics"} or \code{"points"}, see Details}`

			`\item{ignore_I}{logical to determine whether antibiotic interpretations with \code{"I"} will be ignored when \code{type = "keyantibiotics"}, see Details}`

			`\item{points_threshold}{points until the comparison of key antibiotics will lead to inclusion of an isolate when \code{type = "points"}, see Details}`
first commit 2018-02-21 11:52:31 +01:00
			`\item{info}{print progress}`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`\item{col_genus}{(deprecated, use \code{col_bactid} instead) column name of the genus of the microorganisms}`
- For functions `first_isolate`, `EUCAST_rules` the antibiotic column names are case-insensitive - Functions `first_isolate`, `EUCAST_rules` and `rsi_predict` supports tidyverse-like evaluation of parameters (no need to quote columns them anymore) - Functions `clipboard_import` and `clipboard_export` as helper functions to quickly copy and paste from/to software like Excel and SPSS - Renamed dataset `bactlist` to `microorganisms` 2018-03-23 14:46:02 +01:00
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`\item{col_species}{(deprecated, use \code{col_bactid} instead) column name of the species of the microorganisms}`
first commit 2018-02-21 11:52:31 +01:00			`}`
			`\value{`
			`A vector to add to table, see Examples.`
			`}`
			`\description{`
			`Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.`
			`}`
			`\details{`
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`\strong{WHY THIS IS SO IMPORTANT} \cr`
added septic_patients 2018-02-27 20:01:02 +01:00			To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
new algorithm key abs 2018-07-17 13:02:05 +02:00			`}`
			`\section{Key antibiotics}{`

update to septic_patients, speed improvements 2018-07-25 14:17:04 +02:00			`There are two ways to determine whether isolates can be included as first \emph{weighted} isolates which will give generally the same results: \cr`
add MIC values add badges to readme 2018-03-13 11:57:30 +01:00
use guess_bactid for GLIMS codes 2018-03-19 21:23:21 +01:00			`\strong{1. Using} \code{type = "keyantibiotics"} \strong{and parameter} \code{ignore_I} \cr`
new algorithm key abs 2018-07-17 13:02:05 +02:00			`Any difference from S to R (or vice versa) will (re)select an isolate as a first weighted isolate. With \code{ignore_I = FALSE}, also differences from I to S\|R (or vice versa) will lead to this. This is a reliable method and 30-35 times faster than method 2. \cr`

use guess_bactid for GLIMS codes 2018-03-19 21:23:21 +01:00			`\strong{2. Using} \code{type = "points"} \strong{and parameter} \code{points_threshold} \cr`
new algorithm key abs 2018-07-17 13:02:05 +02:00			`A difference from I to S\|R (or vice versa) means 0.5 points, a difference from S to R (or vice versa) means 1 point. When the sum of points exceeds \code{points_threshold}, an isolate will be (re)selected as a first weighted isolate.`
first commit 2018-02-21 11:52:31 +01:00			`}`
new algorithm key abs 2018-07-17 13:02:05 +02:00
first commit 2018-02-21 11:52:31 +01:00			`\examples{`
- Added new algorithm to determine weighted isolates, can now be `points` or `keyantibiotics, see `?first_isolate` - Function `first_isolate` supports tidyverse-like evaluation of parameters (no need to quote them anymore) - Functions `as.rsi` and `as.mic` now add the package name and version as attribute 2018-03-19 20:39:23 +01:00			`# septic_patients is a dataset available in the AMR package`
			`?septic_patients`
			`my_patients <- septic_patients`

			`library(dplyr)`
			`my_patients$first_isolate <- my_patients \%>\%`
fix clipboard on linux 2018-04-02 11:11:21 +02:00			`first_isolate(col_date = "date",`
			`col_patient_id = "patient_id",`
			`col_bactid = "bactid")`

update to septic_patients, speed improvements 2018-07-25 14:17:04 +02:00			`# Now let's see if first isolates matter:`
			`A <- my_patients \%>\%`
			`group_by(hospital_id) \%>\%`
			`summarise(count = n_rsi(gent), # gentamicin`
			`resistance = resistance(gent))`

			`B <- my_patients \%>\%`
			`filter(first_isolate == TRUE) \%>\%`
			`group_by(hospital_id) \%>\%`
			`summarise(count = n_rsi(gent), # gentamicin`
			`resistance = resistance(gent))`

			`# Have a look at A and B. B is more reliable because every isolate is`
			`# counted once. Gentamicin resitance in hospital D seems to be 5\%`
			`# higher than originally thought.`

			`## OTHER EXAMPLES:`

first commit 2018-02-21 11:52:31 +01:00			`\dontrun{`

First CRAN submission edits 2018-02-22 20:48:48 +01:00			`# set key antibiotics to a new variable`
first commit 2018-02-21 11:52:31 +01:00			`tbl$keyab <- key_antibiotics(tbl)`

			`tbl$first_isolate <-`
			`first_isolate(tbl)`

			`tbl$first_isolate_weighed <-`
			`first_isolate(tbl,`
			`col_keyantibiotics = 'keyab')`

			`tbl$first_blood_isolate <-`
			`first_isolate(tbl,`
			`filter_specimen = 'Blood')`

			`tbl$first_blood_isolate_weighed <-`
			`first_isolate(tbl,`
			`filter_specimen = 'Blood',`
			`col_keyantibiotics = 'keyab')`

			`tbl$first_urine_isolate <-`
			`first_isolate(tbl,`
			`filter_specimen = 'Urine')`

			`tbl$first_urine_isolate_weighed <-`
			`first_isolate(tbl,`
			`filter_specimen = 'Urine',`
			`col_keyantibiotics = 'keyab')`

			`tbl$first_resp_isolate <-`
			`first_isolate(tbl,`
			`filter_specimen = 'Respiratory')`

			`tbl$first_resp_isolate_weighed <-`
			`first_isolate(tbl,`
			`filter_specimen = 'Respiratory',`
			`col_keyantibiotics = 'keyab')`
			`}`
			`}`
new algorithm key abs 2018-07-17 13:02:05 +02:00			`\seealso{`
check fails 2018-07-17 14:48:11 +02:00			`\code{\link{key_antibiotics}}`
new algorithm key abs 2018-07-17 13:02:05 +02:00			`}`
first commit 2018-02-21 11:52:31 +01:00			`\keyword{first}`
			`\keyword{isolate}`
			`\keyword{isolates}`