1
0
mirror of https://github.com/msberends/AMR.git synced 2025-07-09 15:41:56 +02:00

dplyr 0.8.0 support, fixes #7

This commit is contained in:
2018-12-22 22:39:34 +01:00
parent b937662a97
commit 0b8084871d
29 changed files with 555 additions and 479 deletions

View File

@ -2,18 +2,9 @@
% Please edit documentation in R/deprecated.R
\name{AMR-deprecated}
\alias{AMR-deprecated}
\alias{as.bactid}
\alias{is.bactid}
\alias{guess_bactid}
\alias{ratio}
\title{Deprecated functions}
\usage{
as.bactid(...)
is.bactid(...)
guess_bactid(...)
ratio(x, ratio)
}
\description{

View File

@ -4,12 +4,12 @@
\alias{age}
\title{Age in years of individuals}
\usage{
age(x, y = Sys.Date())
age(x, reference = Sys.Date())
}
\arguments{
\item{x}{date(s) - will be coerced with \code{\link{as.POSIXlt}}}
\item{x}{date(s), will be coerced with \code{\link{as.POSIXlt}}}
\item{y}{reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}}}
\item{reference}{reference date(s) (defaults to today), will be coerced with \code{\link{as.POSIXlt}}}
}
\value{
Integer (no decimals)
@ -18,5 +18,5 @@ Integer (no decimals)
Calculates age in years based on a reference date, which is the system date by default.
}
\seealso{
age_groups
\code{\link{age_groups}} to split ages into groups
}

View File

@ -9,13 +9,13 @@ age_groups(x, split_at = c(12, 25, 55, 75))
\arguments{
\item{x}{age, e.g. calculated with \code{\link{age}}}
\item{split_at}{values to split \code{x}, defaults to 0-11, 12-24, 26-54, 55-74 and 75+. See Details.}
\item{split_at}{values to split \code{x} at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See Details.}
}
\value{
Ordered \code{\link{factor}}
}
\description{
Splits ages into groups defined by the \code{split} parameter.
Split ages into age groups defined by the \code{split_at} parameter. This allows for easier demographic (antimicrobial resistance) analysis.
}
\details{
To split ages, the input can be:
@ -65,7 +65,7 @@ septic_patients \%>\%
ggplot_rsi(x = "age_group")
}
\seealso{
age
\code{\link{age}} to determine ages based on one or more reference dates
}
\keyword{age}
\keyword{age_group}

View File

@ -40,7 +40,7 @@ eucast_rules(tbl, col_mo = NULL, info = TRUE,
pita = "pita", poly = "poly", pris = "pris", qida = "qida",
rifa = "rifa", roxi = "roxi", siso = "siso", teic = "teic",
tetr = "tetr", tica = "tica", tige = "tige", tobr = "tobr",
trim = "trim", trsu = "trsu", vanc = "vanc", col_bactid = NULL)
trim = "trim", trsu = "trsu", vanc = "vanc")
EUCAST_rules(...)
@ -59,8 +59,6 @@ interpretive_reading(...)
\item{amcl, amik, amox, ampi, azit, azlo, aztr, cefa, cfep, cfot, cfox, cfra, cfta, cftr, cfur, chlo, cipr, clar, clin, clox, coli, czol, dapt, doxy, erta, eryt, fosf, fusi, gent, imip, kana, levo, linc, line, mero, mezl, mino, moxi, nali, neom, neti, nitr, norf, novo, oflo, oxac, peni, pipe, pita, poly, pris, qida, rifa, roxi, siso, teic, tetr, tica, tige, tobr, trim, trsu, vanc}{column name of an antibiotic, see Antibiotics}
\item{col_bactid}{deprecated, use \code{col_mo} instead.}
\item{...}{parameters that are passed on to \code{eucast_rules}}
}
\value{

View File

@ -2,6 +2,8 @@
% Please edit documentation in R/first_isolate.R
\name{first_isolate}
\alias{first_isolate}
\alias{filter_first_isolate}
\alias{filter_first_weighted_isolate}
\title{Determine first (weighted) isolates}
\source{
Methodology of this function is based on: \strong{M39 Analysis and Presentation of Cumulative Antimicrobial Susceptibility Test Data, 4th Edition}, 2014, \emph{Clinical and Laboratory Standards Institute (CLSI)}. \url{https://clsi.org/standards/products/microbiology/documents/m39/}.
@ -11,10 +13,15 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
col_mo = NULL, col_testcode = NULL, col_specimen = NULL,
col_icu = NULL, col_keyantibiotics = NULL, episode_days = 365,
testcodes_exclude = NULL, icu_exclude = FALSE,
filter_specimen = NULL, output_logical = TRUE,
type = "keyantibiotics", ignore_I = TRUE, points_threshold = 2,
info = TRUE, col_bactid = NULL, col_genus = NULL,
col_species = NULL)
specimen_group = NULL, type = "keyantibiotics", ignore_I = TRUE,
points_threshold = 2, info = TRUE, ...)
filter_first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
col_mo = NULL, ...)
filter_first_weighted_isolate(tbl, col_date = NULL,
col_patient_id = NULL, col_mo = NULL, col_keyantibiotics = NULL,
...)
}
\arguments{
\item{tbl}{a \code{data.frame} containing isolates.}
@ -37,11 +44,9 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
\item{testcodes_exclude}{character vector with test codes that should be excluded (case-insensitive)}
\item{icu_exclude}{logical whether ICU isolates should be excluded}
\item{icu_exclude}{logical indicating whether ICU isolates should be excluded (rows with value \code{TRUE} in column \code{col_icu})}
\item{filter_specimen}{specimen group or type that should be excluded}
\item{output_logical}{return output as \code{logical} (will else be the values \code{0} or \code{1})}
\item{specimen_group}{value in column \code{col_specimen} to filter on}
\item{type}{type to determine weighted isolates; can be \code{"keyantibiotics"} or \code{"points"}, see Details}
@ -51,21 +56,34 @@ first_isolate(tbl, col_date = NULL, col_patient_id = NULL,
\item{info}{print progress}
\item{col_bactid}{(deprecated, use \code{col_mo} instead)}
\item{col_genus}{(deprecated, use \code{col_mo} instead) column name of the genus of the microorganisms}
\item{col_species}{(deprecated, use \code{col_mo} instead) column name of the species of the microorganisms}
\item{...}{parameters passed on to the \code{first_isolate} function}
}
\value{
A vector to add to table, see Examples.
Logical vector
}
\description{
Determine first (weighted) isolates of all microorganisms of every patient per episode and (if needed) per specimen type.
}
\details{
\strong{WHY THIS IS SO IMPORTANT} \cr
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
To conduct an analysis of antimicrobial resistance, you should only include the first isolate of every patient per episode \href{https://www.ncbi.nlm.nih.gov/pubmed/17304462}{[1]}. If you would not do this, you could easily get an overestimate or underestimate of the resistance of an antibiotic. Imagine that a patient was admitted with an MRSA and that it was found in 5 different blood cultures the following week. The resistance percentage of oxacillin of all \emph{S. aureus} isolates would be overestimated, because you included this MRSA more than once. It would be \href{https://en.wikipedia.org/wiki/Selection_bias}{selection bias}.
The function \code{filter_first_isolate} is essentially equal to:
\preformatted{
tbl \%>\%
mutate(only_firsts = first_isolate(tbl, ...)) \%>\%
filter(only_firsts == TRUE) \%>\%
select(-only_firsts)
}
The function \code{filter_first_weighted_isolate} is essentially equal to:
\preformatted{
tbl \%>\%
mutate(keyab = key_antibiotics(.)) \%>\%
mutate(only_weighted_firsts = first_isolate(tbl,
col_keyantibiotics = "keyab", ...)) \%>\%
filter(only_weighted_firsts == TRUE) \%>\%
select(-only_weighted_firsts)
}
}
\section{Key antibiotics}{
@ -83,20 +101,29 @@ Determine first (weighted) isolates of all microorganisms of every patient per e
?septic_patients
library(dplyr)
my_patients <- septic_patients \%>\%
# Filter on first isolates:
septic_patients \%>\%
mutate(first_isolate = first_isolate(.,
col_date = "date",
col_patient_id = "patient_id",
col_mo = "mo"))
col_mo = "mo")) \%>\%
filter(first_isolate == TRUE)
# Which can be shortened to:
septic_patients \%>\%
filter_first_isolate()
# or for first weighted isolates:
septic_patients \%>\%
filter_first_weighted_isolate()
# Now let's see if first isolates matter:
A <- my_patients \%>\%
A <- septic_patients \%>\%
group_by(hospital_id) \%>\%
summarise(count = n_rsi(gent), # gentamicin availability
resistance = portion_IR(gent)) # gentamicin resistance
B <- my_patients \%>\%
filter(first_isolate == TRUE) \%>\% # the 1st isolate filter
B <- septic_patients \%>\%
filter_first_weighted_isolate() \%>\% # the 1st isolate filter
group_by(hospital_id) \%>\%
summarise(count = n_rsi(gent), # gentamicin availability
resistance = portion_IR(gent)) # gentamicin resistance
@ -106,6 +133,7 @@ B <- my_patients \%>\%
# Gentamicin resistance in hospital D appears to be 5.4\% higher than
# when you (erroneously) would have used all isolates!
## OTHER EXAMPLES:
\dontrun{
@ -122,29 +150,29 @@ tbl$first_isolate_weighed <-
tbl$first_blood_isolate <-
first_isolate(tbl,
filter_specimen = 'Blood')
specimen_group = 'Blood')
tbl$first_blood_isolate_weighed <-
first_isolate(tbl,
filter_specimen = 'Blood',
specimen_group = 'Blood',
col_keyantibiotics = 'keyab')
tbl$first_urine_isolate <-
first_isolate(tbl,
filter_specimen = 'Urine')
specimen_group = 'Urine')
tbl$first_urine_isolate_weighed <-
first_isolate(tbl,
filter_specimen = 'Urine',
specimen_group = 'Urine',
col_keyantibiotics = 'keyab')
tbl$first_resp_isolate <-
first_isolate(tbl,
filter_specimen = 'Respiratory')
specimen_group = 'Respiratory')
tbl$first_resp_isolate_weighed <-
first_isolate(tbl,
filter_specimen = 'Respiratory',
specimen_group = 'Respiratory',
col_keyantibiotics = 'keyab')
}
}

View File

@ -10,15 +10,16 @@
frequency_tbl(x, ..., sort.count = TRUE,
nmax = getOption("max.print.freq"), na.rm = TRUE, row.names = TRUE,
markdown = !interactive(), digits = 2, quote = FALSE,
header = !markdown, title = NULL, na = "<NA>", sep = " ",
decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark !=
",", ",", "."))
header = !markdown, title = NULL, na = "<NA>", droplevels = TRUE,
sep = " ", decimal.mark = getOption("OutDec"),
big.mark = ifelse(decimal.mark != ",", ",", "."))
freq(x, ..., sort.count = TRUE, nmax = getOption("max.print.freq"),
na.rm = TRUE, row.names = TRUE, markdown = !interactive(),
digits = 2, quote = FALSE, header = !markdown, title = NULL,
na = "<NA>", sep = " ", decimal.mark = getOption("OutDec"),
big.mark = ifelse(decimal.mark != ",", ",", "."))
na = "<NA>", droplevels = TRUE, sep = " ",
decimal.mark = getOption("OutDec"), big.mark = ifelse(decimal.mark !=
",", ",", "."))
top_freq(f, n)
@ -52,6 +53,8 @@ top_freq(f, n)
\item{na}{a character string that should be used to show empty (\code{NA}) values (only useful when \code{na.rm = FALSE})}
\item{droplevels}{a logical value indicating whether in factors empty levels should be dropped}
\item{sep}{a character string to separate the terms when selecting multiple columns}
\item{decimal.mark}{%
@ -94,6 +97,7 @@ For dates and times of any class, these additional values will be calculated wit
\item{Median, using \code{\link[stats]{median}}, with percentage since oldest}
}
In factors, all factor levels that do not occur in the input data will be dropped (see the \code{droplevels} parameter).
The function \code{top_freq} uses \code{\link[dplyr]{top_n}} internally and will include more than \code{n} rows if there are ties.
}

View File

@ -5,14 +5,13 @@
\alias{key_antibiotics_equal}
\title{Key antibiotics for first \emph{weighted} isolates}
\usage{
key_antibiotics(tbl, col_mo = "mo", universal_1 = "amox",
key_antibiotics(tbl, col_mo = NULL, universal_1 = "amox",
universal_2 = "amcl", universal_3 = "cfur", universal_4 = "pita",
universal_5 = "cipr", universal_6 = "trsu", GramPos_1 = "vanc",
GramPos_2 = "teic", GramPos_3 = "tetr", GramPos_4 = "eryt",
GramPos_5 = "oxac", GramPos_6 = "rifa", GramNeg_1 = "gent",
GramNeg_2 = "tobr", GramNeg_3 = "coli", GramNeg_4 = "cfot",
GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE,
col_bactid = "bactid")
GramNeg_5 = "cfta", GramNeg_6 = "mero", warnings = TRUE, ...)
key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"),
ignore_I = TRUE, points_threshold = 2, info = FALSE)
@ -30,7 +29,7 @@ key_antibiotics_equal(x, y, type = c("keyantibiotics", "points"),
\item{warnings}{give a warning about missing antibiotic columns; these columns will be ignored anyway}
\item{col_bactid}{(deprecated, use \code{col_mo} instead)}
\item{...}{other parameters passed on to function}
\item{x, y}{characters to compare}
@ -71,18 +70,15 @@ The function \code{key_antibiotics} returns a character vector with 12 antibioti
\examples{
# septic_patients is a dataset available in the AMR package
?septic_patients
my_patients <- septic_patients
library(dplyr)
# set key antibiotics to a new variable
my_patients <- my_patients \%>\%
my_patients <- septic_patients \%>\%
mutate(keyab = key_antibiotics(.)) \%>\%
mutate(
# now calculate first isolates
first_regular = first_isolate(., "date", "patient_id", "mo"),
first_regular = first_isolate(., col_keyantibiotics = FALSE),
# and first WEIGHTED isolates
first_weighted = first_isolate(., "date", "patient_id", "mo",
col_keyantibiotics = "keyab")
first_weighted = first_isolate(., col_keyantibiotics = "keyab")
)
# Check the difference, in this data set it results in 7\% more isolates:
@ -95,7 +91,7 @@ strainA <- "SSSRR.S.R..S"
strainB <- "SSSIRSSSRSSS"
key_antibiotics_equal(strainA, strainB)
# TRUE, because I is ignored (as are missing values)
# TRUE, because I is ignored (as well as missing values)
key_antibiotics_equal(strainA, strainB, ignore_I = FALSE)
# FALSE, because I is not ignored and so the 4th value differs

View File

@ -22,8 +22,7 @@ mdro(tbl, country = NULL, col_mo = NULL, info = TRUE,
peni = "peni", pipe = "pipe", pita = "pita", poly = "poly",
qida = "qida", rifa = "rifa", roxi = "roxi", siso = "siso",
teic = "teic", tetr = "tetr", tica = "tica", tige = "tige",
tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc",
col_bactid = NULL)
tobr = "tobr", trim = "trim", trsu = "trsu", vanc = "vanc")
brmo(..., country = "nl")
@ -160,8 +159,6 @@ eucast_exceptional_phenotypes(tbl, country = "EUCAST", ...)
\item{vanc}{column name of an antibiotic, see Antibiotics}
\item{col_bactid}{deprecated, use \code{col_mo} instead.}
\item{...}{parameters that are passed on to methods}
}
\value{

View File

@ -89,11 +89,11 @@ septic_patients \%>\%
if (!require(ggplot2)) {
data <- septic_patients \%>\%
filter(mo == "ESCCOL") \%>\%
filter(mo == as.mo("E. coli")) \%>\%
resistance_predict(col_ab = "amox",
col_date = "date",
info = FALSE,
minimum = 15)
col_date = "date",
info = FALSE,
minimum = 15)
ggplot(data,
aes(x = year)) +