From 2018d5b4778ecd568db7c66cc7fea7559d7f97f5 Mon Sep 17 00:00:00 2001 From: "Matthijs S. Berends" Date: Sat, 15 Dec 2018 22:40:07 +0100 Subject: [PATCH] age and age_groups --- DESCRIPTION | 4 +- NAMESPACE | 2 + NEWS.md | 4 +- R/age.R | 147 ++++++++++++++++++++++++++++++++++++++ R/ggplot_rsi.R | 11 +++ man/age.Rd | 22 ++++++ man/age_groups.Rd | 66 +++++++++++++++++ man/ggplot_rsi.Rd | 11 +++ tests/testthat/test-age.R | 29 ++++++++ 9 files changed, 293 insertions(+), 3 deletions(-) create mode 100644 R/age.R create mode 100644 man/age.Rd create mode 100644 man/age_groups.Rd create mode 100644 tests/testthat/test-age.R diff --git a/DESCRIPTION b/DESCRIPTION index 19ed5388..d76ea4a0 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: AMR -Version: 0.5.0.9004 -Date: 2018-12-14 +Version: 0.5.0.9005 +Date: 2018-12-15 Title: Antimicrobial Resistance Analysis Authors@R: c( person( diff --git a/NAMESPACE b/NAMESPACE index 90ce44fe..1f82d17b 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,8 @@ export(ab_tradenames) export(ab_trivial_nl) export(ab_umcg) export(abname) +export(age) +export(age_groups) export(anti_join_microorganisms) export(as.atc) export(as.bactid) diff --git a/NEWS.md b/NEWS.md index 4acd3280..ded89d9c 100755 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,8 @@ #### New * Function `mo_failures` to review values that could not be coerced to a valid MO code, using `as.mo`. This latter function will now only show a maximum of 25 uncoerced values. * Function `mo_renamed` to get a list of all returned values from `as.mo` that have had taxonomic renaming +* Function `age` to calculate the (patients) age in years +* Function `age_groups` to split ages into custom or predefined groups (like children or elderly). This allows for easier antimicrobial resistance per age group. 
#### Changed * Improvements for `as.mo`: @@ -16,7 +18,7 @@ * Function `first_isolate`: * Will now use a column named like "patid" for the patient ID (parameter `col_patientid`), when this parameter was left blank * Will now use a column named like "key(...)ab" or "key(...)antibiotics" for the key antibiotics (parameter `col_keyantibiotics`), when this parameter was left blank -* A note to the manual pages of the `portion` functions, that low counts can infuence the outcome and that the `portion` functions may camouflage this, since they only return the portion (albeit being dependent on the `minimum` parameter) +* A note to the manual pages of the `portion` functions, that low counts can influence the outcome and that the `portion` functions may camouflage this, since they only return the portion (albeit being dependent on the `minimum` parameter) * Function `mo_taxonomy` now contains the kingdom too * Function `first_isolate` will now use a column named like "patid" for the patient ID, when this parameter was left blank * Reduce false positives for `is.rsi.eligible` diff --git a/R/age.R b/R/age.R new file mode 100644 index 00000000..ac0aff7b --- /dev/null +++ b/R/age.R @@ -0,0 +1,147 @@ +# ==================================================================== # +# TITLE # +# Antimicrobial Resistance (AMR) Analysis # +# # +# AUTHORS # +# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) # +# # +# LICENCE # +# This program is free software; you can redistribute it and/or modify # +# it under the terms of the GNU General Public License version 2.0, # +# as published by the Free Software Foundation. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. 
#
+# ==================================================================== #
+
+#' Age in years of individuals
+#'
+#' Calculates age in years based on a reference date, which is the system time at default.
+#' @param x date(s) - will be coerced with \code{\link{as.POSIXlt}}
+#' @param y reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}}
+#' @return Integer (no decimals)
+#' @seealso age_groups
+#' @importFrom dplyr if_else
+#' @export
+age <- function(x, y = Sys.Date()) {
+  # a single reference date is recycled over all values of `x`
+  if (length(x) != length(y)) {
+    if (length(y) == 1) {
+      y <- rep(y, length(x))
+    } else {
+      stop("`x` and `y` must be of same length, or `y` must be of length 1.")
+    }
+  }
+  x <- base::as.POSIXlt(x)
+  y <- base::as.POSIXlt(y)
+  # missing dates make the comparisons below NA; `na.rm = TRUE` keeps `if()`
+  # from failing on them, so such elements just end up with an NA age
+  if (any(y < x, na.rm = TRUE)) {
+    stop("`y` cannot be lower (older) than `x`.")
+  }
+  years_gap <- y$year - x$year
+  # from https://stackoverflow.com/a/25450756/4575331
+  # one year less when the birthday has not yet passed in the year of `y`
+  ages <- if_else(y$mon < x$mon | (y$mon == x$mon & y$mday < x$mday),
+                  as.integer(years_gap - 1),
+                  as.integer(years_gap))
+  if (any(ages > 120, na.rm = TRUE)) {
+    warning("Some ages are >120.")
+  }
+  ages
+}
+
+#' Split ages in age groups
+#'
+#' Splits ages into groups defined by the \code{split_at} parameter.
+#' @param x age, e.g. calculated with \code{\link{age}}
+#' @param split_at values to split \code{x}, defaults to 0-11, 12-24, 25-54, 55-74 and 75+. See Details.
+#' @details To split ages, the input can be:
+#' \itemize{
+#'   \item{A numeric vector. A vector of \code{c(10, 20)} will split on 0-9, 10-19 and 20+. A value of only \code{50} will split on 0-49 and 50+.
+#'   The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).}
+#'   \item{A character:}
+#'   \itemize{
+#'     \item{\code{"children"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.}
+#'     \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85, 95)}. This will split on 0-64, 65-74, 75-84, 85-94 and 95+.}
+#'     \item{\code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth.}
+#'     \item{\code{"tens"}, equivalent of: \code{1:10 * 10}. This will split on 0-9, 10-19, 20-29 and so forth.}
+#'   }
+#' }
+#' @return Ordered \code{\link{factor}}
+#' @seealso age
+#' @export
+#' @examples
+#' ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21)
+#'
+#' # split on 0-49 and 50+
+#' age_groups(ages, 50)
+#'
+#' # split on 0-20, 21-49 and 50+
+#' age_groups(ages, c(21, 50))
+#'
+#' # split on every ten years
+#' age_groups(ages, 1:10 * 10)
+#' age_groups(ages, "tens")
+#'
+#' # split on every five years
+#' age_groups(ages, 1:20 * 5)
+#' age_groups(ages, "fives")
+#'
+#' # split on children
+#' age_groups(ages, "children")
+#'
+#' # resistance of ciprofloxacine per age group
+#' septic_patients %>%
+#'   mutate(first_isolate = first_isolate(.)) %>%
+#'   filter(first_isolate == TRUE,
+#'          mo == as.mo("E. coli")) %>%
+#'   group_by(age_group = age_groups(age)) %>%
+#'   select(age_group,
+#'          cipr) %>%
+#'   ggplot_rsi(x = "age_group")
+age_groups <- function(x, split_at = c(12, 25, 55, 75)) {
+  # translate a preset name; `else if` makes sure that `%like%` is no longer
+  # applied once `split_at` was replaced by a numeric vector, as `if()` needs
+  # a single TRUE/FALSE
+  if (is.character(split_at) && length(split_at) == 1) {
+    if (split_at %like% "^child") {
+      split_at <- c(0, 1, 2, 4, 6, 13, 18)
+    } else if (split_at %like% "^elder" || split_at %like% "^senior") {
+      split_at <- c(65, 75, 85, 95)
+    } else if (split_at %like% "fives") {
+      split_at <- 1:20 * 5
+    } else if (split_at %like% "^tens") {
+      split_at <- 1:10 * 10
+    }
+  }
+  # unknown preset names are still character at this point and rejected here
+  if (!is.numeric(x) || !is.numeric(split_at)) {
+    stop("`x` and `split_at` must both be numeric.")
+  }
+  # the first group always starts at 0; sort only after adding it
+  split_at <- sort(unique(c(0, split_at)))
+  if (length(split_at) == 1) {
+    # only 0 available
+    stop("invalid value for `split_at`.")
+  }
+
+  # turn input values to 'split_at' indices; index 0 means an age below the
+  # first group, which has no label and thus becomes NA
+  y <- findInterval(x, split_at)
+  y[which(y == 0)] <- NA
+
+  # create labels: a single value for groups of one year, otherwise "from-to"
+  lower <- split_at[-length(split_at)]
+  upper <- split_at[-1] - 1
+  labs <- ifelse(lower == upper,
+                 as.character(lower),
+                 paste0(lower, "-", upper))
+  # last category
+  labs <- c(labs, paste0(split_at[length(split_at)], "+"))
+
+  # ordered, so that age groups sort from young to old
+  factor(labs[y], levels = labs, ordered = TRUE)
+}
diff --git a/R/ggplot_rsi.R b/R/ggplot_rsi.R index e5fa46e6..62732ebf 100644 --- a/R/ggplot_rsi.R +++ b/R/ggplot_rsi.R @@ -86,6 +86,17 @@ #' size = 1, #' linetype = 2, #' alpha = 0.25) +#' +#' # resistance of ciprofloxacine per age group +#' septic_patients %>% +#' mutate(first_isolate = first_isolate(.)) %>% +#' filter(first_isolate == TRUE, +#' mo == as.mo("E. coli")) %>% +#' # `age_groups` is also a function of this package: +#' group_by(age_group = age_groups(age)) %>% +#' select(age_group, +#' cipr) %>% +#' ggplot_rsi(x = "age_group") #' \donttest{ #' #' # for colourblind mode, use divergent colours from the viridis package: diff --git a/man/age.Rd b/man/age.Rd new file mode 100644 index 00000000..d0ef60a1 --- /dev/null +++ b/man/age.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/age.R +\name{age} +\alias{age} +\title{Age in years of individuals} +\usage{ +age(x, y = Sys.Date()) +} +\arguments{ +\item{x}{date(s) - will be coerced with \code{\link{as.POSIXlt}}} + +\item{y}{reference date(s) - defaults to \code{\link{Sys.Date}} - will be coerced with \code{\link{as.POSIXlt}}} +} +\value{ +Integer (no decimals) +} +\description{ +Calculates age in years based on a reference date, which is the system time at default.
+} +\seealso{ +age_groups +} diff --git a/man/age_groups.Rd b/man/age_groups.Rd new file mode 100644 index 00000000..ef10947b --- /dev/null +++ b/man/age_groups.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/age.R +\name{age_groups} +\alias{age_groups} +\title{Split ages in age groups} +\usage{ +age_groups(x, split_at = c(12, 25, 55, 75)) +} +\arguments{ +\item{x}{age, e.g. calculated with \code{\link{age}}} + +\item{split_at}{values to split \code{x}, defaults to 0-11, 12-24, 25-54, 55-74 and 75+. See Details.} +} +\value{ +Ordered \code{\link{factor}} +} +\description{ +Splits ages into groups defined by the \code{split_at} parameter. +} +\details{ +To split ages, the input can be: +\itemize{ + \item{A numeric vector. A vector of \code{c(10, 20)} will split on 0-9, 10-19 and 20+. A value of only \code{50} will split on 0-49 and 50+. + The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).} + \item{A character:} + \itemize{ + \item{\code{"children"}, equivalent of: \code{c(0, 1, 2, 4, 6, 13, 18)}. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.} + \item{\code{"elderly"} or \code{"seniors"}, equivalent of: \code{c(65, 75, 85, 95)}. This will split on 0-64, 65-74, 75-84, 85-94 and 95+.} + \item{\code{"fives"}, equivalent of: \code{1:20 * 5}. This will split on 0-4, 5-9, 10-14, 15-19 and so forth.} + \item{\code{"tens"}, equivalent of: \code{1:10 * 10}. 
This will split on 0-9, 10-19, 20-29 and so forth.} + } +} +} +\examples{ +ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21) + +# split on 0-49 and 50+ +age_groups(ages, 50) + +# split on 0-20, 21-49 and 50+ +age_groups(ages, c(21, 50)) + +# split on every ten years +age_groups(ages, 1:10 * 10) +age_groups(ages, "tens") + +# split on every five years +age_groups(ages, 1:20 * 5) +age_groups(ages, "fives") + +# split on children +age_groups(ages, "children") + +# resistance of ciprofloxacine per age group +septic_patients \%>\% + mutate(first_isolate = first_isolate(.)) \%>\% + filter(first_isolate == TRUE, + mo == as.mo("E. coli")) \%>\% + group_by(age_group = age_groups(age)) \%>\% + select(age_group, + cipr) \%>\% + ggplot_rsi(x = "age_group") +} +\seealso{ +age +} diff --git a/man/ggplot_rsi.Rd b/man/ggplot_rsi.Rd index 36fc6f25..ec2b6bb8 100644 --- a/man/ggplot_rsi.Rd +++ b/man/ggplot_rsi.Rd @@ -114,6 +114,17 @@ septic_patients \%>\% size = 1, linetype = 2, alpha = 0.25) + +# resistance of ciprofloxacine per age group +septic_patients \%>\% + mutate(first_isolate = first_isolate(.)) \%>\% + filter(first_isolate == TRUE, + mo == as.mo("E. 
coli")) \%>\% + # `age_groups` is also a function of this package: + group_by(age_group = age_groups(age)) \%>\% + select(age_group, + cipr) \%>\% + ggplot_rsi(x = "age_group") \donttest{ # for colourblind mode, use divergent colours from the viridis package:
diff --git a/tests/testthat/test-age.R b/tests/testthat/test-age.R
new file mode 100644
index 00000000..fb5a419d
--- /dev/null
+++ b/tests/testthat/test-age.R
@@ -0,0 +1,29 @@
+context("age.R")
+
+test_that("age works", {
+  expect_equal(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"),
+                   y = "2019-01-01"),
+               c(39, 34, 29))
+
+  expect_error(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"),
+                   y = c("2019-01-01", "2019-01-01")))
+
+  expect_error(age(x = c("1980-01-01", "1985-01-01", "1990-01-01"),
+                   y = "1975-01-01"))
+
+  expect_warning(age(x = c("1800-01-01", "1805-01-01", "1810-01-01"),
+                     y = "2019-01-01"))
+})
+
+test_that("age_groups works", {
+  ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21)
+
+  expect_equal(length(unique(age_groups(ages, 50))),
+               2)
+  expect_equal(length(unique(age_groups(ages, c(50, 60)))),
+               3)
+  expect_identical(class(age_groups(ages)),
+                   c("ordered", "factor"))
+  expect_identical(levels(age_groups(ages, c(21, 50))),
+                   c("0-20", "21-49", "50+"))
+})