# ==================================================================== #
# TITLE #
# AMR: An R Package for Working with Antimicrobial Resistance Data #
# #
# SOURCE #
# https://github.com/msberends/AMR #
# #
# CITE AS #
# Berends MS, Luz CF, Friedrich AW, Sinha BNM, Albers CJ, Glasner C #
# (2022). AMR: An R Package for Working with Antimicrobial Resistance #
# Data. Journal of Statistical Software, 104(3), 1-31. #
# doi:10.18637/jss.v104.i03 #
# #
# Developed at the University of Groningen and the University Medical #
# Center Groningen in The Netherlands, in collaboration with many #
# colleagues from around the world, see our website. #
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
# ==================================================================== #

#' Age in Years of Individuals
#'
#' Calculates age in years based on a reference date, which is the system date at default.
#' @param x date(s), [character] (vectors) will be coerced with [as.POSIXlt()]
#' @param reference reference date(s) (defaults to today), [character] (vectors) will be coerced with [as.POSIXlt()]
#' @param exact a [logical] to indicate whether age calculation should be exact, i.e. with decimals. It divides the number of days of [year-to-date](https://en.wikipedia.org/wiki/Year-to-date) (YTD) of `x` by the number of days in the year of `reference` (either 365 or 366).
#' @param na.rm a [logical] to indicate whether missing values should be removed
#' @param ... arguments passed on to [as.POSIXlt()], such as `origin`
#' @details Ages below 0 will be returned as `NA` with a warning. Ages above 120 will only give a warning.
#'
#' This function vectorises over both `x` and `reference`, meaning that either can have a length of 1 while the other argument has a larger length.
#'
#' With `exact = TRUE`, a date of 29 February in `x` is counted from 1 March in reference years that are not leap years, which is the same moment at which the whole-year age increases.
#' @return An [integer] (no decimals) if `exact = FALSE`, a [double] (with decimals) otherwise
#' @seealso To split ages into groups, use the [age_groups()] function.
#' @export
#' @examples
#' # 10 random pre-Y2K birth dates
#' df <- data.frame(birth_date = as.Date("2000-01-01") - runif(10) * 25000)
#'
#' # add ages
#' df$age <- age(df$birth_date)
#'
#' # add exact ages
#' df$age_exact <- age(df$birth_date, exact = TRUE)
#'
#' # add age at millennium switch
#' df$age_at_y2k <- age(df$birth_date, "2000-01-01")
#'
#' df
age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE, ...) {
  meet_criteria(x, allow_class = c("character", "Date", "POSIXt"))
  meet_criteria(reference, allow_class = c("character", "Date", "POSIXt"))
  meet_criteria(exact, allow_class = "logical", has_length = 1)
  meet_criteria(na.rm, allow_class = "logical", has_length = 1)

  # recycle whichever argument has length 1 to the length of the other
  if (length(x) != length(reference)) {
    if (length(x) == 1) {
      x <- rep(x, length(reference))
    } else if (length(reference) == 1) {
      reference <- rep(reference, length(x))
    } else {
      stop_("`x` and `reference` must be of same length, or `reference` must be of length 1.")
    }
  }
  x <- as.POSIXlt(x, ...)
  reference <- as.POSIXlt(reference, ...)

  # from https://stackoverflow.com/a/25450756/4575331
  years_gap <- reference$year - x$year
  # TRUE where the anniversary of `x` has not yet been reached in the year of `reference`
  anniversary_pending <- reference$mon < x$mon |
    (reference$mon == x$mon & reference$mday < x$mday)
  ages <- ifelse(anniversary_pending,
    as.integer(years_gap - 1),
    as.integer(years_gap)
  )

  # add decimals
  if (isTRUE(exact)) {
    reference_date <- as.Date(reference)
    reference_year <- format(reference_date, "%Y")

    # get dates of `x` when `x` would have the year of `reference`
    x_in_reference_year <- as.Date(
      paste0(reference_year, format(as.Date(x), "-%m-%d")),
      format = "%Y-%m-%d"
    )
    # 29 February does not exist in non-leap reference years and parses to NA;
    # use 1 March instead, in line with the month/day comparison above
    is_leap_day <- !is.na(x$mon) & x$mon == 1L & x$mday == 29L
    unresolved <- is_leap_day & is.na(x_in_reference_year) & !is.na(reference_date)
    if (any(unresolved)) {
      x_in_reference_year[unresolved] <- as.Date(
        paste0(reference_year[unresolved], "-03-01"),
        format = "%Y-%m-%d"
      )
    }

    # get differences in days
    n_days_x_rest <- as.double(difftime(reference_date,
      x_in_reference_year,
      units = "days"
    ))
    # get numbers of days the years of `reference` has for a reliable denominator
    n_days_reference_year <- as.POSIXlt(paste0(reference_year, "-12-31"),
      format = "%Y-%m-%d"
    )$yday + 1
    # add decimal parts of year
    mod <- n_days_x_rest / n_days_reference_year
    # negative mods are cases where `x_in_reference_year` > `reference` - so 'add' a year
    is_negative <- !is.na(mod) & mod < 0
    mod[is_negative] <- mod[is_negative] + 1
    # and finally add to ages
    ages <- ages + mod
  }

  if (any(ages < 0, na.rm = TRUE)) {
    ages[!is.na(ages) & ages < 0] <- NA
    warning_("in `age()`: NAs introduced for ages below 0.")
  }
  if (any(ages > 120, na.rm = TRUE)) {
    warning_("in `age()`: some ages are above 120.")
  }

  if (isTRUE(na.rm)) {
    ages <- ages[!is.na(ages)]
  }

  if (isTRUE(exact)) {
    as.double(ages)
  } else {
    as.integer(ages)
  }
}

#' Split Ages into Age Groups
#'
#' Split ages into age groups defined by the `split_at` argument. This allows for easier demographic (antimicrobial resistance) analysis.
#' @param x age, e.g. calculated with [age()]
#' @param split_at values to split `x` at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See *Details*.
#' @param na.rm a [logical] to indicate whether missing values should be removed
#' @details To split ages, the input for the `split_at` argument can be:
#'
#' * A [numeric] vector. A value of e.g. `c(10, 20)` will split `x` on 0-9, 10-19 and 20+. A value of only `50` will split `x` on 0-49 and 50+.
#'   The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).
#' * A character:
#'   - `"children"` or `"kids"`, equivalent of: `c(0, 1, 2, 4, 6, 13, 18)`. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.
#'   - `"elderly"` or `"seniors"`, equivalent of: `c(65, 75, 85)`. This will split on 0-64, 65-74, 75-84, 85+.
#'   - `"fives"`, equivalent of: `1:20 * 5`. This will split on 0-4, 5-9, ..., 95-99, 100+.
#'   - `"tens"`, equivalent of: `1:10 * 10`. This will split on 0-9, 10-19, ..., 90-99, 100+.
#'
#' A `split_at` that yields no usable split point (such as an unknown keyword) results in an error.
#' @return Ordered [factor]
#' @seealso To determine ages, based on one or more reference dates, use the [age()] function.
#' @export
#' @examples
#' ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21)
#'
#' # split into 0-49 and 50+
#' age_groups(ages, 50)
#'
#' # split into 0-19, 20-49 and 50+
#' age_groups(ages, c(20, 50))
#'
#' # split into groups of ten years
#' age_groups(ages, 1:10 * 10)
#' age_groups(ages, split_at = "tens")
#'
#' # split into groups of five years
#' age_groups(ages, 1:20 * 5)
#' age_groups(ages, split_at = "fives")
#'
#' # split specifically for children
#' age_groups(ages, c(1, 2, 4, 6, 13, 18))
#' age_groups(ages, "children")
#'
#' \donttest{
#' # resistance of ciprofloxacin per age group
#' if (require("dplyr") && require("ggplot2")) {
#'   example_isolates %>%
#'     filter_first_isolate() %>%
#'     filter(mo == as.mo("Escherichia coli")) %>%
#'     group_by(age_group = age_groups(age)) %>%
#'     select(age_group, CIP) %>%
#'     ggplot_sir(
#'       x = "age_group",
#'       minimum = 0,
#'       x.title = "Age Group",
#'       title = "Ciprofloxacin resistance per age group"
#'     )
#' }
#' }
age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
  meet_criteria(x, allow_class = c("numeric", "integer"), is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(split_at, allow_class = c("numeric", "integer", "character"), is_positive_or_zero = TRUE, is_finite = TRUE)
  meet_criteria(na.rm, allow_class = "logical", has_length = 1)

  if (any(x < 0, na.rm = TRUE)) {
    x[!is.na(x) & x < 0] <- NA
    warning_("in `age_groups()`: NAs introduced for ages below 0.")
  }

  # translate a keyword into its predefined split points (only the first element is used)
  if (is.character(split_at)) {
    split_at <- split_at[1L]
    if (split_at %like% "^(child|kid|junior)") {
      split_at <- c(0, 1, 2, 4, 6, 13, 18)
    } else if (split_at %like% "^(elder|senior)") {
      split_at <- c(65, 75, 85)
    } else if (split_at %like% "^five") {
      split_at <- 1:20 * 5
    } else if (split_at %like% "^ten") {
      split_at <- 1:10 * 10
    }
  }

  # unique, ascending, non-missing integer split points
  split_at <- sort(unique(as.integer(split_at)))
  split_at <- split_at[!is.na(split_at)]
  # add base number 0; the length check comes first because an unusable
  # `split_at` leaves an empty vector here, for which `split_at[1]` would be NA
  if (length(split_at) == 0L || split_at[1L] != 0L) {
    split_at <- c(0L, split_at)
  }
  stop_if(length(split_at) == 1, "invalid value for `split_at`") # only 0 is available

  # create labels: "a-b" per closed group ("a" when it spans one year), "n+" for the last
  n_groups <- length(split_at)
  lower <- split_at[-n_groups]
  upper <- split_at[-1L] - 1L
  lbls <- c(
    ifelse(lower == upper, as.character(lower), paste0(lower, "-", upper)),
    paste0(split_at[n_groups], "+")
  )

  # turn input values to 'split_at' indices; NA stays NA
  group_index <- findInterval(x, split_at)

  agegroups <- factor(lbls[group_index], levels = lbls, ordered = TRUE)

  if (isTRUE(na.rm)) {
    agegroups <- agegroups[!is.na(agegroups)]
  }

  agegroups
}