AMR/R/age.R

193 lines
7.8 KiB
R
Raw Normal View History

2018-12-15 22:40:07 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
2019-01-02 23:24:07 +01:00
# SOURCE #
# https://gitlab.com/msberends/AMR #
2018-12-15 22:40:07 +01:00
# #
# LICENCE #
2019-01-02 23:24:07 +01:00
# (c) 2019 Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
2018-12-15 22:40:07 +01:00
# #
2019-01-02 23:24:07 +01:00
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# #
# This R package was created for academic research and was publicly #
# released in the hope that it will be useful, but it comes WITHOUT #
# ANY WARRANTY OR LIABILITY. #
2019-04-05 18:47:39 +02:00
# Visit our website for more info: https://msberends.gitlab.io/AMR. #
2018-12-15 22:40:07 +01:00
# ==================================================================== #
#' Age in years of individuals
#'
2019-01-12 11:06:58 +01:00
#' Calculates age in years based on a reference date, which is the system date by default.
#' @param x date(s), will be coerced with [as.POSIXlt()]
#' @param reference reference date(s) (defaults to today), will be coerced with [as.POSIXlt()] and cannot be lower than `x`
#' @param exact a logical to indicate whether age calculation should be exact, i.e. with decimals. It divides the number of days of [year-to-date](https://en.wikipedia.org/wiki/Year-to-date) (YTD) of `x` by the number of days in the year of `reference` (either 365 or 366).
#' @param na.rm a logical to indicate whether missing values should be removed
#' @return An integer (no decimals) if `exact = FALSE`, a double (with decimals) otherwise
#' @seealso To split ages into groups, use the [age_groups()] function.
2018-12-15 22:40:07 +01:00
#' @importFrom dplyr if_else
2019-01-02 23:24:07 +01:00
#' @inheritSection AMR Read more on our website!
2018-12-15 22:40:07 +01:00
#' @export
2019-01-25 13:18:41 +01:00
#' @examples
#' # 10 random birth dates
#' df <- data.frame(birth_date = Sys.Date() - runif(10) * 25000)
#' # add ages
2019-01-25 13:18:41 +01:00
#' df$age <- age(df$birth_date)
#' # add exact ages
#' df$age_exact <- age(df$birth_date, exact = TRUE)
#'
#' df
age <- function(x, reference = Sys.Date(), exact = FALSE, na.rm = FALSE) {
  # Purpose: age in whole years (integer) of `x` at `reference`, or with a
  # decimal part (double) when `exact` is TRUE. Ages below 0 become NA with a
  # warning; ages above 120 only trigger a warning. `na.rm = TRUE` drops NAs
  # from the result.

  # a single reference date is recycled over `x`; any other mismatch is an error
  if (length(x) != length(reference)) {
    if (length(reference) == 1) {
      reference <- rep(reference, length(x))
    } else {
      stop("`x` and `reference` must be of same length, or `reference` must be of length 1.")
    }
  }
  x <- as.POSIXlt(x)
  reference <- as.POSIXlt(reference)

  # from https://stackoverflow.com/a/25450756/4575331
  years_gap <- reference$year - x$year
  # TRUE when the birthday has not yet been reached in the year of `reference`
  birthday_pending <- reference$mon < x$mon |
    (reference$mon == x$mon & reference$mday < x$mday)
  # integer minus logical stays integer; NA dates propagate as NA
  ages <- as.integer(years_gap - birthday_pending)

  # add decimals
  if (isTRUE(as.logical(exact))) {
    ref_year <- reference$year + 1900L
    ref_is_leap <- (ref_year %% 4L == 0L & ref_year %% 100L != 0L) |
      ref_year %% 400L == 0L

    # get dates of `x` when `x` would have the year of `reference`;
    # 29 February does not exist in a non-leap year, so use 1 March there,
    # which is the day on which the integer age above increases as well
    bday_mon <- x$mon + 1L
    bday_mday <- x$mday
    leap_day <- which(x$mon == 1L & x$mday == 29L & !ref_is_leap)
    bday_mon[leap_day] <- 3L
    bday_mday[leap_day] <- 1L
    # strptime() returns NA (instead of raising an error) for missing dates
    x_in_reference_year <- as.Date(strptime(
      sprintf("%d-%d-%d", ref_year, bday_mon, bday_mday),
      format = "%Y-%m-%d", tz = "UTC"
    ))

    # get differences in whole calendar days; both sides are plain dates, so
    # the result does not depend on the time zone of the session
    n_days_x_rest <- as.double(as.Date(reference) - x_in_reference_year)
    # get numbers of days the years of `reference` has for a reliable denominator
    n_days_reference_year <- 365 + ref_is_leap
    # add decimal parts of year
    mod <- n_days_x_rest / n_days_reference_year
    # negative mods are cases where `x_in_reference_year` > `reference` - so 'add' a year
    # (which() skips NAs, which are not allowed in subscripted assignments)
    negative <- which(mod < 0)
    mod[negative] <- 1 + mod[negative]
    # and finally add to ages
    ages <- ages + mod
  }

  if (any(ages < 0, na.rm = TRUE)) {
    ages[ages < 0] <- NA
    warning("NAs introduced for ages below 0.")
  }
  if (any(ages > 120, na.rm = TRUE)) {
    warning("Some ages are above 120.")
  }
  if (isTRUE(na.rm)) {
    ages <- ages[!is.na(ages)]
  }

  ages
}
#' Split ages into age groups
2018-12-15 22:40:07 +01:00
#'
#' Split ages into age groups defined by the `split_at` parameter. This allows for easier demographic (antimicrobial resistance) analysis.
#' @param x age, e.g. calculated with [age()]
#' @param split_at values to split `x` at, defaults to age groups 0-11, 12-24, 25-54, 55-74 and 75+. See Details.
#' @param na.rm a logical to indicate whether missing values should be removed
2018-12-15 22:40:07 +01:00
#' @details To split ages, the input can be:
#'
#' * A numeric vector. A vector of e.g. `c(10, 20)` will split on 0-9, 10-19 and 20+. A value of only `50` will split on 0-49 and 50+.
#' The default is to split on young children (0-11), youth (12-24), young adults (25-54), middle-aged adults (55-74) and elderly (75+).
#' * A character:
#' - `"children"` or `"kids"`, equivalent of: `c(0, 1, 2, 4, 6, 13, 18)`. This will split on 0, 1, 2-3, 4-5, 6-12, 13-17 and 18+.
#' - `"elderly"` or `"seniors"`, equivalent of: `c(65, 75, 85)`. This will split on 0-64, 65-74, 75-84, 85+.
#' - `"fives"`, equivalent of: `1:20 * 5`. This will split on 0-4, 5-9, 10-14, ..., 90-94, 95-99, 100+.
#' - `"tens"`, equivalent of: `1:10 * 10`. This will split on 0-9, 10-19, 20-29, ... 80-89, 90-99, 100+.
#' @return Ordered [`factor`]
#' @seealso To determine ages, based on one or more reference dates, use the [age()] function.
2018-12-15 22:40:07 +01:00
#' @export
2019-01-02 23:24:07 +01:00
#' @inheritSection AMR Read more on our website!
2018-12-15 22:40:07 +01:00
#' @examples
#' ages <- c(3, 8, 16, 54, 31, 76, 101, 43, 21)
#'
#' # split into 0-49 and 50+
2018-12-15 22:40:07 +01:00
#' age_groups(ages, 50)
#'
#' # split into 0-19, 20-49 and 50+
#' age_groups(ages, c(20, 50))
2018-12-15 22:40:07 +01:00
#'
#' # split into groups of ten years
2019-04-09 10:34:40 +02:00
#' age_groups(ages, 1:10 * 10)
#' age_groups(ages, split_at = "tens")
2018-12-15 22:40:07 +01:00
#'
#' # split into groups of five years
2019-04-09 10:34:40 +02:00
#' age_groups(ages, 1:20 * 5)
#' age_groups(ages, split_at = "fives")
2018-12-15 22:40:07 +01:00
#'
#' # split specifically for children
2018-12-15 22:40:07 +01:00
#' age_groups(ages, "children")
#' # same:
#' age_groups(ages, c(0, 1, 2, 4, 6, 13, 18))
2018-12-15 22:40:07 +01:00
#'
2019-11-03 22:41:29 +01:00
#' \dontrun{
2018-12-15 22:40:07 +01:00
#' # resistance of ciprofloxacin per age group
2018-12-16 09:50:14 +01:00
#' library(dplyr)
#' example_isolates %>%
2019-05-30 08:51:38 +02:00
#' filter_first_isolate() %>%
2019-05-29 00:36:48 +02:00
#' filter(mo == as.mo("E. coli")) %>%
2018-12-15 22:40:07 +01:00
#' group_by(age_group = age_groups(age)) %>%
2019-05-29 00:36:48 +02:00
#' select(age_group, CIP) %>%
2018-12-15 22:40:07 +01:00
#' ggplot_rsi(x = "age_group")
2019-11-03 22:41:29 +01:00
#' }
age_groups <- function(x, split_at = c(12, 25, 55, 75), na.rm = FALSE) {
  # Purpose: turn numeric ages into an ordered factor of age groups with
  # labels such as "0-11", "12-24" and "75+". Ages below 0 become NA with a
  # warning. `na.rm = TRUE` drops NAs from the result.
  if (!is.numeric(x)) {
    stop("`x` must be numeric, not a ", paste0(class(x), collapse = "/"), ".")
  }
  if (any(x < 0, na.rm = TRUE)) {
    x[x < 0] <- NA
    warning("NAs introduced for ages below 0.")
  }

  # translate a keyword into its numeric split points; only the first element
  # is used (`%like%` is not defined in this file)
  if (is.character(split_at)) {
    split_at <- split_at[1L]
    if (split_at %like% "^(child|kid|junior)") {
      split_at <- c(0, 1, 2, 4, 6, 13, 18)
    } else if (split_at %like% "^(elder|senior)") {
      split_at <- c(65, 75, 85)
    } else if (split_at %like% "^five") {
      split_at <- 1:20 * 5
    } else if (split_at %like% "^ten") {
      split_at <- 1:10 * 10
    }
  }

  # sort() drops NAs; negative split points cannot delimit an age group
  split_at <- sort(unique(as.integer(split_at)))
  split_at <- split_at[split_at >= 0L]
  if (length(split_at) == 0L || split_at[1L] != 0L) {
    # add base number 0
    split_at <- c(0L, split_at)
  }
  n_groups <- length(split_at)
  if (n_groups == 1L) {
    # only 0 is available
    stop("invalid value for `split_at`.")
  }

  # turn input values to 'split_at' indices: the last split point that is <= x
  # (all non-missing `x` are >= 0 here, so the index is at least 1)
  group_index <- findInterval(x, split_at)

  # create labels: "lower-upper", or just "lower" for groups of a single year
  lower <- split_at[-n_groups]
  upper <- split_at[-1L] - 1L
  labs <- paste0(lower, "-", upper)
  single_year <- lower == upper
  labs[single_year] <- as.character(lower[single_year])
  # last category
  labs <- c(labs, paste0(split_at[n_groups], "+"))

  agegroups <- factor(labs[group_index], levels = labs, ordered = TRUE)
  if (isTRUE(na.rm)) {
    agegroups <- agegroups[!is.na(agegroups)]
  }
  agegroups
}