2018-02-21 11:52:31 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Analysis #
# #
# AUTHORS #
# Berends MS (m.s.berends@umcg.nl), Luz CF (c.f.luz@umcg.nl) #
# #
# LICENCE #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2.0, #
# as published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# ==================================================================== #
2018-08-29 12:27:37 +02:00
#' Dataset with 423 antibiotics
2018-02-21 11:52:31 +01:00
#'
2018-08-29 12:27:37 +02:00
#' A dataset containing all antibiotics with a J0 code and some other antimicrobial agents, with their defined daily doses (DDDs). Except for trade names and abbreviations, all properties were downloaded from the WHO, see Source.
#' @format A data.frame with 423 observations and 18 variables:
2018-02-21 11:52:31 +01:00
#' \describe{
#' \item{\code{atc}}{ATC code, like \code{J01CR02}}
2018-08-25 22:01:14 +02:00
#' \item{\code{certe}}{Certe code, like \code{amcl}}
2018-02-21 11:52:31 +01:00
#' \item{\code{umcg}}{UMCG code, like \code{AMCL}}
2018-08-28 13:51:13 +02:00
#' \item{\code{abbr}}{Abbreviation as used by many countries, used internally by \code{\link{as.atc}}}
#' \item{\code{official}}{Official name by the WHO, like \code{"Amoxicillin and beta-lactamase inhibitor"}}
2018-02-21 11:52:31 +01:00
#' \item{\code{official_nl}}{Official name in the Netherlands, like \code{"Amoxicilline met enzymremmer"}}
2018-02-26 15:53:09 +01:00
#' \item{\code{trivial_nl}}{Trivial name in Dutch, like \code{"Amoxicilline/clavulaanzuur"}}
2018-08-29 12:27:37 +02:00
#' \item{\code{trade_name}}{Trade name as used by many countries (a total of 294), used internally by \code{\link{as.atc}}}
2018-03-19 12:43:22 +01:00
#' \item{\code{oral_ddd}}{Defined Daily Dose (DDD), oral treatment}
2018-02-21 11:52:31 +01:00
#' \item{\code{oral_units}}{Units of \code{oral_ddd}}
2018-03-19 12:43:22 +01:00
#' \item{\code{iv_ddd}}{Defined Daily Dose (DDD), parenteral treatment}
2018-02-21 11:52:31 +01:00
#' \item{\code{iv_units}}{Units of \code{iv_ddd}}
2018-03-19 12:43:22 +01:00
#' \item{\code{atc_group1}}{ATC group, like \code{"Macrolides, lincosamides and streptogramins"}}
#' \item{\code{atc_group2}}{Subgroup of \code{atc_group1}, like \code{"Macrolides"}}
#' \item{\code{atc_group1_nl}}{ATC group in Dutch, like \code{"Macroliden, lincosamiden en streptograminen"}}
#' \item{\code{atc_group2_nl}}{Subgroup of \code{atc_group1} in Dutch, like \code{"Macroliden"}}
#' \item{\code{useful_gramnegative}}{\code{FALSE} if not useful according to EUCAST, \code{NA} otherwise (see Source)}
#' \item{\code{useful_grampositive}}{\code{FALSE} if not useful according to EUCAST, \code{NA} otherwise (see Source)}
2018-02-21 11:52:31 +01:00
#' }
2018-03-19 12:43:22 +01:00
#' @source - World Health Organization: \url{https://www.whocc.no/atc_ddd_index/} \cr - EUCAST - Expert rules intrinsic exceptional V3.1 \cr - MOLIS (LIS of Certe): \url{https://www.certe.nl} \cr - GLIMS (LIS of UMCG): \url{https://www.umcg.nl}
2018-03-23 14:46:02 +01:00
#' @seealso \code{\link{microorganisms}}
2018-08-25 22:01:14 +02:00
# use this later to further fill AMR::antibiotics
# drug <- "Ciprofloxacin"
# url <- xml2::read_html(paste0("https://www.ncbi.nlm.nih.gov/pccompound?term=", drug)) %>%
# html_nodes(".rslt") %>%
# .[[1]] %>%
# html_nodes(".title a") %>%
# html_attr("href") %>%
# gsub("/compound/", "/rest/pug_view/data/compound/", ., fixed = TRUE) %>%
# paste0("/XML/?response_type=display")
# synonyms <- url %>%
# read_xml() %>%
# xml_contents() %>% .[[6]] %>%
# xml_contents() %>% .[[8]] %>%
# xml_contents() %>% .[[3]] %>%
# xml_contents() %>% .[[3]] %>%
# xml_contents() %>%
# paste() %>%
# .[. %like% "StringValueList"] %>%
# gsub("[</]+StringValueList[>]", "", .)
2018-08-29 12:27:37 +02:00
2018-03-19 12:43:22 +01:00
# last two columns created with:
# antibiotics %>%
2018-04-02 11:11:21 +02:00
# mutate(useful_gramnegative =
2018-03-19 12:43:22 +01:00
# if_else(
# atc_group1 %like% '(fusidic|glycopeptide|macrolide|lincosamide|daptomycin|linezolid)' |
# atc_group2 %like% '(fusidic|glycopeptide|macrolide|lincosamide|daptomycin|linezolid)' |
# official %like% '(fusidic|glycopeptide|macrolide|lincosamide|daptomycin|linezolid)',
# FALSE,
# NA
# ),
# useful_grampositive =
# if_else(
# atc_group1 %like% '(aztreonam|temocillin|polymyxin|colistin|nalidixic)' |
# atc_group2 %like% '(aztreonam|temocillin|polymyxin|colistin|nalidixic)' |
# official %like% '(aztreonam|temocillin|polymyxin|colistin|nalidixic)',
# FALSE,
# NA
# )
# )
2018-08-29 12:27:37 +02:00
#
# ADD NEW TRADE NAMES FROM OTHER DATAFRAME
# antibiotics_add_to_property <- function(ab_df, atc, property, value) {
# if (length(atc) > 1L) {
# stop("only one atc at a time")
# }
# if (!property %in% c("abbr", "trade_name")) {
# stop("only possible for abbr and trade_name")
# }
#
# value <- gsub(ab_df[which(ab_df$atc == atc),] %>% pull("official"), "", value, fixed = TRUE)
# value <- gsub("||", "|", value, fixed = TRUE)
# value <- gsub("[äáàâ]", "a", value)
# value <- gsub("[ëéèê]", "e", value)
# value <- gsub("[ïíìî]", "i", value)
# value <- gsub("[öóòô]", "o", value)
# value <- gsub("[üúùû]", "u", value)
# if (!atc %in% ab_df$atc) {
# message("SKIPPING - UNKNOWN ATC: ", atc)
# }
# if (is.na(value)) {
# message("SKIPPING - VALUE MISSES: ", atc)
# }
# if (atc %in% ab_df$atc & !is.na(value)) {
# current <- ab_df[which(ab_df$atc == atc),] %>% pull(property)
# if (!is.na(current)) {
# value <- paste(current, value, sep = "|")
# }
# value <- strsplit(value, "|", fixed = TRUE) %>% unlist() %>% unique() %>% paste(collapse = "|")
# value <- gsub("||", "|", value, fixed = TRUE)
# # print(value)
# ab_df[which(ab_df$atc == atc), property] <- value
# message("Added ", value, " to ", ab_official(atc), " (", atc, ", ", ab_certe(atc), ")")
# }
# ab_df
# }
#
2018-03-19 12:43:22 +01:00
"antibiotics"
2018-02-21 11:52:31 +01:00
2018-08-28 13:51:13 +02:00
#' Dataset with ~2650 microorganisms
2018-02-21 11:52:31 +01:00
#'
2018-08-28 13:51:13 +02:00
#' A dataset containing 2,646 microorganisms. MO codes of the UMCG can be looked up using \code{\link{microorganisms.umcg}}.
#' @format A data.frame with 2,646 observations and 12 variables:
2018-02-21 11:52:31 +01:00
#' \describe{
#' \item{\code{bactid}}{ID of microorganism}
#' \item{\code{bactsys}}{Bactsyscode of microorganism}
#' \item{\code{family}}{Family name of microorganism}
#' \item{\code{genus}}{Genus name of microorganism, like \code{"Escherichia"}}
#' \item{\code{species}}{Species name of microorganism, like \code{"coli"}}
#' \item{\code{subspecies}}{Subspecies name or bio-/serovar of microorganism, like \code{"EHEC"}}
#' \item{\code{fullname}}{Full name, like \code{"Escherichia coli (EHEC)"}}
2018-03-19 12:43:22 +01:00
#' \item{\code{type}}{Type of microorganism, like \code{"Bacteria"} and \code{"Fungus/yeast"}}
#' \item{\code{gramstain}}{Gram stain of microorganism, like \code{"Negative rods"}}
#' \item{\code{aerobic}}{Logical indicating whether the bacterium is aerobic}
#' \item{\code{type_nl}}{Type of microorganism in Dutch, like \code{"Bacterie"} and \code{"Schimmel/gist"}}
#' \item{\code{gramstain_nl}}{Gram stain of microorganism in Dutch, like \code{"Negatieve staven"}}
2018-02-21 11:52:31 +01:00
#' }
2018-07-25 14:17:04 +02:00
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
2018-08-03 11:46:06 +02:00
# new <- microorganisms %>% filter(genus == "Bacteroides") %>% .[1,]
# new[1, 'bactid'] <- "DIAPNU"
# new[1, 'bactsys'] <- "DIAPNU"
# new[1, 'family'] <- "Veillonellaceae"
# new[1, 'genus'] <- "Dialister"
# new[1, 'species'] <- "pneumosintes"
# new[1, 'subspecies'] <- NA
# new[1, 'fullname'] <- paste(new[1, 'genus'], new[1, 'species'])
# microorganisms <- microorganisms %>% bind_rows(new) %>% arrange(bactid)
2018-03-23 14:46:02 +01:00
#' @seealso \code{\link{guess_bactid}} \code{\link{antibiotics}} \code{\link{microorganisms.umcg}}
"microorganisms"
2018-02-21 11:52:31 +01:00
#' Translation table for UMCG with ~1100 microorganisms
#'
2018-03-23 14:46:02 +01:00
#' A dataset containing all bacteria codes of UMCG MMB. These codes can be joined to data with an ID from \code{\link{microorganisms}$bactid} (using \code{\link{left_join_microorganisms}}). GLIMS codes can also be translated to valid \code{bactid}'s with \code{\link{guess_bactid}}.
2018-02-21 11:52:31 +01:00
#' @format A data.frame with 1090 observations and 2 variables:
#' \describe{
#' \item{\code{mocode}}{Code of microorganism according to UMCG MMB}
2018-03-23 14:46:02 +01:00
#' \item{\code{bactid}}{Code of microorganism in \code{\link{microorganisms}}}
2018-02-21 11:52:31 +01:00
#' }
2018-07-25 14:17:04 +02:00
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl} \cr \cr GLIMS (LIS of UMCG) - \url{https://www.umcg.nl}
2018-03-23 14:46:02 +01:00
#' @seealso \code{\link{guess_bactid}} \code{\link{microorganisms}}
"microorganisms.umcg"
2018-02-27 20:01:02 +01:00
#' Dataset with 2000 blood culture isolates of septic patients
#'
2018-08-12 22:34:03 +02:00
#' An anonymised dataset containing 2000 microbial blood culture isolates with their full antibiograms found in septic patients in 4 different hospitals in the Netherlands, between 2001 and 2017. It is true, genuine data. This \code{data.frame} can be used to practice AMR analysis. For examples, press F1.
2018-07-25 14:17:04 +02:00
#' @format A data.frame with 2000 observations and 49 variables:
2018-02-27 20:01:02 +01:00
#' \describe{
#' \item{\code{date}}{date of receipt at the laboratory}
2018-08-12 22:34:03 +02:00
#' \item{\code{hospital_id}}{ID of the hospital, from A to D}
2018-02-27 20:01:02 +01:00
#' \item{\code{ward_icu}}{logical to determine if ward is an intensive care unit}
#' \item{\code{ward_clinical}}{logical to determine if ward is a regular clinical ward}
#' \item{\code{ward_outpatient}}{logical to determine if ward is an outpatient clinic}
#' \item{\code{age}}{age of the patient}
#' \item{\code{sex}}{sex of the patient}
#' \item{\code{patient_id}}{ID of the patient, first 10 characters of an SHA hash containing irretrievable information}
2018-03-23 14:46:02 +01:00
#' \item{\code{bactid}}{ID of microorganism, see \code{\link{microorganisms}}}
2018-07-25 14:17:04 +02:00
#' \item{\code{peni:rifa}}{40 different antibiotics with class \code{rsi} (see \code{\link{as.rsi}}); these column names occur in the \code{\link{antibiotics}} data set and can be translated with \code{\link{abname}}}
2018-02-27 20:01:02 +01:00
#' }
2018-07-25 14:17:04 +02:00
# source MOLIS (LIS of Certe) - \url{https://www.certe.nl}
2018-03-23 14:46:02 +01:00
#' @examples
#' # ----------- #
#' # PREPARATION #
#' # ----------- #
2018-04-02 11:11:21 +02:00
#'
2018-03-23 14:46:02 +01:00
#' # Save this example dataset to an object, so we can edit it:
#' my_data <- septic_patients
2018-04-02 11:11:21 +02:00
#'
2018-03-23 14:46:02 +01:00
#' # load the dplyr package to make data science A LOT easier
#' library(dplyr)
2018-04-02 11:11:21 +02:00
#'
2018-03-23 14:46:02 +01:00
#' # Add first isolates to our dataset:
2018-04-02 11:11:21 +02:00
#' my_data <- my_data %>%
#' mutate(first_isolates = first_isolate(my_data, "date", "patient_id", "bactid"))
#'
2018-03-23 14:46:02 +01:00
#' # -------- #
#' # ANALYSIS #
#' # -------- #
2018-04-02 11:11:21 +02:00
#'
2018-07-25 14:17:04 +02:00
#' # 1. Get the amoxicillin resistance percentages (p)
#' # and numbers (n) of E. coli, divided by hospital:
2018-04-02 11:11:21 +02:00
#'
2018-03-23 14:46:02 +01:00
#' my_data %>%
2018-07-25 14:17:04 +02:00
#' filter(bactid == guess_bactid("E. coli"),
2018-04-02 11:11:21 +02:00
#' first_isolates == TRUE) %>%
#' group_by(hospital_id) %>%
2018-07-25 14:17:04 +02:00
#' summarise(n = n_rsi(amox),
2018-08-10 15:01:05 +02:00
#' p = portion_IR(amox))
2018-04-02 11:11:21 +02:00
#'
#'
#' # 2. Get the amoxicillin/clavulanic acid resistance
2018-03-23 14:46:02 +01:00
#' # percentages of E. coli, trend over the years:
2018-04-02 11:11:21 +02:00
#'
#' my_data %>%
2018-03-23 14:46:02 +01:00
#' filter(bactid == guess_bactid("E. coli"),
2018-04-02 11:11:21 +02:00
#' first_isolates == TRUE) %>%
#' group_by(year = format(date, "%Y")) %>%
2018-07-25 14:17:04 +02:00
#' summarise(n = n_rsi(amcl),
2018-08-10 15:01:05 +02:00
#' p = portion_IR(amcl, minimum = 20))
2018-03-19 12:43:22 +01:00
"septic_patients"