AMR/R/atc_online.R

226 lines
8.9 KiB
R
Raw Normal View History

2019-01-26 23:22:56 +01:00
# ==================================================================== #
# TITLE #
# Antimicrobial Resistance (AMR) Data Analysis for R #
2019-01-26 23:22:56 +01:00
# #
# SOURCE #
2020-07-08 14:48:06 +02:00
# https://github.com/msberends/AMR #
2019-01-26 23:22:56 +01:00
# #
# LICENCE #
2021-12-23 18:56:28 +01:00
# (c) 2018-2022 Berends MS, Luz CF et al. #
2020-10-08 11:16:03 +02:00
# Developed at the University of Groningen, the Netherlands, in #
# collaboration with non-profit organisations Certe Medical #
# Diagnostics & Advice, and University Medical Center Groningen. #
2019-01-26 23:22:56 +01:00
# #
# This R package is free software; you can freely use and distribute #
# it for both personal and commercial purposes under the terms of the #
# GNU General Public License version 2.0 (GNU GPL-2), as published by #
# the Free Software Foundation. #
# We created this package for both routine data analysis and academic #
# research and it was publicly released in the hope that it will be #
# useful, but it comes WITHOUT ANY WARRANTY OR LIABILITY. #
2020-10-08 11:16:03 +02:00
# #
# Visit our website for the full manual and a complete tutorial about #
# how to conduct AMR data analysis: https://msberends.github.io/AMR/ #
2019-01-26 23:22:56 +01:00
# ==================================================================== #
#' Get ATC Properties from WHOCC Website
2019-01-26 23:22:56 +01:00
#'
2021-08-19 23:43:02 +02:00
#' Gets data from the WHOCC website to determine properties of an Anatomical Therapeutic Chemical (ATC) code (e.g. of an antibiotic), such as the name, defined daily dose (DDD) or standard unit.
#' @param atc_code a [character] (vector) with ATC code(s) of antibiotics, will be coerced with [as.ab()] and [ab_atc()] internally if not a valid ATC code
#' @param property property of an ATC code. Valid values are `"ATC"`, `"Name"`, `"DDD"`, `"U"` (`"unit"`), `"Adm.R"`, `"Note"` and `"groups"`. For this last option, all hierarchical groups of an ATC code will be returned, see *Examples*.
#' @param administration type of administration, used to select the matching row when `property` is `"DDD"`, `"U"` (`"unit"`), `"Adm.R"` or `"Note"`, see *Details*
#' @param url url of website of the WHOCC. The sign `%s` can be used as a placeholder for ATC codes.
#' @param url_vet url of website of the WHOCC for veterinary medicine. The sign `%s` can be used as a placeholder for ATC_vet codes (that all start with "Q").
2020-12-22 00:51:17 +01:00
#' @param ... arguments to pass on to [atc_online_property()]
2019-01-26 23:22:56 +01:00
#' @details
2020-12-22 00:51:17 +01:00
#' Options for argument `administration`:
#'
#' - `"Implant"` = Implant
#' - `"Inhal"` = Inhalation
#' - `"Instill"` = Instillation
#' - `"N"` = nasal
#' - `"O"` = oral
#' - `"P"` = parenteral
#' - `"R"` = rectal
#' - `"SL"` = sublingual/buccal
#' - `"TD"` = transdermal
#' - `"V"` = vaginal
2019-01-26 23:22:56 +01:00
#'
#' Abbreviations of return values when using `property = "U"` (unit):
#'
#' - `"g"` = gram
#' - `"mg"` = milligram
#' - `"mcg"` = microgram
#' - `"U"` = unit
#' - `"TU"` = thousand units
#' - `"MU"` = million units
#' - `"mmol"` = millimole
#' - `"ml"` = millilitre (e.g. eyedrops)
#'
#' **N.B. This function requires an internet connection and only works if the following packages are installed: `curl`, `rvest`, `xml2`.**
2019-01-26 23:22:56 +01:00
#' @export
#' @rdname atc_online
#' @source <https://www.whocc.no/atc_ddd_alterations__cumulative/ddd_alterations/abbrevations/>
2019-01-26 23:22:56 +01:00
#' @examples
2021-05-24 09:34:08 +02:00
#' \donttest{
#' if (requireNamespace("curl") && requireNamespace("rvest") && requireNamespace("xml2")) {
#' # oral DDD (Defined Daily Dose) of amoxicillin
#' atc_online_property("J01CA04", "DDD", "O")
2021-08-19 23:43:02 +02:00
#' atc_online_ddd(ab_atc("amox"))
#'
2021-05-24 09:34:08 +02:00
#' # parenteral DDD (Defined Daily Dose) of amoxicillin
#' atc_online_property("J01CA04", "DDD", "P")
2019-01-26 23:22:56 +01:00
#'
2021-05-24 09:34:08 +02:00
#' atc_online_property("J01CA04", property = "groups") # search hierarchical groups of amoxicillin
#' }
2019-01-26 23:22:56 +01:00
#' }
atc_online_property <- function(atc_code,
                                property,
                                administration = "O",
                                url = "https://www.whocc.no/atc_ddd_index/?code=%s&showdescription=no",
                                url_vet = "https://www.whocc.no/atcvet/atcvet_index/?code=%s&showdescription=no") {
  # Looks up one property for every element of `atc_code` by reading the
  # matching WHOCC web page (one request per code).
  #
  # Return value:
  # - property "ddd":    numeric vector, one element per code
  # - property "groups": list with one character vector per code, in input
  #                      order (unwrapped to a plain character vector when
  #                      there is exactly one code)
  # - other properties:  character vector, one element per code
  # - no internet connection or a failed request: rep(NA, length(atc_code))
  meet_criteria(atc_code, allow_class = "character")
  meet_criteria(property, allow_class = "character", has_length = 1, is_in = c("ATC", "Name", "DDD", "U", "unit", "Adm.R", "Note", "groups"), ignore.case = TRUE)
  meet_criteria(administration, allow_class = "character", has_length = 1)
  meet_criteria(url, allow_class = "character", has_length = 1, looks_like = "https?://")
  meet_criteria(url_vet, allow_class = "character", has_length = 1, looks_like = "https?://")

  # optional dependencies are resolved at run time through import_fn()
  has_internet <- import_fn("has_internet", "curl")
  html_attr <- import_fn("html_attr", "rvest")
  html_children <- import_fn("html_children", "rvest")
  html_node <- import_fn("html_node", "rvest")
  html_nodes <- import_fn("html_nodes", "rvest")
  html_table <- import_fn("html_table", "rvest")
  html_text <- import_fn("html_text", "rvest")
  read_html <- import_fn("read_html", "xml2")

  check_dataset_integrity()

  # anything that is not a known ATC code is translated through ab_atc()
  if (!all(atc_code %in% unlist(antibiotics$atc))) {
    atc_code <- as.character(ab_atc(atc_code, only_first = TRUE))
  }

  if (!has_internet()) {
    message_("There appears to be no internet connection, returning NA.",
             add_fn = font_red,
             as_note = FALSE)
    return(rep(NA, length(atc_code)))
  }

  property <- tolower(property)
  # also allow unit as property
  if (property == "unit") {
    property <- "u"
  }

  # preallocate the result so that element i always belongs to atc_code[i]
  if (property == "ddd") {
    returnvalue <- rep(NA_real_, length(atc_code))
  } else if (property == "groups") {
    returnvalue <- vector("list", length(atc_code))
  } else {
    returnvalue <- rep(NA_character_, length(atc_code))
  }

  progress <- progress_ticker(n = length(atc_code), 3)
  on.exit(close(progress), add = TRUE)

  for (i in seq_along(atc_code)) {
    progress$tick()

    if (atc_code[i] %like% "^Q") {
      # veterinary drugs, ATC_vet codes start with a "Q"
      atc_url <- url_vet
    } else {
      atc_url <- url
    }
    atc_url <- sub("%s", atc_code[i], atc_url, fixed = TRUE)

    if (property == "groups") {
      out <- tryCatch(
        read_html(atc_url) %pm>%
          html_node("#content") %pm>%
          html_children() %pm>%
          html_node("a"),
        error = function(e) NULL)
      if (is.null(out)) {
        message_("Connection to ", atc_url, " failed.")
        return(rep(NA, length(atc_code)))
      }

      # get URLS of items
      hrefs <- out %pm>% html_attr("href")
      # get text of items
      texts <- out %pm>% html_text()
      # select only text items where URL like "code="
      texts <- texts[grepl("?code=", tolower(hrefs), fixed = TRUE)]
      # last one is antibiotics, skip it (also safe for zero-length `texts`)
      texts <- texts[-length(texts)]
      # store by index: prepending with c() would reverse the order of the
      # result relative to `atc_code`
      returnvalue[[i]] <- texts

    } else {
      out <- tryCatch(
        read_html(atc_url) %pm>%
          html_nodes("table") %pm>%
          html_table(header = TRUE) %pm>%
          as.data.frame(stringsAsFactors = FALSE),
        error = function(e) NULL)
      if (is.null(out)) {
        message_("Connection to ", atc_url, " failed.")
        return(rep(NA, length(atc_code)))
      }

      # case insensitive column names
      colnames(out) <- gsub("^atc.*", "atc", tolower(colnames(out)))

      if (length(out) == 0) {
        warning_("in `atc_online_property()`: ATC not found: ", atc_code[i], ". Please check ", atc_url, ".")
        returnvalue[i] <- NA
        next
      }

      if (property %in% c("atc", "name")) {
        # ATC and name are only in first row
        returnvalue[i] <- out[1, property]
      } else {
        # `||` short-circuits, so the column is only read when it exists;
        # the elementwise `|` would evaluate both sides and fail on a
        # missing column
        if (!"adm.r" %in% colnames(out) || is.na(out[1, "adm.r"])) {
          returnvalue[i] <- NA
          next
        }
        # which() ignores rows where Adm.R is NA; if several rows match the
        # requested administration, the last one is used
        matching_rows <- which(out[["adm.r"]] == administration)
        if (length(matching_rows) > 0) {
          returnvalue[i] <- out[matching_rows[length(matching_rows)], property]
        }
      }
    }
  }

  # a single ATC code returns its groups as a plain character vector
  if (property == "groups" && length(returnvalue) == 1) {
    returnvalue <- returnvalue[[1]]
  }

  returnvalue
}
#' @rdname atc_online
#' @export
atc_online_groups <- function(atc_code, ...) {
  # Shortcut for atc_online_property() with the property fixed to "groups";
  # any further arguments are handed over unchanged.
  meet_criteria(atc_code, allow_class = "character")
  groups <- atc_online_property(atc_code = atc_code, property = "groups", ...)
  groups
}
#' @rdname atc_online
#' @export
atc_online_ddd <- function(atc_code, ...) {
  # Shortcut for atc_online_property() with the property fixed to "ddd";
  # any further arguments are handed over unchanged.
  meet_criteria(atc_code, allow_class = "character")
  ddd <- atc_online_property(atc_code = atc_code, property = "ddd", ...)
  ddd
}
2021-08-19 23:43:02 +02:00
#' @rdname atc_online
#' @export
atc_online_ddd_units <- function(atc_code, ...) {
  # Shortcut for atc_online_property() with the property fixed to "unit";
  # any further arguments are handed over unchanged.
  meet_criteria(atc_code, allow_class = "character")
  ddd_units <- atc_online_property(atc_code = atc_code, property = "unit", ...)
  ddd_units
}